summaryrefslogtreecommitdiff
path: root/lib/fdt_strerror.c
AgeCommit message (Expand)Author
2012-07-23of/lib: Allow scripts/dtc/libfdt to be used from kernel codeDavid Daney
ec-testing Russell King's ARM Linux kernel treeRussell King
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat
-rw-r--r--.gitattributes1
-rw-r--r--.mailmap4
-rw-r--r--CREDITS6
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-netdev89
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu2
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.rst36
-rw-r--r--Documentation/RCU/whatisRCU.rst1
-rw-r--r--Documentation/admin-guide/bcache.rst3
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst2
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst51
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt257
-rw-r--r--Documentation/admin-guide/mm/damon/start.rst10
-rw-r--r--Documentation/admin-guide/mm/damon/usage.rst146
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst40
-rw-r--r--Documentation/admin-guide/sysctl/kernel.rst2
-rw-r--r--Documentation/admin-guide/sysctl/net.rst4
-rw-r--r--Documentation/arch/arm/arm.rst (renamed from Documentation/arm/arm.rst)0
-rw-r--r--Documentation/arch/arm/booting.rst (renamed from Documentation/arm/booting.rst)0
-rw-r--r--Documentation/arch/arm/cluster-pm-race-avoidance.rst (renamed from Documentation/arm/cluster-pm-race-avoidance.rst)0
-rw-r--r--Documentation/arch/arm/features.rst (renamed from Documentation/arm/features.rst)0
-rw-r--r--Documentation/arch/arm/firmware.rst (renamed from Documentation/arm/firmware.rst)0
-rw-r--r--Documentation/arch/arm/google/chromebook-boot-flow.rst (renamed from Documentation/arm/google/chromebook-boot-flow.rst)0
-rw-r--r--Documentation/arch/arm/index.rst (renamed from Documentation/arm/index.rst)0
-rw-r--r--Documentation/arch/arm/interrupts.rst (renamed from Documentation/arm/interrupts.rst)0
-rw-r--r--Documentation/arch/arm/ixp4xx.rst (renamed from Documentation/arm/ixp4xx.rst)0
-rw-r--r--Documentation/arch/arm/kernel_mode_neon.rst (renamed from Documentation/arm/kernel_mode_neon.rst)0
-rw-r--r--Documentation/arch/arm/kernel_user_helpers.rst (renamed from Documentation/arm/kernel_user_helpers.rst)0
-rw-r--r--Documentation/arch/arm/keystone/knav-qmss.rst (renamed from Documentation/arm/keystone/knav-qmss.rst)0
-rw-r--r--Documentation/arch/arm/keystone/overview.rst (renamed from Documentation/arm/keystone/overview.rst)0
-rw-r--r--Documentation/arch/arm/marvell.rst (renamed from Documentation/arm/marvell.rst)0
-rw-r--r--Documentation/arch/arm/mem_alignment.rst (renamed from Documentation/arm/mem_alignment.rst)0
-rw-r--r--Documentation/arch/arm/memory.rst (renamed from Documentation/arm/memory.rst)0
-rw-r--r--Documentation/arch/arm/microchip.rst (renamed from Documentation/arm/microchip.rst)0
-rw-r--r--Documentation/arch/arm/netwinder.rst (renamed from Documentation/arm/netwinder.rst)0
-rw-r--r--Documentation/arch/arm/nwfpe/index.rst (renamed from Documentation/arm/nwfpe/index.rst)0
-rw-r--r--Documentation/arch/arm/nwfpe/netwinder-fpe.rst (renamed from Documentation/arm/nwfpe/netwinder-fpe.rst)0
-rw-r--r--Documentation/arch/arm/nwfpe/notes.rst (renamed from Documentation/arm/nwfpe/notes.rst)0
-rw-r--r--Documentation/arch/arm/nwfpe/nwfpe.rst (renamed from Documentation/arm/nwfpe/nwfpe.rst)0
-rw-r--r--Documentation/arch/arm/nwfpe/todo.rst (renamed from Documentation/arm/nwfpe/todo.rst)0
-rw-r--r--Documentation/arch/arm/omap/dss.rst (renamed from Documentation/arm/omap/dss.rst)0
-rw-r--r--Documentation/arch/arm/omap/index.rst (renamed from Documentation/arm/omap/index.rst)0
-rw-r--r--Documentation/arch/arm/omap/omap.rst (renamed from Documentation/arm/omap/omap.rst)0
-rw-r--r--Documentation/arch/arm/omap/omap_pm.rst (renamed from Documentation/arm/omap/omap_pm.rst)0
-rw-r--r--Documentation/arch/arm/porting.rst (renamed from Documentation/arm/porting.rst)0
-rw-r--r--Documentation/arch/arm/pxa/mfp.rst (renamed from Documentation/arm/pxa/mfp.rst)0
-rw-r--r--Documentation/arch/arm/sa1100/assabet.rst (renamed from Documentation/arm/sa1100/assabet.rst)0
-rw-r--r--Documentation/arch/arm/sa1100/cerf.rst (renamed from Documentation/arm/sa1100/cerf.rst)0
-rw-r--r--Documentation/arch/arm/sa1100/index.rst (renamed from Documentation/arm/sa1100/index.rst)0
-rw-r--r--Documentation/arch/arm/sa1100/lart.rst (renamed from Documentation/arm/sa1100/lart.rst)0
-rw-r--r--Documentation/arch/arm/sa1100/serial_uart.rst (renamed from Documentation/arm/sa1100/serial_uart.rst)0
-rw-r--r--Documentation/arch/arm/samsung/bootloader-interface.rst (renamed from Documentation/arm/samsung/bootloader-interface.rst)0
-rwxr-xr-xDocumentation/arch/arm/samsung/clksrc-change-registers.awk (renamed from Documentation/arm/samsung/clksrc-change-registers.awk)0
-rw-r--r--Documentation/arch/arm/samsung/gpio.rst (renamed from Documentation/arm/samsung/gpio.rst)0
-rw-r--r--Documentation/arch/arm/samsung/index.rst (renamed from Documentation/arm/samsung/index.rst)0
-rw-r--r--Documentation/arch/arm/samsung/overview.rst (renamed from Documentation/arm/samsung/overview.rst)0
-rw-r--r--Documentation/arch/arm/setup.rst (renamed from Documentation/arm/setup.rst)0
-rw-r--r--Documentation/arch/arm/spear/overview.rst (renamed from Documentation/arm/spear/overview.rst)0
-rw-r--r--Documentation/arch/arm/sti/overview.rst (renamed from Documentation/arm/sti/overview.rst)0
-rw-r--r--Documentation/arch/arm/sti/stih407-overview.rst (renamed from Documentation/arm/sti/stih407-overview.rst)0
-rw-r--r--Documentation/arch/arm/sti/stih418-overview.rst (renamed from Documentation/arm/sti/stih418-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/overview.rst (renamed from Documentation/arm/stm32/overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst (renamed from Documentation/arm/stm32/stm32-dma-mdma-chaining.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32f429-overview.rst (renamed from Documentation/arm/stm32/stm32f429-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32f746-overview.rst (renamed from Documentation/arm/stm32/stm32f746-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32f769-overview.rst (renamed from Documentation/arm/stm32/stm32f769-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32h743-overview.rst (renamed from Documentation/arm/stm32/stm32h743-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32h750-overview.rst (renamed from Documentation/arm/stm32/stm32h750-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32mp13-overview.rst (renamed from Documentation/arm/stm32/stm32mp13-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32mp151-overview.rst (renamed from Documentation/arm/stm32/stm32mp151-overview.rst)0
-rw-r--r--Documentation/arch/arm/stm32/stm32mp157-overview.rst (renamed from Documentation/arm/stm32/stm32mp157-overview.rst)0
-rw-r--r--Documentation/arch/arm/sunxi.rst (renamed from Documentation/arm/sunxi.rst)0
-rw-r--r--Documentation/arch/arm/sunxi/clocks.rst (renamed from Documentation/arm/sunxi/clocks.rst)0
-rw-r--r--Documentation/arch/arm/swp_emulation.rst (renamed from Documentation/arm/swp_emulation.rst)0
-rw-r--r--Documentation/arch/arm/tcm.rst (renamed from Documentation/arm/tcm.rst)0
-rw-r--r--Documentation/arch/arm/uefi.rst (renamed from Documentation/arm/uefi.rst)0
-rw-r--r--Documentation/arch/arm/vfp/release-notes.rst (renamed from Documentation/arm/vfp/release-notes.rst)0
-rw-r--r--Documentation/arch/arm/vlocks.rst (renamed from Documentation/arm/vlocks.rst)0
-rw-r--r--Documentation/arch/arm64/acpi_object_usage.rst (renamed from Documentation/arm64/acpi_object_usage.rst)81
-rw-r--r--Documentation/arch/arm64/amu.rst (renamed from Documentation/arm64/amu.rst)0
-rw-r--r--Documentation/arch/arm64/arm-acpi.rst (renamed from Documentation/arm64/arm-acpi.rst)171
-rw-r--r--Documentation/arch/arm64/asymmetric-32bit.rst (renamed from Documentation/arm64/asymmetric-32bit.rst)0
-rw-r--r--Documentation/arch/arm64/booting.rst (renamed from Documentation/arm64/booting.rst)32
-rw-r--r--Documentation/arch/arm64/cpu-feature-registers.rst (renamed from Documentation/arm64/cpu-feature-registers.rst)2
-rw-r--r--Documentation/arch/arm64/elf_hwcaps.rst (renamed from Documentation/arm64/elf_hwcaps.rst)15
-rw-r--r--Documentation/arch/arm64/features.rst (renamed from Documentation/arm64/features.rst)0
-rw-r--r--Documentation/arch/arm64/hugetlbpage.rst (renamed from Documentation/arm64/hugetlbpage.rst)0
-rw-r--r--Documentation/arch/arm64/index.rst (renamed from Documentation/arm64/index.rst)2
-rw-r--r--Documentation/arch/arm64/kasan-offsets.sh (renamed from Documentation/arm64/kasan-offsets.sh)0
-rw-r--r--Documentation/arch/arm64/kdump.rst92
-rw-r--r--Documentation/arch/arm64/legacy_instructions.rst (renamed from Documentation/arm64/legacy_instructions.rst)0
-rw-r--r--Documentation/arch/arm64/memory-tagging-extension.rst (renamed from Documentation/arm64/memory-tagging-extension.rst)2
-rw-r--r--Documentation/arch/arm64/memory.rst (renamed from Documentation/arm64/memory.rst)8
-rw-r--r--Documentation/arch/arm64/perf.rst (renamed from Documentation/arm64/perf.rst)0
-rw-r--r--Documentation/arch/arm64/pointer-authentication.rst (renamed from Documentation/arm64/pointer-authentication.rst)0
-rw-r--r--Documentation/arch/arm64/ptdump.rst96
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst (renamed from Documentation/arm64/silicon-errata.rst)4
-rw-r--r--Documentation/arch/arm64/sme.rst (renamed from Documentation/arm64/sme.rst)2
-rw-r--r--Documentation/arch/arm64/sve.rst (renamed from Documentation/arm64/sve.rst)2
-rw-r--r--Documentation/arch/arm64/tagged-address-abi.rst (renamed from Documentation/arm64/tagged-address-abi.rst)2
-rw-r--r--Documentation/arch/arm64/tagged-pointers.rst (renamed from Documentation/arm64/tagged-pointers.rst)2
-rw-r--r--Documentation/arch/index.rst4
-rw-r--r--Documentation/arch/x86/resctrl.rst7
-rw-r--r--Documentation/bpf/bpf_iterators.rst7
-rw-r--r--Documentation/bpf/cpumasks.rst5
-rw-r--r--Documentation/bpf/instruction-set.rst9
-rw-r--r--Documentation/bpf/kfuncs.rst61
-rw-r--r--Documentation/bpf/llvm_reloc.rst18
-rw-r--r--Documentation/bpf/map_hash.rst53
-rw-r--r--Documentation/bpf/map_lru_hash_update.dot172
-rw-r--r--Documentation/bpf/map_sockmap.rst10
-rw-r--r--Documentation/bpf/prog_cgroup_sockopt.rst57
-rw-r--r--Documentation/conf.py1
-rw-r--r--Documentation/core-api/cpu_hotplug.rst13
-rw-r--r--Documentation/core-api/kernel-api.rst18
-rw-r--r--Documentation/core-api/pin_user_pages.rst6
-rw-r--r--Documentation/core-api/this_cpu_ops.rst2
-rw-r--r--Documentation/core-api/workqueue.rst32
-rw-r--r--Documentation/crypto/async-tx-api.rst2
-rw-r--r--Documentation/dev-tools/kasan.rst9
-rw-r--r--Documentation/dev-tools/kselftest.rst23
-rw-r--r--Documentation/dev-tools/kunit/architecture.rst4
-rw-r--r--Documentation/dev-tools/kunit/start.rst7
-rw-r--r--Documentation/dev-tools/kunit/usage.rst69
-rw-r--r--Documentation/devicetree/bindings/arm/xen.txt2
-rw-r--r--Documentation/devicetree/bindings/cpu/idle-states.yaml2
-rw-r--r--Documentation/devicetree/bindings/firmware/qcom,scm.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml1
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/loongson,eiointc.yaml59
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml50
-rw-r--r--Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml406
-rw-r--r--Documentation/devicetree/bindings/mmc/arm,pl18x.yaml7
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.yaml54
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt21
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.yaml48
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml1
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.yaml3
-rw-r--r--Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml5
-rw-r--r--Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml10
-rw-r--r--Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml3
-rw-r--r--Documentation/devicetree/bindings/mtd/denali,nand.yaml9
-rw-r--r--Documentation/devicetree/bindings/mtd/ingenic,nand.yaml4
-rw-r--r--Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml5
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml226
-rw-r--r--Documentation/devicetree/bindings/mtd/marvell-nand.txt126
-rw-r--r--Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml3
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd.yaml2
-rw-r--r--Documentation/devicetree/bindings/mtd/nand-controller.yaml85
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/partition.yaml1
-rw-r--r--Documentation/devicetree/bindings/mtd/partitions/partitions.yaml1
-rw-r--r--Documentation/devicetree/bindings/mtd/qcom,nandc.yaml45
-rw-r--r--Documentation/devicetree/bindings/mtd/raw-nand-chip.yaml111
-rw-r--r--Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml3
-rw-r--r--Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml3
-rw-r--r--Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/altr,tse.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/cdns,macb.yaml11
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell.txt2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/ethernet-phy.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml11
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-dwmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/micrel,ks8851.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ethqos.yaml12
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/xilinx_axienet.txt101
-rw-r--r--Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml183
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml3
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6358-regulator.txt34
-rw-r--r--Documentation/devicetree/bindings/regulator/pfuze100.yaml3
-rw-r--r--Documentation/devicetree/bindings/regulator/pwm-regulator.yaml1
-rw-r--r--Documentation/devicetree/bindings/regulator/renesas,raa215300.yaml85
-rw-r--r--Documentation/devicetree/bindings/regulator/ti,tps62870.yaml52
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml8
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml18
-rw-r--r--Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml4
-rw-r--r--Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml18
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml70
-rw-r--r--Documentation/devicetree/bindings/spi/samsung,spi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml3
-rw-r--r--Documentation/devicetree/bindings/spi/spi-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml6
-rw-r--r--Documentation/devicetree/bindings/thermal/armada-thermal.txt1
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt41
-rw-r--r--Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml48
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.yaml32
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,kona-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml52
-rw-r--r--Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml48
-rw-r--r--Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml44
-rw-r--r--Documentation/doc-guide/sphinx.rst11
-rw-r--r--Documentation/driver-api/basics.rst8
-rw-r--r--Documentation/driver-api/edac.rst120
-rw-r--r--Documentation/driver-api/ptp.rst29
-rw-r--r--Documentation/filesystems/autofs-mount-control.rst2
-rw-r--r--Documentation/filesystems/autofs.rst2
-rw-r--r--Documentation/filesystems/directory-locking.rst26
-rw-r--r--Documentation/filesystems/fsverity.rst192
-rw-r--r--Documentation/filesystems/locking.rst2
-rw-r--r--Documentation/filesystems/vfs.rst1
-rw-r--r--Documentation/leds/leds-class.rst81
-rw-r--r--Documentation/maintainer/configure-git.rst2
-rw-r--r--Documentation/mm/damon/design.rst337
-rw-r--r--Documentation/mm/damon/faq.rst23
-rw-r--r--Documentation/mm/damon/maintainer-profile.rst4
-rw-r--r--Documentation/mm/page_migration.rst7
-rw-r--r--Documentation/mm/page_tables.rst149
-rw-r--r--Documentation/mm/split_page_table_lock.rst17
-rw-r--r--Documentation/netlink/genetlink-c.yaml10
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml23
-rw-r--r--Documentation/netlink/genetlink.yaml6
-rw-r--r--Documentation/netlink/specs/devlink.yaml8
-rw-r--r--Documentation/netlink/specs/ethtool.yaml120
-rw-r--r--Documentation/netlink/specs/ovs_datapath.yaml30
-rw-r--r--Documentation/netlink/specs/ovs_flow.yaml980
-rw-r--r--Documentation/netlink/specs/ovs_vport.yaml13
-rw-r--r--Documentation/networking/device_drivers/ethernet/amazon/ena.rst34
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/ice.rst18
-rw-r--r--Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst45
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst10
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst7
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst22
-rw-r--r--Documentation/networking/ip-sysctl.rst32
-rw-r--r--Documentation/networking/scaling.rst4
-rw-r--r--Documentation/process/2.Process.rst7
-rw-r--r--Documentation/process/changes.rst2
-rw-r--r--Documentation/process/handling-regressions.rst208
-rw-r--r--Documentation/process/maintainer-tip.rst3
-rw-r--r--Documentation/process/submitting-patches.rst25
-rw-r--r--Documentation/rust/quick-start.rst4
-rw-r--r--Documentation/scheduler/sched-deadline.rst5
-rw-r--r--Documentation/subsystem-apis.rst34
-rw-r--r--Documentation/trace/user_events.rst7
-rw-r--r--Documentation/translations/zh_CN/arch/arm/Booting (renamed from Documentation/translations/zh_CN/arm/Booting)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm/kernel_user_helpers.txt (renamed from Documentation/translations/zh_CN/arm/kernel_user_helpers.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/amu.rst (renamed from Documentation/translations/zh_CN/arm64/amu.rst)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/booting.txt (renamed from Documentation/translations/zh_CN/arm64/booting.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/elf_hwcaps.rst (renamed from Documentation/translations/zh_CN/arm64/elf_hwcaps.rst)10
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/hugetlbpage.rst (renamed from Documentation/translations/zh_CN/arm64/hugetlbpage.rst)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/index.rst (renamed from Documentation/translations/zh_CN/arm64/index.rst)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/legacy_instructions.txt (renamed from Documentation/translations/zh_CN/arm64/legacy_instructions.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/memory.txt (renamed from Documentation/translations/zh_CN/arm64/memory.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/perf.rst (renamed from Documentation/translations/zh_CN/arm64/perf.rst)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/silicon-errata.txt (renamed from Documentation/translations/zh_CN/arm64/silicon-errata.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/arm64/tagged-pointers.txt (renamed from Documentation/translations/zh_CN/arm64/tagged-pointers.txt)4
-rw-r--r--Documentation/translations/zh_CN/arch/index.rst2
-rw-r--r--Documentation/translations/zh_CN/mm/page_migration.rst2
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/amu.rst (renamed from Documentation/translations/zh_TW/arm64/amu.rst)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/booting.txt (renamed from Documentation/translations/zh_TW/arm64/booting.txt)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst (renamed from Documentation/translations/zh_TW/arm64/elf_hwcaps.rst)10
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/hugetlbpage.rst (renamed from Documentation/translations/zh_TW/arm64/hugetlbpage.rst)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/index.rst (renamed from Documentation/translations/zh_TW/arm64/index.rst)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt (renamed from Documentation/translations/zh_TW/arm64/legacy_instructions.txt)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/memory.txt (renamed from Documentation/translations/zh_TW/arm64/memory.txt)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/perf.rst (renamed from Documentation/translations/zh_TW/arm64/perf.rst)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt (renamed from Documentation/translations/zh_TW/arm64/silicon-errata.txt)4
-rw-r--r--Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt (renamed from Documentation/translations/zh_TW/arm64/tagged-pointers.txt)4
-rw-r--r--Documentation/translations/zh_TW/index.rst2
-rw-r--r--Documentation/userspace-api/netlink/intro-specs.rst79
-rw-r--r--Documentation/virt/guest-halt-polling.rst2
-rw-r--r--Documentation/virt/kvm/api.rst2
-rw-r--r--Documentation/virt/kvm/halt-polling.rst10
-rw-r--r--Documentation/virt/kvm/locking.rst18
-rw-r--r--Documentation/virt/kvm/ppc-pv.rst8
-rw-r--r--Documentation/virt/kvm/vcpu-requests.rst6
-rw-r--r--Documentation/virt/paravirt_ops.rst16
-rw-r--r--MAINTAINERS116
-rw-r--r--Makefile8
-rw-r--r--arch/Kconfig49
-rw-r--r--arch/alpha/include/asm/atomic.h35
-rw-r--r--arch/alpha/include/asm/bugs.h20
-rw-r--r--arch/alpha/include/uapi/asm/socket.h3
-rw-r--r--arch/alpha/kernel/osf_sys.c2
-rw-r--r--arch/alpha/kernel/setup.c2
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arc/include/asm/atomic-spinlock.h9
-rw-r--r--arch/arc/include/asm/atomic.h24
-rw-r--r--arch/arc/include/asm/atomic64-arcv2.h19
-rw-r--r--arch/arm/Kconfig4
-rw-r--r--arch/arm/boot/compressed/atags_to_fdt.c1
-rw-r--r--arch/arm/boot/compressed/fdt_check_mem_start.c1
-rw-r--r--arch/arm/boot/compressed/misc.c6
-rw-r--r--arch/arm/boot/compressed/misc.h11
-rw-r--r--arch/arm/common/mcpm_entry.c2
-rw-r--r--arch/arm/common/mcpm_head.S2
-rw-r--r--arch/arm/common/vlock.S2
-rw-r--r--arch/arm/include/asm/arm_pmuv3.h5
-rw-r--r--arch/arm/include/asm/assembler.h17
-rw-r--r--arch/arm/include/asm/atomic.h15
-rw-r--r--arch/arm/include/asm/bugs.h4
-rw-r--r--arch/arm/include/asm/ftrace.h4
-rw-r--r--arch/arm/include/asm/irq.h1
-rw-r--r--arch/arm/include/asm/mach/arch.h1
-rw-r--r--arch/arm/include/asm/page.h22
-rw-r--r--arch/arm/include/asm/ptrace.h3
-rw-r--r--arch/arm/include/asm/setup.h9
-rw-r--r--arch/arm/include/asm/signal.h5
-rw-r--r--arch/arm/include/asm/smp.h2
-rw-r--r--arch/arm/include/asm/spectre.h4
-rw-r--r--arch/arm/include/asm/suspend.h1
-rw-r--r--arch/arm/include/asm/sync_bitops.h29
-rw-r--r--arch/arm/include/asm/syscalls.h51
-rw-r--r--arch/arm/include/asm/tcm.h11
-rw-r--r--arch/arm/include/asm/traps.h9
-rw-r--r--arch/arm/include/asm/unwind.h4
-rw-r--r--arch/arm/include/asm/vdso.h5
-rw-r--r--arch/arm/include/asm/vfp.h1
-rw-r--r--arch/arm/include/uapi/asm/setup.h2
-rw-r--r--arch/arm/kernel/atags_parse.c4
-rw-r--r--arch/arm/kernel/bugs.c3
-rw-r--r--arch/arm/kernel/entry-armv.S2
-rw-r--r--arch/arm/kernel/fiq.c1
-rw-r--r--arch/arm/kernel/head-inflate-data.c5
-rw-r--r--arch/arm/kernel/head.h7
-rw-r--r--arch/arm/kernel/module.c10
-rw-r--r--arch/arm/kernel/setup.c13
-rw-r--r--arch/arm/kernel/signal.c1
-rw-r--r--arch/arm/kernel/smp.c18
-rw-r--r--arch/arm/kernel/sys_arm.c1
-rw-r--r--arch/arm/kernel/sys_oabi-compat.c2
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm/kernel/vdso.c2
-rw-r--r--arch/arm/lib/bitops.h14
-rw-r--r--arch/arm/lib/testchangebit.S4
-rw-r--r--arch/arm/lib/testclearbit.S4
-rw-r--r--arch/arm/lib/testsetbit.S4
-rw-r--r--arch/arm/lib/uaccess_with_memcpy.c3
-rw-r--r--arch/arm/mach-exynos/common.h2
-rw-r--r--arch/arm/mach-mxs/mach-mxs.c2
-rw-r--r--arch/arm/mach-omap1/board-ams-delta.c1
-rw-r--r--arch/arm/mach-omap1/board-nokia770.c1
-rw-r--r--arch/arm/mach-omap1/board-osk.c1
-rw-r--r--arch/arm/mach-omap1/board-palmte.c1
-rw-r--r--arch/arm/mach-omap1/board-sx1.c1
-rw-r--r--arch/arm/mach-omap1/irq.c3
-rw-r--r--arch/arm/mach-pxa/gumstix.c1
-rw-r--r--arch/arm/mach-pxa/pxa25x.c1
-rw-r--r--arch/arm/mach-pxa/pxa27x.c1
-rw-r--r--arch/arm/mach-pxa/spitz.c3
-rw-r--r--arch/arm/mach-sti/Kconfig2
-rw-r--r--arch/arm/mm/Kconfig4
-rw-r--r--arch/arm/mm/dma-mapping.c2
-rw-r--r--arch/arm/mm/fault-armv.c5
-rw-r--r--arch/arm/mm/fault.c3
-rw-r--r--arch/arm/mm/fault.h4
-rw-r--r--arch/arm/mm/flush.c1
-rw-r--r--arch/arm/mm/mmu.c2
-rw-r--r--arch/arm/mm/nommu.c1
-rw-r--r--arch/arm/mm/tcm.h17
-rw-r--r--arch/arm/probes/kprobes/checkers-common.c2
-rw-r--r--arch/arm/probes/kprobes/core.c2
-rw-r--r--arch/arm/probes/kprobes/opt-arm.c2
-rw-r--r--arch/arm/probes/kprobes/test-core.c2
-rw-r--r--arch/arm/probes/kprobes/test-core.h4
-rw-r--r--arch/arm/tools/mach-types2
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm/vdso/vgettimeofday.c2
-rw-r--r--arch/arm/vfp/vfpmodule.c1
-rw-r--r--arch/arm64/Kconfig39
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-idp.dts5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi5
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock64.dts14
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts18
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi29
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568.dtsi14
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s.dtsi9
-rw-r--r--arch/arm64/hyperv/mshyperv.c2
-rw-r--r--arch/arm64/include/asm/alternative-macros.h54
-rw-r--r--arch/arm64/include/asm/alternative.h7
-rw-r--r--arch/arm64/include/asm/arch_timer.h8
-rw-r--r--arch/arm64/include/asm/archrandom.h2
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h2
-rw-r--r--arch/arm64/include/asm/atomic.h28
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h56
-rw-r--r--arch/arm64/include/asm/atomic_lse.h39
-rw-r--r--arch/arm64/include/asm/cache.h3
-rw-r--r--arch/arm64/include/asm/cmpxchg.h48
-rw-r--r--arch/arm64/include/asm/compat.h2
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/cpufeature.h24
-rw-r--r--arch/arm64/include/asm/efi.h4
-rw-r--r--arch/arm64/include/asm/el2_setup.h31
-rw-r--r--arch/arm64/include/asm/esr.h30
-rw-r--r--arch/arm64/include/asm/exception.h6
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h8
-rw-r--r--arch/arm64/include/asm/hwcap.h1
-rw-r--r--arch/arm64/include/asm/image.h2
-rw-r--r--arch/arm64/include/asm/io.h12
-rw-r--r--arch/arm64/include/asm/irqflags.h2
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h8
-rw-r--r--arch/arm64/include/asm/kvm_arm.h4
-rw-r--r--arch/arm64/include/asm/kvm_asm.h18
-rw-r--r--arch/arm64/include/asm/kvm_host.h14
-rw-r--r--arch/arm64/include/asm/lse.h2
-rw-r--r--arch/arm64/include/asm/memory.h16
-rw-r--r--arch/arm64/include/asm/mmu_context.h10
-rw-r--r--arch/arm64/include/asm/module.h8
-rw-r--r--arch/arm64/include/asm/module.lds.h2
-rw-r--r--arch/arm64/include/asm/percpu.h30
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h8
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h122
-rw-r--r--arch/arm64/include/asm/scs.h1
-rw-r--r--arch/arm64/include/asm/smp.h2
-rw-r--r--arch/arm64/include/asm/spectre.h16
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h85
-rw-r--r--arch/arm64/include/asm/thread_info.h4
-rw-r--r--arch/arm64/include/asm/traps.h2
-rw-r--r--arch/arm64/include/asm/uaccess.h2
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c27
-rw-r--r--arch/arm64/kernel/cpufeature.c106
-rw-r--r--arch/arm64/kernel/cpuidle.c2
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/entry-common.c17
-rw-r--r--arch/arm64/kernel/entry.S57
-rw-r--r--arch/arm64/kernel/fpsimd.c1
-rw-r--r--arch/arm64/kernel/ftrace.c8
-rw-r--r--arch/arm64/kernel/head.S8
-rw-r--r--arch/arm64/kernel/hibernate.c1
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c8
-rw-r--r--arch/arm64/kernel/hyp-stub.S18
-rw-r--r--arch/arm64/kernel/idreg-override.c2
-rw-r--r--arch/arm64/kernel/kaslr.c83
-rw-r--r--arch/arm64/kernel/kexec_image.c2
-rw-r--r--arch/arm64/kernel/kuser32.S2
-rw-r--r--arch/arm64/kernel/module-plts.c1
-rw-r--r--arch/arm64/kernel/module.c159
-rw-r--r--arch/arm64/kernel/mte.c17
-rw-r--r--arch/arm64/kernel/setup.c2
-rw-r--r--arch/arm64/kernel/signal.c3
-rw-r--r--arch/arm64/kernel/smp.c14
-rw-r--r--arch/arm64/kernel/syscall.c2
-rw-r--r--arch/arm64/kernel/traps.c63
-rw-r--r--arch/arm64/kernel/watchdog_hld.c36
-rw-r--r--arch/arm64/kvm/debug.c2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h21
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h12
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c14
-rw-r--r--arch/arm64/kvm/pmu-emul.c20
-rw-r--r--arch/arm64/kvm/pmu.c27
-rw-r--r--arch/arm64/kvm/sys_regs.c16
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c11
-rw-r--r--arch/arm64/lib/xor-neon.c8
-rw-r--r--arch/arm64/mm/context.c2
-rw-r--r--arch/arm64/mm/fault.c25
-rw-r--r--arch/arm64/mm/flush.c1
-rw-r--r--arch/arm64/mm/hugetlbpage.c11
-rw-r--r--arch/arm64/mm/init.c51
-rw-r--r--arch/arm64/mm/kasan_init.c17
-rw-r--r--arch/arm64/mm/mmu.c13
-rw-r--r--arch/arm64/mm/proc.S19
-rw-r--r--arch/arm64/net/bpf_jit_comp.c55
-rw-r--r--arch/arm64/tools/cpucaps4
-rwxr-xr-xarch/arm64/tools/gen-cpucaps.awk4
-rw-r--r--arch/arm64/tools/sysreg297
-rw-r--r--arch/csky/Kconfig1
-rw-r--r--arch/csky/include/asm/atomic.h35
-rw-r--r--arch/csky/include/asm/smp.h2
-rw-r--r--arch/csky/kernel/smp.c8
-rw-r--r--arch/hexagon/include/asm/atomic.h69
-rw-r--r--arch/hexagon/kernel/setup.c6
-rw-r--r--arch/ia64/Kconfig1
-rw-r--r--arch/ia64/include/asm/atomic.h7
-rw-r--r--arch/ia64/include/asm/bugs.h20
-rw-r--r--arch/ia64/kernel/setup.c5
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/ia64/mm/hugetlbpage.c4
-rw-r--r--arch/loongarch/Kconfig1
-rw-r--r--arch/loongarch/include/asm/atomic.h56
-rw-r--r--arch/loongarch/include/asm/bugs.h15
-rw-r--r--arch/loongarch/include/asm/loongarch.h2
-rw-r--r--arch/loongarch/kernel/setup.c4
-rw-r--r--arch/loongarch/kernel/time.c6
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/configs/amiga_defconfig2
-rw-r--r--arch/m68k/configs/apollo_defconfig2
-rw-r--r--arch/m68k/configs/atari_defconfig2
-rw-r--r--arch/m68k/configs/bvme6000_defconfig2
-rw-r--r--arch/m68k/configs/hp300_defconfig2
-rw-r--r--arch/m68k/configs/mac_defconfig2
-rw-r--r--arch/m68k/configs/multi_defconfig2
-rw-r--r--arch/m68k/configs/mvme147_defconfig2
-rw-r--r--arch/m68k/configs/mvme16x_defconfig2
-rw-r--r--arch/m68k/configs/q40_defconfig2
-rw-r--r--arch/m68k/configs/sun3_defconfig1
-rw-r--r--arch/m68k/configs/sun3x_defconfig2
-rw-r--r--arch/m68k/configs/virt_defconfig2
-rw-r--r--arch/m68k/include/asm/atomic.h18
-rw-r--r--arch/m68k/include/asm/bugs.h21
-rw-r--r--arch/m68k/include/asm/mmu_context.h6
-rw-r--r--arch/m68k/kernel/setup_mm.c3
-rw-r--r--arch/m68k/kernel/sys_m68k.c2
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/m68k/mm/mcfmmu.c52
-rw-r--r--arch/microblaze/include/asm/cache.h5
-rw-r--r--arch/microblaze/include/asm/page.h5
-rw-r--r--arch/microblaze/include/asm/setup.h2
-rw-r--r--arch/microblaze/kernel/prom.c2
-rw-r--r--arch/microblaze/kernel/signal.c5
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/bmips/setup.c5
-rw-r--r--arch/mips/cavium-octeon/smp.c1
-rw-r--r--arch/mips/include/asm/atomic.h11
-rw-r--r--arch/mips/include/asm/bugs.h17
-rw-r--r--arch/mips/include/asm/fw/cfe/cfe_api.h3
-rw-r--r--arch/mips/include/asm/irq.h1
-rw-r--r--arch/mips/include/asm/mach-loongson32/loongson1.h1
-rw-r--r--arch/mips/include/asm/mach-loongson32/regs-pwm.h25
-rw-r--r--arch/mips/include/asm/smp-ops.h1
-rw-r--r--arch/mips/include/uapi/asm/socket.h3
-rw-r--r--arch/mips/kernel/setup.c13
-rw-r--r--arch/mips/kernel/smp-bmips.c1
-rw-r--r--arch/mips/kernel/smp-cps.c14
-rw-r--r--arch/mips/kernel/smp.c8
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/loongson32/Kconfig37
-rw-r--r--arch/mips/loongson32/common/time.c210
-rw-r--r--arch/mips/loongson64/smp.c1
-rw-r--r--arch/mips/mm/tlb-r4k.c12
-rw-r--r--arch/nios2/kernel/cpuinfo.c2
-rw-r--r--arch/nios2/kernel/setup.c6
-rw-r--r--arch/openrisc/include/asm/atomic.h3
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/include/asm/atomic.h27
-rw-r--r--arch/parisc/include/asm/bugs.h20
-rw-r--r--arch/parisc/include/asm/pgtable.h3
-rw-r--r--arch/parisc/include/uapi/asm/socket.h3
-rw-r--r--arch/parisc/kernel/cache.c26
-rw-r--r--arch/parisc/kernel/pci-dma.c2
-rw-r--r--arch/parisc/kernel/process.c4
-rw-r--r--arch/parisc/kernel/smp.c8
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/mm/hugetlbpage.c4
-rw-r--r--arch/powerpc/Kconfig5
-rw-r--r--arch/powerpc/include/asm/atomic.h24
-rw-r--r--arch/powerpc/include/asm/bugs.h15
-rw-r--r--arch/powerpc/include/asm/cache.h4
-rw-r--r--arch/powerpc/include/asm/irq.h7
-rw-r--r--arch/powerpc/include/asm/nmi.h12
-rw-r--r--arch/powerpc/include/asm/page_32.h4
-rw-r--r--arch/powerpc/include/asm/pgtable.h3
-rw-r--r--arch/powerpc/kernel/smp.c13
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/tau_6xx.c2
-rw-r--r--arch/powerpc/kernel/watchdog.c12
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c4
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c2
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c10
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c2
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/platforms/powermac/setup.c3
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c4
-rw-r--r--arch/powerpc/xmon/xmon.c5
-rw-r--r--arch/riscv/Kconfig1
-rw-r--r--arch/riscv/include/asm/atomic.h72
-rw-r--r--arch/riscv/include/asm/irq.h2
-rw-r--r--arch/riscv/include/asm/smp.h2
-rw-r--r--arch/riscv/include/asm/timex.h2
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c14
-rw-r--r--arch/riscv/mm/hugetlbpage.c4
-rw-r--r--arch/riscv/purgatory/Makefile2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/vmem.c15
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/defconfig1
-rw-r--r--arch/s390/crypto/paes_s390.c9
-rw-r--r--arch/s390/include/asm/asm-prototypes.h4
-rw-r--r--arch/s390/include/asm/cmpxchg.h32
-rw-r--r--arch/s390/include/asm/cpacf.h7
-rw-r--r--arch/s390/include/asm/cpu_mf.h2
-rw-r--r--arch/s390/include/asm/os_info.h7
-rw-r--r--arch/s390/include/asm/percpu.h34
-rw-r--r--arch/s390/include/asm/pgtable.h3
-rw-r--r--arch/s390/include/asm/physmem_info.h5
-rw-r--r--arch/s390/include/asm/pkey.h4
-rw-r--r--arch/s390/include/asm/thread_info.h3
-rw-r--r--arch/s390/include/asm/timex.h13
-rw-r--r--arch/s390/include/uapi/asm/pkey.h15
-rw-r--r--arch/s390/kernel/crash_dump.c2
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ipl.c16
-rw-r--r--arch/s390/kernel/module.c3
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c452
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c16
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c19
-rw-r--r--arch/s390/kernel/perf_pai_ext.c23
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/time.c5
-rw-r--r--arch/s390/kernel/uv.c2
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/tishift.S63
-rw-r--r--arch/s390/mm/gmap.c31
-rw-r--r--arch/s390/mm/pageattr.c1
-rw-r--r--arch/s390/mm/pgtable.c12
-rw-r--r--arch/s390/mm/vmem.c14
-rw-r--r--arch/s390/purgatory/Makefile2
-rw-r--r--arch/sh/Kconfig1
-rw-r--r--arch/sh/drivers/dma/dma-api.c2
-rw-r--r--arch/sh/include/asm/atomic-grb.h9
-rw-r--r--arch/sh/include/asm/atomic-irq.h9
-rw-r--r--arch/sh/include/asm/atomic-llsc.h9
-rw-r--r--arch/sh/include/asm/atomic.h3
-rw-r--r--arch/sh/include/asm/bugs.h74
-rw-r--r--arch/sh/include/asm/cache.h6
-rw-r--r--arch/sh/include/asm/irq.h1
-rw-r--r--arch/sh/include/asm/page.h6
-rw-r--r--arch/sh/include/asm/processor.h2
-rw-r--r--arch/sh/include/asm/rtc.h2
-rw-r--r--arch/sh/include/asm/thread_info.h3
-rw-r--r--arch/sh/kernel/idle.c1
-rw-r--r--arch/sh/kernel/setup.c59
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/mm/hugetlbpage.c4
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/Kconfig.debug14
-rw-r--r--arch/sparc/include/asm/atomic_32.h18
-rw-r--r--arch/sparc/include/asm/atomic_64.h29
-rw-r--r--arch/sparc/include/asm/bugs.h18
-rw-r--r--arch/sparc/include/asm/irq_32.h1
-rw-r--r--arch/sparc/include/asm/irq_64.h1
-rw-r--r--arch/sparc/include/asm/nmi.h1
-rw-r--r--arch/sparc/include/asm/timer_64.h1
-rw-r--r--arch/sparc/include/uapi/asm/socket.h3
-rw-r--r--arch/sparc/kernel/ioport.c2
-rw-r--r--arch/sparc/kernel/kernel.h1
-rw-r--r--arch/sparc/kernel/nmi.c15
-rw-r--r--arch/sparc/kernel/setup_32.c9
-rw-r--r--arch/sparc/kernel/setup_64.c2
-rw-r--r--arch/sparc/kernel/signal32.c2
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/mm/fault_64.c3
-rw-r--r--arch/sparc/mm/hugetlbpage.c4
-rw-r--r--arch/sparc/mm/io-unit.c2
-rw-r--r--arch/sparc/mm/iommu.c2
-rw-r--r--arch/sparc/mm/tlb.c2
-rw-r--r--arch/sparc/prom/bootstr_32.c2
-rw-r--r--arch/um/Kconfig2
-rw-r--r--arch/um/Makefile2
-rw-r--r--arch/um/drivers/ubd_kern.c20
-rw-r--r--arch/um/include/asm/bugs.h7
-rw-r--r--arch/um/include/shared/user.h1
-rw-r--r--arch/um/kernel/um_arch.c3
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c2
-rw-r--r--arch/x86/Kconfig62
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Makefile12
-rw-r--r--arch/x86/Makefile.postlink47
-rw-r--r--arch/x86/boot/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile11
-rw-r--r--arch/x86/boot/compressed/efi.h10
-rw-r--r--arch/x86/boot/compressed/error.c19
-rw-r--r--arch/x86/boot/compressed/error.h1
-rw-r--r--arch/x86/boot/compressed/kaslr.c40
-rw-r--r--arch/x86/boot/compressed/mem.c86
-rw-r--r--arch/x86/boot/compressed/misc.c6
-rw-r--r--arch/x86/boot/compressed/misc.h10
-rw-r--r--arch/x86/boot/compressed/sev.c54
-rw-r--r--arch/x86/boot/compressed/sev.h23
-rw-r--r--arch/x86/boot/compressed/tdx-shared.c2
-rw-r--r--arch/x86/boot/compressed/tdx.c4
-rw-r--r--arch/x86/boot/cpu.c13
-rw-r--r--arch/x86/coco/core.c10
-rw-r--r--arch/x86/coco/tdx/Makefile2
-rw-r--r--arch/x86/coco/tdx/tdx-shared.c71
-rw-r--r--arch/x86/coco/tdx/tdx.c171
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/thunk_64.S20
-rw-r--r--arch/x86/entry/vdso/vgetcpu.c1
-rw-r--r--arch/x86/events/amd/core.c2
-rw-r--r--arch/x86/events/amd/ibs.c53
-rw-r--r--arch/x86/events/intel/core.c33
-rw-r--r--arch/x86/hyperv/hv_init.c2
-rw-r--r--arch/x86/hyperv/hv_vtl.c2
-rw-r--r--arch/x86/hyperv/ivm.c6
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/alternative.h5
-rw-r--r--arch/x86/include/asm/apic.h5
-rw-r--r--arch/x86/include/asm/apicdef.h11
-rw-r--r--arch/x86/include/asm/atomic.h87
-rw-r--r--arch/x86/include/asm/atomic64_32.h76
-rw-r--r--arch/x86/include/asm/atomic64_64.h81
-rw-r--r--arch/x86/include/asm/bugs.h2
-rw-r--r--arch/x86/include/asm/cmpxchg.h25
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h67
-rw-r--r--arch/x86/include/asm/coco.h19
-rw-r--r--arch/x86/include/asm/cpu.h7
-rw-r--r--arch/x86/include/asm/cpufeature.h5
-rw-r--r--arch/x86/include/asm/cpumask.h5
-rw-r--r--arch/x86/include/asm/doublefault.h4
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h2
-rw-r--r--arch/x86/include/asm/ftrace.h3
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/mce.h3
-rw-r--r--arch/x86/include/asm/mem_encrypt.h9
-rw-r--r--arch/x86/include/asm/mshyperv.h5
-rw-r--r--arch/x86/include/asm/mtrr.h51
-rw-r--r--arch/x86/include/asm/nops.h16
-rw-r--r--arch/x86/include/asm/nospec-branch.h6
-rw-r--r--arch/x86/include/asm/orc_header.h19
-rw-r--r--arch/x86/include/asm/percpu.h102
-rw-r--r--arch/x86/include/asm/perf_event.h12
-rw-r--r--arch/x86/include/asm/pgtable.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h4
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/realmode.h3
-rw-r--r--arch/x86/include/asm/sev-common.h9
-rw-r--r--arch/x86/include/asm/sev.h34
-rw-r--r--arch/x86/include/asm/shared/tdx.h64
-rw-r--r--arch/x86/include/asm/sigframe.h2
-rw-r--r--arch/x86/include/asm/smp.h28
-rw-r--r--arch/x86/include/asm/syscall.h6
-rw-r--r--arch/x86/include/asm/tdx.h21
-rw-r--r--arch/x86/include/asm/thread_info.h3
-rw-r--r--arch/x86/include/asm/time.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h11
-rw-r--r--arch/x86/include/asm/topology.h22
-rw-r--r--arch/x86/include/asm/tsc.h3
-rw-r--r--arch/x86/include/asm/unaccepted_memory.h27
-rw-r--r--arch/x86/include/asm/unwind_hints.h9
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h32
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h18
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h41
-rw-r--r--arch/x86/include/asm/x86_init.h5
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h14
-rw-r--r--arch/x86/kernel/acpi/sleep.c9
-rw-r--r--arch/x86/kernel/acpi/sleep.h1
-rw-r--r--arch/x86/kernel/alternative.c474
-rw-r--r--arch/x86/kernel/amd_nb.c50
-rw-r--r--arch/x86/kernel/apic/apic.c40
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c318
-rw-r--r--arch/x86/kernel/callthunks.c8
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/bugs.c51
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c21
-rw-r--r--arch/x86/kernel/cpu/common.c132
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c18
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mtrr/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/centaur.c11
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c87
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c675
-rw-r--r--arch/x86/kernel/cpu/mtrr/legacy.c90
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c206
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h31
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c171
-rw-r--r--arch/x86/kernel/cpu/sgx/encl.c4
-rw-r--r--arch/x86/kernel/cpu/sgx/ioctl.c2
-rw-r--r--arch/x86/kernel/doublefault_32.c1
-rw-r--r--arch/x86/kernel/fpu/init.c8
-rw-r--r--arch/x86/kernel/ftrace.c3
-rw-r--r--arch/x86/kernel/head32.c1
-rw-r--r--arch/x86/kernel/head_32.S14
-rw-r--r--arch/x86/kernel/head_64.S85
-rw-r--r--arch/x86/kernel/irq.c7
-rw-r--r--arch/x86/kernel/itmt.c23
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/ldt.c6
-rw-r--r--arch/x86/kernel/nmi.c2
-rw-r--r--arch/x86/kernel/platform-quirks.c1
-rw-r--r--arch/x86/kernel/process.c28
-rw-r--r--arch/x86/kernel/pvclock.c4
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--arch/x86/kernel/sev-shared.c103
-rw-r--r--arch/x86/kernel/sev.c263
-rw-r--r--arch/x86/kernel/signal.c4
-rw-r--r--arch/x86/kernel/smp.c107
-rw-r--r--arch/x86/kernel/smpboot.c761
-rw-r--r--arch/x86/kernel/topology.c98
-rw-r--r--arch/x86/kernel/tsc.c58
-rw-r--r--arch/x86/kernel/tsc_sync.c36
-rw-r--r--arch/x86/kernel/unwind_orc.c78
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kernel/x86_init.c5
-rw-r--r--arch/x86/kvm/x86.c9
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S43
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S67
-rw-r--r--arch/x86/lib/csum-partial_64.c101
-rw-r--r--arch/x86/lib/getuser.S32
-rw-r--r--arch/x86/lib/memmove_64.S13
-rw-r--r--arch/x86/lib/msr.c32
-rw-r--r--arch/x86/lib/putuser.S24
-rw-r--r--arch/x86/lib/retpoline.S2
-rw-r--r--arch/x86/lib/usercopy_64.c1
-rw-r--r--arch/x86/math-emu/fpu_entry.c1
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/init_32.c17
-rw-r--r--arch/x86/mm/kaslr.c8
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c19
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c4
-rw-r--r--arch/x86/mm/pat/set_memory.c6
-rw-r--r--arch/x86/mm/pgtable.c24
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--arch/x86/pci/ce4100.c4
-rw-r--r--arch/x86/platform/efi/efi.c3
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c2
-rw-r--r--arch/x86/power/cpu.c37
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/realmode/init.c3
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S27
-rw-r--r--arch/x86/video/fbdev.c1
-rw-r--r--arch/x86/xen/efi.c2
-rw-r--r--arch/x86/xen/enlighten_hvm.c11
-rw-r--r--arch/x86/xen/enlighten_pv.c54
-rw-r--r--arch/x86/xen/mmu_pv.c16
-rw-r--r--arch/x86/xen/setup.c28
-rw-r--r--arch/x86/xen/smp.h4
-rw-r--r--arch/x86/xen/smp_hvm.c16
-rw-r--r--arch/x86/xen/smp_pv.c57
-rw-r--r--arch/x86/xen/time.c3
-rw-r--r--arch/x86/xen/xen-ops.h5
-rw-r--r--arch/xtensa/Kconfig14
-rw-r--r--arch/xtensa/Kconfig.debug8
-rw-r--r--arch/xtensa/boot/boot-redboot/Makefile9
-rw-r--r--arch/xtensa/include/asm/asm-prototypes.h29
-rw-r--r--arch/xtensa/include/asm/asmmacro.h1
-rw-r--r--arch/xtensa/include/asm/atomic.h12
-rw-r--r--arch/xtensa/include/asm/bugs.h18
-rw-r--r--arch/xtensa/include/asm/core.h8
-rw-r--r--arch/xtensa/include/asm/ftrace.h9
-rw-r--r--arch/xtensa/include/asm/platform.h20
-rw-r--r--arch/xtensa/include/asm/string.h3
-rw-r--r--arch/xtensa/include/asm/traps.h7
-rw-r--r--arch/xtensa/kernel/align.S256
-rw-r--r--arch/xtensa/kernel/mcount.S1
-rw-r--r--arch/xtensa/kernel/platform.c29
-rw-r--r--arch/xtensa/kernel/setup.c41
-rw-r--r--arch/xtensa/kernel/stacktrace.c4
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/xtensa/kernel/time.c4
-rw-r--r--arch/xtensa/kernel/traps.c102
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c95
-rw-r--r--arch/xtensa/lib/Makefile3
-rw-r--r--arch/xtensa/lib/ashldi3.S1
-rw-r--r--arch/xtensa/lib/ashrdi3.S1
-rw-r--r--arch/xtensa/lib/bswapdi2.S1
-rw-r--r--arch/xtensa/lib/bswapsi2.S1
-rw-r--r--arch/xtensa/lib/checksum.S2
-rw-r--r--arch/xtensa/lib/divsi3.S1
-rw-r--r--arch/xtensa/lib/lshrdi3.S1
-rw-r--r--arch/xtensa/lib/memcopy.S19
-rw-r--r--arch/xtensa/lib/memset.S2
-rw-r--r--arch/xtensa/lib/modsi3.S1
-rw-r--r--arch/xtensa/lib/mulsi3.S1
-rw-r--r--arch/xtensa/lib/strncpy_user.S1
-rw-r--r--arch/xtensa/lib/strnlen_user.S1
-rw-r--r--arch/xtensa/lib/udivsi3.S1
-rw-r--r--arch/xtensa/lib/umodsi3.S1
-rw-r--r--arch/xtensa/lib/umulsidi3.S1
-rw-r--r--arch/xtensa/lib/usercopy.S1
-rw-r--r--arch/xtensa/mm/kasan_init.c2
-rw-r--r--arch/xtensa/mm/misc.S5
-rw-r--r--arch/xtensa/mm/tlb.c5
-rw-r--r--arch/xtensa/platforms/iss/setup.c24
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c6
-rw-r--r--arch/xtensa/platforms/xt2000/setup.c48
-rw-r--r--arch/xtensa/platforms/xtfpga/setup.c34
-rw-r--r--block/Makefile2
-rw-r--r--block/bdev.c252
-rw-r--r--block/bfq-iosched.c9
-rw-r--r--block/bio.c37
-rw-r--r--block/blk-cgroup-fc-appid.c2
-rw-r--r--block/blk-cgroup.c19
-rw-r--r--block/blk-core.c1
-rw-r--r--block/blk-flush.c110
-rw-r--r--block/blk-ioc.c36
-rw-r--r--block/blk-iocost.c7
-rw-r--r--block/blk-ioprio.c23
-rw-r--r--block/blk-map.c22
-rw-r--r--block/blk-mq-debugfs.c10
-rw-r--r--block/blk-mq-sched.h8
-rw-r--r--block/blk-mq-tag.c15
-rw-r--r--block/blk-mq.c141
-rw-r--r--block/blk-mq.h14
-rw-r--r--block/blk-rq-qos.c20
-rw-r--r--block/blk-wbt.c2
-rw-r--r--block/blk-zoned.c20
-rw-r--r--block/blk.h40
-rw-r--r--block/bsg-lib.c2
-rw-r--r--block/bsg.c26
-rw-r--r--block/disk-events.c19
-rw-r--r--block/early-lookup.c316
-rw-r--r--block/elevator.c2
-rw-r--r--block/fops.c88
-rw-r--r--block/genhd.c187
-rw-r--r--block/ioctl.c107
-rw-r--r--block/mq-deadline.c125
-rw-r--r--block/partitions/amiga.c102
-rw-r--r--block/partitions/core.c50
-rw-r--r--crypto/af_alg.c193
-rw-r--r--crypto/algif_aead.c56
-rw-r--r--crypto/algif_hash.c188
-rw-r--r--crypto/algif_rng.c2
-rw-r--r--crypto/algif_skcipher.c24
-rw-r--r--drivers/accel/qaic/qaic_data.c4
-rw-r--r--drivers/acpi/acpi_ffh.c2
-rw-r--r--drivers/acpi/acpi_lpss.c10
-rw-r--r--drivers/acpi/acpi_pad.c1
-rw-r--r--drivers/acpi/acpica/achware.h2
-rw-r--r--drivers/acpi/apei/bert.c2
-rw-r--r--drivers/acpi/apei/ghes.c4
-rw-r--r--drivers/acpi/arm64/Makefile2
-rw-r--r--drivers/acpi/arm64/agdi.c2
-rw-r--r--drivers/acpi/arm64/apmt.c12
-rw-r--r--drivers/acpi/arm64/init.c13
-rw-r--r--drivers/acpi/arm64/init.h6
-rw-r--r--drivers/acpi/arm64/iort.c1
-rw-r--r--drivers/acpi/bus.c60
-rw-r--r--drivers/acpi/button.c164
-rw-r--r--drivers/acpi/ec.c31
-rw-r--r--drivers/acpi/nfit/nfit.h2
-rw-r--r--drivers/acpi/processor_idle.c4
-rw-r--r--drivers/acpi/resource.c60
-rw-r--r--drivers/acpi/scan.c81
-rw-r--r--drivers/acpi/sleep.c18
-rw-r--r--drivers/acpi/thermal.c287
-rw-r--r--drivers/acpi/tiny-power-button.c49
-rw-r--r--drivers/acpi/video_detect.c45
-rw-r--r--drivers/acpi/x86/s2idle.c66
-rw-r--r--drivers/acpi/x86/utils.c26
-rw-r--r--drivers/auxdisplay/ht16k33.c2
-rw-r--r--drivers/auxdisplay/lcd2s.c2
-rw-r--r--drivers/base/dd.c6
-rw-r--r--drivers/base/devres.c6
-rw-r--r--drivers/base/node.c7
-rw-r--r--drivers/base/power/domain.c15
-rw-r--r--drivers/base/power/wakeup.c5
-rw-r--r--drivers/base/regmap/Makefile2
-rw-r--r--drivers/base/regmap/internal.h15
-rw-r--r--drivers/base/regmap/regcache-maple.c140
-rw-r--r--drivers/base/regmap/regcache.c12
-rw-r--r--drivers/base/regmap/regmap-debugfs.c11
-rw-r--r--drivers/base/regmap/regmap-irq.c273
-rw-r--r--drivers/base/regmap/regmap-kunit.c451
-rw-r--r--drivers/base/regmap/regmap-mmio.c2
-rw-r--r--drivers/base/regmap/regmap-raw-ram.c133
-rw-r--r--drivers/base/regmap/regmap-spi-avmm.c2
-rw-r--r--drivers/base/regmap/regmap.c28
-rw-r--r--drivers/block/amiflop.c20
-rw-r--r--drivers/block/aoe/aoeblk.c8
-rw-r--r--drivers/block/aoe/aoechr.c30
-rw-r--r--drivers/block/ataflop.c43
-rw-r--r--drivers/block/brd.c91
-rw-r--r--drivers/block/drbd/drbd_bitmap.c4
-rw-r--r--drivers/block/drbd/drbd_main.c26
-rw-r--r--drivers/block/drbd/drbd_nl.c24
-rw-r--r--drivers/block/drbd/drbd_receiver.c1
-rw-r--r--drivers/block/floppy.c74
-rw-r--r--drivers/block/loop.c26
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c4
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/pktcdvd.c560
-rw-r--r--drivers/block/rbd.c6
-rw-r--r--drivers/block/rnbd/Makefile6
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c24
-rw-r--r--drivers/block/rnbd/rnbd-clt.c8
-rw-r--r--drivers/block/rnbd/rnbd-common.c23
-rw-r--r--drivers/block/rnbd/rnbd-proto.h31
-rw-r--r--drivers/block/rnbd/rnbd-srv-sysfs.c28
-rw-r--r--drivers/block/rnbd/rnbd-srv.c67
-rw-r--r--drivers/block/rnbd/rnbd-srv.h4
-rw-r--r--drivers/block/sunvdc.c2
-rw-r--r--drivers/block/swim.c26
-rw-r--r--drivers/block/swim3.c33
-rw-r--r--drivers/block/ublk_drv.c498
-rw-r--r--drivers/block/virtio_blk.c82
-rw-r--r--drivers/block/xen-blkback/xenbus.c4
-rw-r--r--drivers/block/xen-blkfront.c2
-rw-r--r--drivers/block/z2ram.c8
-rw-r--r--drivers/block/zram/zram_drv.c25
-rw-r--r--drivers/cdrom/cdrom.c42
-rw-r--r--drivers/cdrom/gdrom.c12
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/clk/Kconfig2
-rw-r--r--drivers/clk/clk-rk808.c34
-rw-r--r--drivers/clk/imx/clk-imx1.c1
-rw-r--r--drivers/clk/imx/clk-imx27.c1
-rw-r--r--drivers/clk/imx/clk-imx31.c1
-rw-r--r--drivers/clk/imx/clk-imx35.c1
-rw-r--r--drivers/clocksource/Kconfig9
-rw-r--r--drivers/clocksource/Makefile1
-rw-r--r--drivers/clocksource/arm_arch_timer.c54
-rw-r--r--drivers/clocksource/hyperv_timer.c96
-rw-r--r--drivers/clocksource/ingenic-timer.c10
-rw-r--r--drivers/clocksource/timer-cadence-ttc.c19
-rw-r--r--drivers/clocksource/timer-imx-gpt.c25
-rw-r--r--drivers/clocksource/timer-loongson1-pwm.c236
-rw-r--r--drivers/cpufreq/Kconfig2
-rw-r--r--drivers/cpufreq/Kconfig.x8617
-rw-r--r--drivers/cpufreq/amd-pstate.c131
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/intel_pstate.c2
-rw-r--r--drivers/cpuidle/cpuidle.c8
-rw-r--r--drivers/cpuidle/poll_state.c4
-rw-r--r--drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c2
-rw-r--r--drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c2
-rw-r--r--drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c2
-rw-r--r--drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c2
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c12
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c6
-rw-r--r--drivers/devfreq/exynos-bus.c1
-rw-r--r--drivers/devfreq/mtk-cci-devfreq.c3
-rw-r--r--drivers/dma-buf/udmabuf.c47
-rw-r--r--drivers/edac/Kconfig11
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/amd64_edac.c398
-rw-r--r--drivers/edac/amd64_edac.h2
-rw-r--r--drivers/edac/mce_amd.c3
-rw-r--r--drivers/edac/npcm_edac.c543
-rw-r--r--drivers/edac/thunderx_edac.c2
-rw-r--r--drivers/firmware/efi/Kconfig14
-rw-r--r--drivers/firmware/efi/Makefile1
-rw-r--r--drivers/firmware/efi/efi.c47
-rw-r--r--drivers/firmware/efi/libstub/Makefile2
-rw-r--r--drivers/firmware/efi/libstub/bitmap.c41
-rw-r--r--drivers/firmware/efi/libstub/efistub.h6
-rw-r--r--drivers/firmware/efi/libstub/find.c43
-rw-r--r--drivers/firmware/efi/libstub/unaccepted_memory.c222
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c75
-rw-r--r--drivers/firmware/efi/unaccepted_memory.c147
-rw-r--r--drivers/firmware/iscsi_ibft_find.c26
-rw-r--r--drivers/gpio/gpio-104-dio-48e.c37
-rw-r--r--drivers/gpio/gpio-sifive.c8
-rw-r--r--drivers/gpio/gpiolib.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c2
-rw-r--r--drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c2
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c2
-rw-r--r--drivers/gpu/drm/display/drm_dp_mst_topology.c4
-rw-r--r--drivers/gpu/drm/drm_gem.c68
-rw-r--r--drivers/gpu/drm/drm_managed.c6
-rw-r--r--drivers/gpu/drm/drm_mipi_dsi.c2
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c55
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c8
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c50
-rw-r--r--drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_combios.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c2
-rw-r--r--drivers/gpu/drm/rockchip/inno_hdmi.c2
-rw-r--r--drivers/gpu/drm/rockchip/rk3066_hdmi.c2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h16
-rw-r--r--drivers/greybus/connection.c4
-rw-r--r--drivers/greybus/svc.c2
-rw-r--r--drivers/hv/channel_mgmt.c18
-rw-r--r--drivers/hv/hv_common.c48
-rw-r--r--drivers/hv/vmbus_drv.c5
-rw-r--r--drivers/hwtracing/coresight/coresight-trbe.c33
-rw-r--r--drivers/hwtracing/coresight/coresight-trbe.h38
-rw-r--r--drivers/i2c/busses/i2c-imx-lpi2c.c4
-rw-r--r--drivers/i2c/busses/i2c-qup.c21
-rw-r--r--drivers/idle/intel_idle.c231
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c103
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c16
-rw-r--r--drivers/input/misc/Kconfig2
-rw-r--r--drivers/input/touchscreen/sun4i-ts.c2
-rw-r--r--drivers/iommu/Kconfig1
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h9
-rw-r--r--drivers/iommu/amd/iommu.c18
-rw-r--r--drivers/iommu/dma-iommu.c58
-rw-r--r--drivers/iommu/intel/irq_remapping.c8
-rw-r--r--drivers/iommu/iommu.c2
-rw-r--r--drivers/iommu/iommufd/pages.c4
-rw-r--r--drivers/irqchip/irq-clps711x.c7
-rw-r--r--drivers/irqchip/irq-ftintc010.c4
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c2
-rw-r--r--drivers/irqchip/irq-gic-v3.c45
-rw-r--r--drivers/irqchip/irq-jcore-aic.c7
-rw-r--r--drivers/irqchip/irq-loongson-eiointc.c135
-rw-r--r--drivers/irqchip/irq-loongson-liointc.c13
-rw-r--r--drivers/irqchip/irq-loongson-pch-pic.c10
-rw-r--r--drivers/irqchip/irq-mmp.c127
-rw-r--r--drivers/irqchip/irq-mxs.c1
-rw-r--r--drivers/irqchip/irq-stm32-exti.c13
-rw-r--r--drivers/leds/trigger/ledtrig-netdev.c380
-rw-r--r--drivers/md/bcache/bcache.h12
-rw-r--r--drivers/md/bcache/btree.c48
-rw-r--r--drivers/md/bcache/btree.h5
-rw-r--r--drivers/md/bcache/request.c4
-rw-r--r--drivers/md/bcache/stats.h1
-rw-r--r--drivers/md/bcache/super.c29
-rw-r--r--drivers/md/bcache/sysfs.c31
-rw-r--r--drivers/md/bcache/sysfs.h2
-rw-r--r--drivers/md/bcache/writeback.c10
-rw-r--r--drivers/md/dm-cache-metadata.c2
-rw-r--r--drivers/md/dm-cache-target.c12
-rw-r--r--drivers/md/dm-clone-target.c10
-rw-r--r--drivers/md/dm-core.h7
-rw-r--r--drivers/md/dm-crypt.c5
-rw-r--r--drivers/md/dm-era-target.c6
-rw-r--r--drivers/md/dm-init.c4
-rw-r--r--drivers/md/dm-integrity.c4
-rw-r--r--drivers/md/dm-ioctl.c10
-rw-r--r--drivers/md/dm-raid.c4
-rw-r--r--drivers/md/dm-snap.c18
-rw-r--r--drivers/md/dm-table.c37
-rw-r--r--drivers/md/dm-thin-metadata.c2
-rw-r--r--drivers/md/dm-thin.c9
-rw-r--r--drivers/md/dm-verity-fec.c2
-rw-r--r--drivers/md/dm-verity-target.c6
-rw-r--r--drivers/md/dm-zoned-metadata.c6
-rw-r--r--drivers/md/dm.c22
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/md/md-autodetect.c3
-rw-r--r--drivers/md/md-bitmap.c93
-rw-r--r--drivers/md/md-bitmap.h8
-rw-r--r--drivers/md/md-cluster.c17
-rw-r--r--drivers/md/md-multipath.c4
-rw-r--r--drivers/md/md.c280
-rw-r--r--drivers/md/md.h37
-rw-r--r--drivers/md/raid1-10.c74
-rw-r--r--drivers/md/raid1.c43
-rw-r--r--drivers/md/raid1.h2
-rw-r--r--drivers/md/raid10.c199
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5-cache.c24
-rw-r--r--drivers/md/raid5-ppl.c4
-rw-r--r--drivers/md/raid5.c72
-rw-r--r--drivers/md/raid5.h4
-rw-r--r--drivers/media/platform/amphion/vpu_core.c2
-rw-r--r--drivers/media/platform/amphion/vpu_v4l2.c2
-rw-r--r--drivers/media/platform/chips-media/coda-common.c2
-rw-r--r--drivers/media/v4l2-core/videobuf-dma-sg.c2
-rw-r--r--drivers/memstick/host/r592.c4
-rw-r--r--drivers/mfd/Kconfig53
-rw-r--r--drivers/mfd/Makefile7
-rw-r--r--drivers/mfd/axp20x-i2c.c2
-rw-r--r--drivers/mfd/axp20x.c78
-rw-r--r--drivers/mfd/rk8xx-core.c (renamed from drivers/mfd/rk808.c)352
-rw-r--r--drivers/mfd/rk8xx-i2c.c185
-rw-r--r--drivers/mfd/rk8xx-spi.c124
-rw-r--r--drivers/mfd/tps6594-core.c462
-rw-r--r--drivers/mfd/tps6594-i2c.c244
-rw-r--r--drivers/mfd/tps6594-spi.c129
-rw-r--r--drivers/misc/lkdtm/bugs.c5
-rw-r--r--drivers/misc/sgi-gru/grufault.c4
-rw-r--r--drivers/mmc/core/block.c47
-rw-r--r--drivers/mmc/core/card.h30
-rw-r--r--drivers/mmc/core/core.c15
-rw-r--r--drivers/mmc/core/quirks.h27
-rw-r--r--drivers/mmc/core/sd.c2
-rw-r--r--drivers/mmc/host/Kconfig2
-rw-r--r--drivers/mmc/host/bcm2835.c4
-rw-r--r--drivers/mmc/host/cqhci.h3
-rw-r--r--drivers/mmc/host/dw_mmc-bluefield.c2
-rw-r--r--drivers/mmc/host/dw_mmc-k3.c2
-rw-r--r--drivers/mmc/host/dw_mmc-pltfm.c5
-rw-r--r--drivers/mmc/host/dw_mmc-pltfm.h2
-rw-r--r--drivers/mmc/host/dw_mmc-starfive.c2
-rw-r--r--drivers/mmc/host/litex_mmc.c1
-rw-r--r--drivers/mmc/host/meson-gx-mmc.c14
-rw-r--r--drivers/mmc/host/meson-mx-sdhc-mmc.c8
-rw-r--r--drivers/mmc/host/mmci.c211
-rw-r--r--drivers/mmc/host/mmci.h25
-rw-r--r--drivers/mmc/host/mmci_stm32_sdmmc.c179
-rw-r--r--drivers/mmc/host/mtk-sd.c50
-rw-r--r--drivers/mmc/host/mvsdio.c2
-rw-r--r--drivers/mmc/host/omap.c2
-rw-r--r--drivers/mmc/host/omap_hsmmc.c6
-rw-r--r--drivers/mmc/host/owl-mmc.c2
-rw-r--r--drivers/mmc/host/sdhci-acpi.c2
-rw-r--r--drivers/mmc/host/sdhci-msm.c226
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c1
-rw-r--r--drivers/mmc/host/sdhci-pci-gli.c406
-rw-r--r--drivers/mmc/host/sdhci-pci.h2
-rw-r--r--drivers/mmc/host/sdhci-spear.c4
-rw-r--r--drivers/mmc/host/sdhci.c4
-rw-r--r--drivers/mmc/host/sdhci.h7
-rw-r--r--drivers/mmc/host/sh_mmcif.c2
-rw-r--r--drivers/mmc/host/sunxi-mmc.c4
-rw-r--r--drivers/mmc/host/usdhi6rol0.c6
-rw-r--r--drivers/most/configfs.c8
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c3
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0002.c3
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0020.c3
-rw-r--r--drivers/mtd/chips/cfi_probe.c3
-rw-r--r--drivers/mtd/chips/cfi_util.c3
-rw-r--r--drivers/mtd/chips/gen_probe.c2
-rw-r--r--drivers/mtd/chips/jedec_probe.c3
-rw-r--r--drivers/mtd/chips/map_ram.c3
-rw-r--r--drivers/mtd/chips/map_rom.c3
-rw-r--r--drivers/mtd/devices/block2mtd.c64
-rw-r--r--drivers/mtd/devices/st_spi_fsm.c28
-rw-r--r--drivers/mtd/maps/pismo.c2
-rw-r--r--drivers/mtd/mtd_blkdevs.c8
-rw-r--r--drivers/mtd/mtdblock.c2
-rw-r--r--drivers/mtd/mtdcore.c21
-rw-r--r--drivers/mtd/mtdpart.c1
-rw-r--r--drivers/mtd/nand/raw/Makefile1
-rw-r--r--drivers/mtd/nand/raw/arasan-nand-controller.c15
-rw-r--r--drivers/mtd/nand/raw/internals.h1
-rw-r--r--drivers/mtd/nand/raw/meson_nand.c134
-rw-r--r--drivers/mtd/nand/raw/nand_ids.c5
-rw-r--r--drivers/mtd/nand/raw/nand_macronix.c167
-rw-r--r--drivers/mtd/nand/raw/nand_sandisk.c26
-rw-r--r--drivers/mtd/nand/spi/gigadevice.c10
-rw-r--r--drivers/mtd/nand/spi/macronix.c20
-rw-r--r--drivers/mtd/sm_ftl.c2
-rw-r--r--drivers/mtd/ubi/block.c9
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/bonding/bond_main.c17
-rw-r--r--drivers/net/bonding/bonding_priv.h4
-rw-r--r--drivers/net/can/Kconfig3
-rw-r--r--drivers/net/can/at91_can.c6
-rw-r--r--drivers/net/can/bxcan.c17
-rw-r--r--drivers/net/can/c_can/c_can_platform.c6
-rw-r--r--drivers/net/can/cc770/cc770_isa.c6
-rw-r--r--drivers/net/can/cc770/cc770_platform.c6
-rw-r--r--drivers/net/can/ctucanfd/ctucanfd_platform.c6
-rw-r--r--drivers/net/can/dev/length.c15
-rw-r--r--drivers/net/can/dev/rx-offload.c2
-rw-r--r--drivers/net/can/flexcan/flexcan-core.c6
-rw-r--r--drivers/net/can/grcan.c6
-rw-r--r--drivers/net/can/ifi_canfd/ifi_canfd.c6
-rw-r--r--drivers/net/can/janz-ican3.c6
-rw-r--r--drivers/net/can/kvaser_pciefd.c955
-rw-r--r--drivers/net/can/m_can/m_can.c4
-rw-r--r--drivers/net/can/m_can/m_can_platform.c6
-rw-r--r--drivers/net/can/mscan/mpc5xxx_can.c6
-rw-r--r--drivers/net/can/rcar/rcar_can.c5
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c6
-rw-r--r--drivers/net/can/sja1000/sja1000.c40
-rw-r--r--drivers/net/can/sja1000/sja1000.h1
-rw-r--r--drivers/net/can/sja1000/sja1000_isa.c6
-rw-r--r--drivers/net/can/sja1000/sja1000_platform.c11
-rw-r--r--drivers/net/can/softing/softing_main.c5
-rw-r--r--drivers/net/can/sun4i_can.c6
-rw-r--r--drivers/net/can/ti_hecc.c8
-rw-r--r--drivers/net/can/usb/Kconfig12
-rw-r--r--drivers/net/can/usb/Makefile1
-rw-r--r--drivers/net/can/usb/esd_usb.c352
-rw-r--r--drivers/net/can/usb/f81604.c1201
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c2
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c13
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c6
-rw-r--r--drivers/net/can/xilinx_can.c25
-rw-r--r--drivers/net/dsa/b53/b53_serdes.c3
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek.c14
-rw-r--r--drivers/net/dsa/lan9303-core.c1
-rw-r--r--drivers/net/dsa/lan9303_i2c.c2
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c28
-rw-r--r--drivers/net/dsa/microchip/ksz8863_smi.c13
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c116
-rw-r--r--drivers/net/dsa/microchip/ksz9477_i2c.c4
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c51
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h76
-rw-r--r--drivers/net/dsa/microchip/ksz_spi.c2
-rw-r--r--drivers/net/dsa/microchip/lan937x_main.c8
-rw-r--r--drivers/net/dsa/mt7530.c51
-rw-r--r--drivers/net/dsa/mt7530.h6
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c178
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h15
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c29
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.h15
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c47
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.h4
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c25
-rw-r--r--drivers/net/dsa/ocelot/seville_vsc9953.c20
-rw-r--r--drivers/net/dsa/qca/ar9331.c16
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c15
-rw-r--r--drivers/net/dsa/qca/qca8k-common.c6
-rw-r--r--drivers/net/dsa/qca/qca8k-leds.c201
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c14
-rw-r--r--drivers/net/dsa/sja1105/sja1105_mdio.c11
-rw-r--r--drivers/net/dsa/sja1105/sja1105_tas.c7
-rw-r--r--drivers/net/dsa/xrs700x/xrs700x_i2c.c2
-rw-r--r--drivers/net/ethernet/8390/8390.h2
-rw-r--r--drivers/net/ethernet/8390/apne.c7
-rw-r--r--drivers/net/ethernet/8390/axnet_cs.c6
-rw-r--r--drivers/net/ethernet/8390/hydra.c6
-rw-r--r--drivers/net/ethernet/8390/lib8390.c5
-rw-r--r--drivers/net/ethernet/8390/mac8390.c6
-rw-r--r--drivers/net/ethernet/8390/mcf8390.c4
-rw-r--r--drivers/net/ethernet/8390/ne.c4
-rw-r--r--drivers/net/ethernet/8390/ne2k-pci.c1
-rw-r--r--drivers/net/ethernet/8390/pcnet_cs.c5
-rw-r--r--drivers/net/ethernet/8390/smc-ultra.c4
-rw-r--r--drivers/net/ethernet/8390/stnic.c5
-rw-r--r--drivers/net/ethernet/8390/wd.c4
-rw-r--r--drivers/net/ethernet/8390/zorro8390.c7
-rw-r--r--drivers/net/ethernet/altera/Kconfig4
-rw-r--r--drivers/net/ethernet/altera/altera_tse_main.c65
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_admin_defs.h6
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c136
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h4
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c4
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c13
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_macsec.c40
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c6
-rw-r--r--drivers/net/ethernet/arc/emac.h2
-rw-r--r--drivers/net/ethernet/arc/emac_arc.c6
-rw-r--r--drivers/net/ethernet/arc/emac_main.c4
-rw-r--r--drivers/net/ethernet/arc/emac_rockchip.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c1
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c14
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c2
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c1
-rw-r--r--drivers/net/ethernet/cadence/macb.h13
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c47
-rw-r--r--drivers/net/ethernet/cavium/Kconfig5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/Makefile8
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c8
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c7
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn66xx_device.c1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn68xx_device.c1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_core.c16
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_ethtool.c1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c18
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c15
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c24
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_nic.c3
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c14
-rw-r--r--drivers/net/ethernet/cavium/liquidio/response_manager.c3
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/sge.c5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c2
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c2
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h3
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c118
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c36
-rw-r--r--drivers/net/ethernet/engleder/tsnep_selftests.c12
-rw-r--r--drivers/net/ethernet/engleder/tsnep_tc.c4
-rw-r--r--drivers/net/ethernet/freescale/Kconfig2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c40
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c12
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h1
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c22
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_qos.c113
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c40
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_dtsec.c7
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_memac.c18
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/mii-fec.c2
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_rx.c5
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_tx.c3
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c5
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx_dqo.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c36
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c29
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c47
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c14
-rw-r--r--drivers/net/ethernet/i825xx/82596.c5
-rw-r--r--drivers/net/ethernet/i825xx/lasi_82596.c5
-rw-r--r--drivers/net/ethernet/i825xx/lib82596.c5
-rw-r--r--drivers/net/ethernet/i825xx/sun3_82586.c1
-rw-r--r--drivers/net/ethernet/i825xx/sun3_82586.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c6
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h10
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_alloc.h3
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_common.c45
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c78
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_osdep.h9
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_prototype.h5
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.c43
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_txrx.h4
-rw-r--r--drivers/net/ethernet/intel/ice/Makefile1
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h51
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_arfs.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c50
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ddp.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c84
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.h14
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c311
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.h105
-rw-r--r--drivers/net/ethernet/intel/ice/ice_flow.c23
-rw-r--r--drivers/net/ethernet/intel/ice/ice_idc.c54
-rw-r--r--drivers/net/ethernet/intel/ice/ice_irq.c378
-rw-r--r--drivers/net/ethernet/intel/ice/ice_irq.h25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.h54
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c332
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c357
-rw-r--r--drivers/net/ethernet/intel/ice/ice_protocol_type.h197
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c64
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.c17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_repr.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c52
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c251
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vlan_mode.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c5
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c7
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h43
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c151
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c142
-rw-r--r--drivers/net/ethernet/litex/litex_liteeth.c19
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c178
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c14
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_rx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/Kconfig1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/common.h9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h11
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c74
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c292
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c18
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/Makefile2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c135
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c215
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h93
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c29
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c138
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h13
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c43
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c43
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c19
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c1363
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.h69
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c296
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_flower.c6
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_main.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c65
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/debugfs.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c87
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c137
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c190
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c69
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c203
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c451
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/events.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c160
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c141
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c129
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/events.h40
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c212
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/rdma.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/thermal.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c832
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h12
-rw-r--r--drivers/net/ethernet/microchip/enc28j60.c28
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c22
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Kconfig11
-rw-r--r--drivers/net/ethernet/microchip/lan966x/Makefile1
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c365
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.c3
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h60
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c7
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_port.c149
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_regs.h147
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_tc.c10
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c61
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c264
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c23
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c82
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c8
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_ag_api.h67
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.c8
-rw-r--r--drivers/net/ethernet/microsoft/mana/hw_channel.c2
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c19
-rw-r--r--drivers/net/ethernet/mscc/ocelot_flower.c10
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_devlink.c10
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c32
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h7
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c54
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/Kconfig2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_ethtool.c10
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c5
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h1
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c3
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c41
-rw-r--r--drivers/net/ethernet/renesas/rswitch.c38
-rw-r--r--drivers/net/ethernet/renesas/rswitch.h7
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h2
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c4
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c5
-rw-r--r--drivers/net/ethernet/sfc/Kconfig1
-rw-r--r--drivers/net/ethernet/sfc/Makefile3
-rw-r--r--drivers/net/ethernet/sfc/ef10.c38
-rw-r--r--drivers/net/ethernet/sfc/ef100_netdev.c85
-rw-r--r--drivers/net/ethernet/sfc/ef100_nic.c7
-rw-r--r--drivers/net/ethernet/sfc/ef100_tx.c4
-rw-r--r--drivers/net/ethernet/sfc/ef100_tx.h2
-rw-r--r--drivers/net/ethernet/sfc/efx.c9
-rw-r--r--drivers/net/ethernet/sfc/efx_devlink.c80
-rw-r--r--drivers/net/ethernet/sfc/falcon/selftest.c47
-rw-r--r--drivers/net/ethernet/sfc/mae.c141
-rw-r--r--drivers/net/ethernet/sfc/mae.h9
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h3
-rw-r--r--drivers/net/ethernet/sfc/selftest.c47
-rw-r--r--drivers/net/ethernet/sfc/siena/selftest.c47
-rw-r--r--drivers/net/ethernet/sfc/siena/tx_common.c1
-rw-r--r--drivers/net/ethernet/sfc/tc.c407
-rw-r--r--drivers/net/ethernet/sfc/tc.h51
-rw-r--r--drivers/net/ethernet/sfc/tc_bindings.c13
-rw-r--r--drivers/net/ethernet/sfc/tc_bindings.h14
-rw-r--r--drivers/net/ethernet/sfc/tc_counters.c58
-rw-r--r--drivers/net/ethernet/sfc/tc_counters.h3
-rw-r--r--drivers/net/ethernet/sfc/tc_encap_actions.c747
-rw-r--r--drivers/net/ethernet/sfc/tc_encap_actions.h114
-rw-r--r--drivers/net/ethernet/sfc/tx_common.c5
-rw-r--r--drivers/net/ethernet/sfc/tx_common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c257
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h29
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c297
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c108
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c19
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c20
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c168
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c7
-rw-r--r--drivers/net/ethernet/sun/cassini.c8
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.c1
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-qos.c11
-rw-r--r--drivers/net/ethernet/wangxun/Kconfig10
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c272
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.h3
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c737
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.h1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h220
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c20
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/Makefile1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c28
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c32
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c98
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c673
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h10
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_type.h90
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c16
-rw-r--r--drivers/net/gtp.c2
-rw-r--r--drivers/net/hyperv/hyperv_net.h5
-rw-r--r--drivers/net/hyperv/netvsc_drv.c10
-rw-r--r--drivers/net/hyperv/rndis_filter.c29
-rw-r--r--drivers/net/ieee802154/adf7242.c2
-rw-r--r--drivers/net/ieee802154/ca8210.c4
-rw-r--r--drivers/net/ieee802154/mac802154_hwsim.c6
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c12
-rw-r--r--drivers/net/mctp/mctp-i2c.c2
-rw-r--r--drivers/net/mdio/Kconfig11
-rw-r--r--drivers/net/mdio/Makefile1
-rw-r--r--drivers/net/mdio/mdio-mux-mmioreg.c7
-rw-r--r--drivers/net/mdio/mdio-regmap.c93
-rw-r--r--drivers/net/pcs/Kconfig6
-rw-r--r--drivers/net/pcs/Makefile1
-rw-r--r--drivers/net/pcs/pcs-altera-tse.c160
-rw-r--r--drivers/net/pcs/pcs-lynx.c123
-rw-r--r--drivers/net/pcs/pcs-mtk-lynxi.c39
-rw-r--r--drivers/net/pcs/pcs-xpcs.c265
-rw-r--r--drivers/net/pcs/pcs-xpcs.h3
-rw-r--r--drivers/net/phy/Kconfig17
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/at803x.c44
-rw-r--r--drivers/net/phy/bcm-phy-lib.c264
-rw-r--r--drivers/net/phy/bcm-phy-lib.h10
-rw-r--r--drivers/net/phy/broadcom.c177
-rw-r--r--drivers/net/phy/dp83867.c2
-rw-r--r--drivers/net/phy/dp83869.c13
-rw-r--r--drivers/net/phy/dp83td510.c23
-rw-r--r--drivers/net/phy/mdio_bus.c2
-rw-r--r--drivers/net/phy/mediatek-ge-soc.c1116
-rw-r--r--drivers/net/phy/mediatek-ge.c3
-rw-r--r--drivers/net/phy/micrel.c331
-rw-r--r--drivers/net/phy/microchip_t1s.c274
-rw-r--r--drivers/net/phy/mscc/mscc.h1
-rw-r--r--drivers/net/phy/mscc/mscc_main.c51
-rw-r--r--drivers/net/phy/phy-c45.c9
-rw-r--r--drivers/net/phy/phy.c11
-rw-r--r--drivers/net/phy/phy_device.c36
-rw-r--r--drivers/net/phy/phylink.c209
-rw-r--r--drivers/net/phy/realtek.c34
-rw-r--r--drivers/net/phy/sfp-bus.c20
-rw-r--r--drivers/net/phy/sfp.c345
-rw-r--r--drivers/net/phy/sfp.h1
-rw-r--r--drivers/net/ppp/Kconfig34
-rw-r--r--drivers/net/ppp/pppoe.c2
-rw-r--r--drivers/net/tap.c1
-rw-r--r--drivers/net/usb/Kconfig10
-rw-r--r--drivers/net/usb/ipheth.c186
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/usb/r8152.c1
-rw-r--r--drivers/net/veth.c26
-rw-r--r--drivers/net/virtio_net.c661
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c4
-rw-r--r--drivers/net/vxlan/vxlan_core.c21
-rw-r--r--drivers/net/wireguard/device.c1
-rw-r--r--drivers/net/wireless/ath/ath10k/ahb.c20
-rw-r--r--drivers/net/wireless/ath/ath10k/core.c12
-rw-r--r--drivers/net/wireless/ath/ath10k/core.h3
-rw-r--r--drivers/net/wireless/ath/ath10k/debug.c4
-rw-r--r--drivers/net/wireless/ath/ath10k/htt.h6
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c13
-rw-r--r--drivers/net/wireless/ath/ath10k/qmi.c3
-rw-r--r--drivers/net/wireless/ath/ath10k/snoc.c8
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.c34
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi.h2
-rw-r--r--drivers/net/wireless/ath/ath11k/ahb.c8
-rw-r--r--drivers/net/wireless/ath/ath11k/ce.c4
-rw-r--r--drivers/net/wireless/ath/ath11k/core.c89
-rw-r--r--drivers/net/wireless/ath/ath11k/core.h16
-rw-r--r--drivers/net/wireless/ath/ath11k/debug.c2
-rw-r--r--drivers/net/wireless/ath/ath11k/debug.h49
-rw-r--r--drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c114
-rw-r--r--drivers/net/wireless/ath/ath11k/debugfs_htt_stats.h43
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_rx.c8
-rw-r--r--drivers/net/wireless/ath/ath11k/dp_tx.c12
-rw-r--r--drivers/net/wireless/ath/ath11k/hal.c10
-rw-r--r--drivers/net/wireless/ath/ath11k/hal_rx.c24
-rw-r--r--drivers/net/wireless/ath/ath11k/htc.c42
-rw-r--r--drivers/net/wireless/ath/ath11k/hw.c5
-rw-r--r--drivers/net/wireless/ath/ath11k/hw.h1
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.c539
-rw-r--r--drivers/net/wireless/ath/ath11k/mhi.c6
-rw-r--r--drivers/net/wireless/ath/ath11k/pci.c25
-rw-r--r--drivers/net/wireless/ath/ath11k/pcic.c6
-rw-r--r--drivers/net/wireless/ath/ath11k/peer.c4
-rw-r--r--drivers/net/wireless/ath/ath11k/qmi.c76
-rw-r--r--drivers/net/wireless/ath/ath11k/reg.c4
-rw-r--r--drivers/net/wireless/ath/ath11k/testmode.c387
-rw-r--r--drivers/net/wireless/ath/ath11k/testmode.h6
-rw-r--r--drivers/net/wireless/ath/ath11k/testmode_i.h18
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.c628
-rw-r--r--drivers/net/wireless/ath/ath11k/wmi.h70
-rw-r--r--drivers/net/wireless/ath/ath11k/wow.c3
-rw-r--r--drivers/net/wireless/ath/ath12k/core.c2
-rw-r--r--drivers/net/wireless/ath/ath12k/core.h1
-rw-r--r--drivers/net/wireless/ath/ath12k/dp_rx.c27
-rw-r--r--drivers/net/wireless/ath/ath12k/hal.c16
-rw-r--r--drivers/net/wireless/ath/ath12k/hal.h2
-rw-r--r--drivers/net/wireless/ath/ath12k/hw.c6
-rw-r--r--drivers/net/wireless/ath/ath12k/hw.h2
-rw-r--r--drivers/net/wireless/ath/ath12k/mac.c51
-rw-r--r--drivers/net/wireless/ath/ath12k/pci.c14
-rw-r--r--drivers/net/wireless/ath/ath12k/qmi.c9
-rw-r--r--drivers/net/wireless/ath/ath12k/qmi.h1
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c103
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.h10
-rw-r--r--drivers/net/wireless/ath/ath9k/ar9003_hw.c27
-rw-r--r--drivers/net/wireless/ath/ath9k/htc_hst.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c11
-rw-r--r--drivers/net/wireless/ath/ath9k/wmi.c4
-rw-r--r--drivers/net/wireless/ath/wil6210/fw.h4
-rw-r--r--drivers/net/wireless/ath/wil6210/wmi.h2
-rw-r--r--drivers/net/wireless/atmel/Kconfig2
-rw-r--r--drivers/net/wireless/atmel/atmel_cs.c13
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c6
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h7
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c4
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h8
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Makefile1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/1000.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/2000.c18
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/22000.c832
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/5000.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/6000.c18
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/7000.c22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/8000.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/9000.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/ax210.c301
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/bz.c183
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/sc.c166
-rw-r--r--drivers/net/wireless/intel/iwlwifi/dvm/rs.c43
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c121
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.h41
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/binding.h14
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/commands.h23
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/config.h15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/context.h13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/d3.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/datapath.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/location.h16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h65
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac.h24
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h46
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/offload.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/phy-ctxt.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/phy.h10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/power.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/rs.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/rx.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/scan.h41
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/tx.h13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c72
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.h18
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/debugfs.c35
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dump.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/pnvm.c234
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/pnvm.h27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/runtime.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/uefi.c272
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/uefi.h47
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h69
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h32
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-context-info.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c56
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c75
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-io.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c155
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-prph.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.h108
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mei/main.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/binding.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/constants.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c420
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c202
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c48
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c23
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c93
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/link.c37
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c122
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c200
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-key.c99
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c177
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c28
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h105
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c63
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/offloading.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/power.c56
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c77
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c46
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c163
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sf.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c90
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.h20
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c150
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c273
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c519
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/internal.h12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/rx.c31
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c69
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/queue/tx.c12
-rw-r--r--drivers/net/wireless/intersil/hostap/Kconfig2
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_ioctl.c2
-rw-r--r--drivers/net/wireless/intersil/orinoco/orinoco_cs.c13
-rw-r--r--drivers/net/wireless/intersil/orinoco/spectrum_cs.c13
-rw-r--r--drivers/net/wireless/intersil/p54/p54spi.c1
-rw-r--r--drivers/net/wireless/legacy/ray_cs.c33
-rw-r--r--drivers/net/wireless/legacy/wl3501_cs.c16
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11n.h4
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cfg80211.c12
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.c8
-rw-r--r--drivers/net/wireless/marvell/mwifiex/scan.c6
-rw-r--r--drivers/net/wireless/marvell/mwifiex/wmm.h15
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/debugfs.c2
-rw-r--r--drivers/net/wireless/mediatek/mt7601u/trace.h2
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.c8
-rw-r--r--drivers/net/wireless/microchip/wilc1000/hif.h2
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan_cfg.h2
-rw-r--r--drivers/net/wireless/microchip/wilc1000/wlan_if.h2
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00link.c2
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/Kconfig3
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/Makefile2
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h84
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c3
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188f.c25
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c28
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192f.c2090
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8710b.c37
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c5
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c567
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h52
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/base.c20
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/base.h1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/core.c10
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/core.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.c7
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c40
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c41
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c56
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.h6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/trx.c4
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c42
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c40
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c16
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c42
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c42
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c42
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c14
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c60
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.h9
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c1
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/usb.c42
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/wifi.h29
-rw-r--r--drivers/net/wireless/realtek/rtw88/Kconfig11
-rw-r--r--drivers/net/wireless/realtek/rtw88/Makefile3
-rw-r--r--drivers/net/wireless/realtek/rtw88/debug.c59
-rw-r--r--drivers/net/wireless/realtek/rtw88/fw.c68
-rw-r--r--drivers/net/wireless/realtek/rtw88/fw.h13
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac.c6
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac80211.c13
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.c15
-rw-r--r--drivers/net/wireless/realtek/rtw88/main.h1
-rw-r--r--drivers/net/wireless/realtek/rtw88/pci.c5
-rw-r--r--drivers/net/wireless/realtek/rtw88/ps.c3
-rw-r--r--drivers/net/wireless/realtek/rtw88/reg.h2
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8723d.c15
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8723d.h6
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8723ds.c41
-rw-r--r--drivers/net/wireless/realtek/rtw88/sdio.c24
-rw-r--r--drivers/net/wireless/realtek/rtw88/tx.c94
-rw-r--r--drivers/net/wireless/realtek/rtw88/tx.h124
-rw-r--r--drivers/net/wireless/realtek/rtw88/usb.c17
-rw-r--r--drivers/net/wireless/realtek/rtw89/Kconfig14
-rw-r--r--drivers/net/wireless/realtek/rtw89/Makefile12
-rw-r--r--drivers/net/wireless/realtek/rtw89/acpi.c52
-rw-r--r--drivers/net/wireless/realtek/rtw89/acpi.h21
-rw-r--r--drivers/net/wireless/realtek/rtw89/coex.c9
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.c261
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.h217
-rw-r--r--drivers/net/wireless/realtek/rtw89/debug.c60
-rw-r--r--drivers/net/wireless/realtek/rtw89/efuse.c21
-rw-r--r--drivers/net/wireless/realtek/rtw89/efuse.h1
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.c175
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.h281
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c160
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.h5
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac80211.c16
-rw-r--r--drivers/net/wireless/realtek/rtw89/pci.c25
-rw-r--r--drivers/net/wireless/realtek/rtw89/pci.h1
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.c344
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.h12
-rw-r--r--drivers/net/wireless/realtek/rtw89/reg.h134
-rw-r--r--drivers/net/wireless/realtek/rtw89/regd.c324
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b.c2442
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b.h76
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c3621
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.h28
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b_table.c252
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851b_table.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8851be.c86
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852a.c15
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c36
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.h1
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b.c8
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b_rfk.c8
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b_table.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b_table.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c.c8
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c11
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c_table.c27992
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c_table.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/ser.c48
-rw-r--r--drivers/net/wireless/realtek/rtw89/txrx.h176
-rw-r--r--drivers/net/wireless/realtek/rtw89/wow.c2
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_sdio.c9
-rw-r--r--drivers/net/wireless/virtual/mac80211_hwsim.c23
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem.c17
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_imem.h15
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux.h4
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux_codec.c15
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_mux_codec.h2
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_pcie.c4
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_port.c17
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_trace.c8
-rw-r--r--drivers/net/wwan/iosm/iosm_ipc_wwan.c23
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.c13
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_dpmaif_tx.c5
-rw-r--r--drivers/net/xen-netback/netback.c4
-rw-r--r--drivers/nfc/fdp/fdp.c3
-rw-r--r--drivers/nfc/fdp/i2c.c2
-rw-r--r--drivers/nfc/microread/i2c.c2
-rw-r--r--drivers/nfc/nfcmrvl/i2c.c2
-rw-r--r--drivers/nfc/nxp-nci/i2c.c4
-rw-r--r--drivers/nfc/pn533/i2c.c2
-rw-r--r--drivers/nfc/pn544/i2c.c2
-rw-r--r--drivers/nfc/s3fwrn5/i2c.c2
-rw-r--r--drivers/nfc/st-nci/i2c.c2
-rw-r--r--drivers/nfc/st21nfca/i2c.c2
-rw-r--r--drivers/nubus/nubus.c13
-rw-r--r--drivers/nubus/proc.c33
-rw-r--r--drivers/nvme/host/Makefile2
-rw-r--r--drivers/nvme/host/auth.c6
-rw-r--r--drivers/nvme/host/core.c672
-rw-r--r--drivers/nvme/host/fabrics.c241
-rw-r--r--drivers/nvme/host/fabrics.h21
-rw-r--r--drivers/nvme/host/fc.c8
-rw-r--r--drivers/nvme/host/ioctl.c70
-rw-r--r--drivers/nvme/host/multipath.c6
-rw-r--r--drivers/nvme/host/nvme.h15
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/nvme/host/rdma.c81
-rw-r--r--drivers/nvme/host/sysfs.c668
-rw-r--r--drivers/nvme/host/tcp.c141
-rw-r--r--drivers/nvme/target/fabrics-cmd-auth.c13
-rw-r--r--drivers/nvme/target/fcloop.c5
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c4
-rw-r--r--drivers/nvme/target/nvmet.h2
-rw-r--r--drivers/nvme/target/tcp.c46
-rw-r--r--drivers/parport/procfs.c185
-rw-r--r--drivers/parport/share.c2
-rw-r--r--drivers/pci/Kconfig1
-rw-r--r--drivers/pci/controller/pci-hyperv.c139
-rw-r--r--drivers/perf/Kconfig8
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c30
-rw-r--r--drivers/perf/arm-cci.c4
-rw-r--r--drivers/perf/arm-cmn.c172
-rw-r--r--drivers/perf/arm_cspmu/Kconfig3
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c89
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.h5
-rw-r--r--drivers/perf/arm_dmc620_pmu.c22
-rw-r--r--drivers/perf/arm_pmu.c7
-rw-r--r--drivers/perf/arm_pmuv3.c33
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c711
-rw-r--r--drivers/perf/hisilicon/Makefile2
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c2
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pa_pmu.c127
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c4
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.h14
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_uc_pmu.c578
-rw-r--r--drivers/perf/qcom_l2_pmu.c2
-rw-r--r--drivers/pinctrl/Kconfig2
-rw-r--r--drivers/pinctrl/pinctrl-amd.c6
-rw-r--r--drivers/pinctrl/pinctrl-rk805.c189
-rw-r--r--drivers/platform/chrome/cros_ec_i2c.c2
-rw-r--r--drivers/platform/chrome/cros_ec_lpc.c15
-rw-r--r--drivers/platform/chrome/cros_ec_spi.c8
-rw-r--r--drivers/platform/chrome/cros_hps_i2c.c2
-rw-r--r--drivers/platform/chrome/cros_typec_switch.c11
-rw-r--r--drivers/platform/x86/amd/pmc.c4
-rw-r--r--drivers/platform/x86/amd/pmf/core.c10
-rw-r--r--drivers/power/supply/Kconfig2
-rw-r--r--drivers/powercap/Kconfig18
-rw-r--r--drivers/powercap/Makefile1
-rw-r--r--drivers/powercap/intel_rapl_common.c883
-rw-r--r--drivers/powercap/intel_rapl_msr.c31
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c325
-rw-r--r--drivers/ptp/Kconfig2
-rw-r--r--drivers/ptp/ptp_chardev.c5
-rw-r--r--drivers/ptp/ptp_clock.c4
-rw-r--r--drivers/ptp/ptp_clockmatrix.c36
-rw-r--r--drivers/ptp/ptp_clockmatrix.h2
-rw-r--r--drivers/ptp/ptp_idt82p33.c18
-rw-r--r--drivers/ptp/ptp_idt82p33.h4
-rw-r--r--drivers/ptp/ptp_ocp.c7
-rw-r--r--drivers/ptp/ptp_sysfs.c12
-rw-r--r--drivers/pwm/pwm-atmel.c2
-rw-r--r--drivers/pwm/pwm-pxa.c2
-rw-r--r--drivers/ras/debugfs.c2
-rw-r--r--drivers/regulator/88pg86x.c2
-rw-r--r--drivers/regulator/Kconfig33
-rw-r--r--drivers/regulator/Makefile3
-rw-r--r--drivers/regulator/act8865-regulator.c2
-rw-r--r--drivers/regulator/ad5398.c2
-rw-r--r--drivers/regulator/axp20x-regulator.c290
-rw-r--r--drivers/regulator/core.c30
-rw-r--r--drivers/regulator/da9121-regulator.c2
-rw-r--r--drivers/regulator/da9210-regulator.c2
-rw-r--r--drivers/regulator/da9211-regulator.c2
-rw-r--r--drivers/regulator/fan53555.c2
-rw-r--r--drivers/regulator/fan53880.c2
-rw-r--r--drivers/regulator/helpers.c23
-rw-r--r--drivers/regulator/isl6271a-regulator.c2
-rw-r--r--drivers/regulator/isl9305.c2
-rw-r--r--drivers/regulator/lp3971.c2
-rw-r--r--drivers/regulator/lp3972.c2
-rw-r--r--drivers/regulator/lp872x.c2
-rw-r--r--drivers/regulator/lp8755.c2
-rw-r--r--drivers/regulator/ltc3589.c4
-rw-r--r--drivers/regulator/ltc3676.c4
-rw-r--r--drivers/regulator/max1586.c2
-rw-r--r--drivers/regulator/max20086-regulator.c2
-rw-r--r--drivers/regulator/max20411-regulator.c2
-rw-r--r--drivers/regulator/max77826-regulator.c2
-rw-r--r--drivers/regulator/max8649.c2
-rw-r--r--drivers/regulator/max8660.c2
-rw-r--r--drivers/regulator/max8893.c2
-rw-r--r--drivers/regulator/max8952.c2
-rw-r--r--drivers/regulator/max8973-regulator.c2
-rw-r--r--drivers/regulator/mcp16502.c2
-rw-r--r--drivers/regulator/mp5416.c2
-rw-r--r--drivers/regulator/mp8859.c2
-rw-r--r--drivers/regulator/mp886x.c2
-rw-r--r--drivers/regulator/mpq7920.c2
-rw-r--r--drivers/regulator/mt6311-regulator.c2
-rw-r--r--drivers/regulator/mt6358-regulator.c221
-rw-r--r--drivers/regulator/pca9450-regulator.c2
-rw-r--r--drivers/regulator/pf8x00-regulator.c2
-rw-r--r--drivers/regulator/pfuze100-regulator.c2
-rw-r--r--drivers/regulator/pv88060-regulator.c2
-rw-r--r--drivers/regulator/pv88080-regulator.c2
-rw-r--r--drivers/regulator/pv88090-regulator.c2
-rw-r--r--drivers/regulator/raa215300.c190
-rw-r--r--drivers/regulator/rk808-regulator.c399
-rw-r--r--drivers/regulator/rpi-panel-attiny-regulator.c2
-rw-r--r--drivers/regulator/rt4801-regulator.c2
-rw-r--r--drivers/regulator/rt5190a-regulator.c2
-rw-r--r--drivers/regulator/rt5739.c2
-rw-r--r--drivers/regulator/rt5759-regulator.c2
-rw-r--r--drivers/regulator/rt6160-regulator.c2
-rw-r--r--drivers/regulator/rt6190-regulator.c2
-rw-r--r--drivers/regulator/rt6245-regulator.c2
-rw-r--r--drivers/regulator/rtmv20-regulator.c2
-rw-r--r--drivers/regulator/rtq2134-regulator.c2
-rw-r--r--drivers/regulator/rtq6752-regulator.c2
-rw-r--r--drivers/regulator/slg51000-regulator.c2
-rw-r--r--drivers/regulator/stm32-pwr.c2
-rw-r--r--drivers/regulator/sy8106a-regulator.c2
-rw-r--r--drivers/regulator/sy8824x.c2
-rw-r--r--drivers/regulator/sy8827n.c2
-rw-r--r--drivers/regulator/tps51632-regulator.c2
-rw-r--r--drivers/regulator/tps62360-regulator.c2
-rw-r--r--drivers/regulator/tps6286x-regulator.c2
-rw-r--r--drivers/regulator/tps6287x-regulator.c189
-rw-r--r--drivers/regulator/tps65023-regulator.c2
-rw-r--r--drivers/regulator/tps65132-regulator.c2
-rw-r--r--drivers/regulator/tps65219-regulator.c6
-rw-r--r--drivers/regulator/tps6594-regulator.c615
-rw-r--r--drivers/rtc/Kconfig2
-rw-r--r--drivers/s390/block/dasd.c10
-rw-r--r--drivers/s390/block/dasd_genhd.c5
-rw-r--r--drivers/s390/block/dasd_int.h3
-rw-r--r--drivers/s390/block/dasd_ioctl.c2
-rw-r--r--drivers/s390/block/dcssblk.c11
-rw-r--r--drivers/s390/char/zcore.c41
-rw-r--r--drivers/s390/cio/vfio_ccw_drv.c2
-rw-r--r--drivers/s390/cio/vfio_ccw_private.h2
-rw-r--r--drivers/s390/crypto/pkey_api.c509
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c134
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h3
-rw-r--r--drivers/s390/net/ctcm_dbug.c2
-rw-r--r--drivers/s390/net/ctcm_main.c6
-rw-r--r--drivers/s390/net/ctcm_main.h1
-rw-r--r--drivers/s390/net/ctcm_mpc.c18
-rw-r--r--drivers/s390/net/ctcm_sysfs.c46
-rw-r--r--drivers/s390/net/ism_drv.c2
-rw-r--r--drivers/s390/net/lcs.c13
-rw-r--r--drivers/s390/net/lcs.h2
-rw-r--r--drivers/scsi/3w-9xxx.c2
-rw-r--r--drivers/scsi/NCR5380.c2
-rw-r--r--drivers/scsi/aacraid/aachba.c2
-rw-r--r--drivers/scsi/bnx2i/bnx2i_init.c2
-rw-r--r--drivers/scsi/ch.c3
-rw-r--r--drivers/scsi/hptiop.c4
-rw-r--r--drivers/scsi/ibmvscsi/ibmvscsi.c6
-rw-r--r--drivers/scsi/iscsi_tcp.c26
-rw-r--r--drivers/scsi/iscsi_tcp.h2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c12
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fp.c6
-rw-r--r--drivers/scsi/qedi/qedi_main.c2
-rw-r--r--drivers/scsi/scsi_bsg.c4
-rw-r--r--drivers/scsi/scsi_ioctl.c38
-rw-r--r--drivers/scsi/sd.c39
-rw-r--r--drivers/scsi/sg.c16
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c2
-rw-r--r--drivers/scsi/sr.c22
-rw-r--r--drivers/scsi/st.c2
-rw-r--r--drivers/soc/qcom/qcom-geni-se.c67
-rw-r--r--drivers/spi/Kconfig7
-rw-r--r--drivers/spi/Makefile1
-rw-r--r--drivers/spi/spi-atmel.c18
-rw-r--r--drivers/spi/spi-cadence-quadspi.c19
-rw-r--r--drivers/spi/spi-cadence.c1
-rw-r--r--drivers/spi/spi-dw-core.c14
-rw-r--r--drivers/spi/spi-dw-dma.c76
-rw-r--r--drivers/spi/spi-dw-mmio.c22
-rw-r--r--drivers/spi/spi-dw.h1
-rw-r--r--drivers/spi/spi-fsl-lpspi.c8
-rw-r--r--drivers/spi/spi-geni-qcom.c107
-rw-r--r--drivers/spi/spi-hisi-kunpeng.c2
-rw-r--r--drivers/spi/spi-imx.c63
-rw-r--r--drivers/spi/spi-mt65xx.c33
-rw-r--r--drivers/spi/spi-pl022.c4
-rw-r--r--drivers/spi/spi-qcom-qspi.c218
-rw-r--r--drivers/spi/spi-rzv2m-csi.c667
-rw-r--r--drivers/spi/spi-s3c64xx.c197
-rw-r--r--drivers/spi/spi-sc18is602.c2
-rw-r--r--drivers/spi/spi-sn-f-ospi.c17
-rw-r--r--drivers/spi/spi-stm32.c274
-rw-r--r--drivers/spi/spi-sun6i.c133
-rw-r--r--drivers/spi/spi-xcomm.c2
-rw-r--r--drivers/spi/spidev.c5
-rw-r--r--drivers/target/iscsi/iscsi_target_util.c15
-rw-r--r--drivers/target/target_core_iblock.c11
-rw-r--r--drivers/target/target_core_pscsi.c9
-rw-r--r--drivers/thermal/Kconfig8
-rw-r--r--drivers/thermal/amlogic_thermal.c3
-rw-r--r--drivers/thermal/armada_thermal.c32
-rw-r--r--drivers/thermal/imx8mm_thermal.c3
-rw-r--r--drivers/thermal/imx_sc_thermal.c3
-rw-r--r--drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c218
-rw-r--r--drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h57
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c11
-rw-r--r--drivers/thermal/intel/intel_soc_dts_iosf.c2
-rw-r--r--drivers/thermal/k3_bandgap.c3
-rw-r--r--drivers/thermal/mediatek/auxadc_thermal.c14
-rw-r--r--drivers/thermal/mediatek/lvts_thermal.c4
-rw-r--r--drivers/thermal/qcom/qcom-spmi-adc-tm5.c4
-rw-r--r--drivers/thermal/qcom/qcom-spmi-temp-alarm.c38
-rw-r--r--drivers/thermal/qcom/tsens-v0_1.c126
-rw-r--r--drivers/thermal/qcom/tsens-v1.c22
-rw-r--r--drivers/thermal/qcom/tsens.c26
-rw-r--r--drivers/thermal/qcom/tsens.h6
-rw-r--r--drivers/thermal/qoriq_thermal.c52
-rw-r--r--drivers/thermal/rcar_gen3_thermal.c141
-rw-r--r--drivers/thermal/st/st_thermal.c4
-rw-r--r--drivers/thermal/st/st_thermal.h2
-rw-r--r--drivers/thermal/st/st_thermal_memmap.c6
-rw-r--r--drivers/thermal/sun8i_thermal.c59
-rw-r--r--drivers/thermal/tegra/tegra30-tsensor.c3
-rw-r--r--drivers/thermal/thermal-generic-adc.c4
-rw-r--r--drivers/thermal/thermal_core.h2
-rw-r--r--drivers/thermal/thermal_hwmon.c5
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c3
-rw-r--r--drivers/tty/serial/Kconfig4
-rw-r--r--drivers/tty/tty_io.c4
-rw-r--r--drivers/usb/core/buffer.c8
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c2
-rw-r--r--drivers/vfio/vfio_iommu_type1.c9
-rw-r--r--drivers/vhost/vdpa.c2
-rw-r--r--drivers/virt/acrn/ioreq.c4
-rw-r--r--drivers/virt/coco/sev-guest/Kconfig1
-rw-r--r--drivers/xen/privcmd.c2
-rw-r--r--drivers/xen/pvcalls-back.c4
-rw-r--r--fs/9p/vfs_file.c26
-rw-r--r--fs/Makefile10
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/file.c20
-rw-r--r--fs/afs/write.c23
-rw-r--r--fs/aio.c26
-rw-r--r--fs/autofs/root.c6
-rw-r--r--fs/befs/btree.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c12
-rw-r--r--fs/btrfs/async-thread.c44
-rw-r--r--fs/btrfs/async-thread.h3
-rw-r--r--fs/btrfs/bio.c122
-rw-r--r--fs/btrfs/bio.h29
-rw-r--r--fs/btrfs/block-group.c49
-rw-r--r--fs/btrfs/block-group.h9
-rw-r--r--fs/btrfs/block-rsv.c19
-rw-r--r--fs/btrfs/block-rsv.h2
-rw-r--r--fs/btrfs/btrfs_inode.h23
-rw-r--r--fs/btrfs/check-integrity.c21
-rw-r--r--fs/btrfs/compression.c50
-rw-r--r--fs/btrfs/compression.h7
-rw-r--r--fs/btrfs/ctree.c429
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/defrag.c3
-rw-r--r--fs/btrfs/delayed-ref.c110
-rw-r--r--fs/btrfs/delayed-ref.h25
-rw-r--r--fs/btrfs/dev-replace.c14
-rw-r--r--fs/btrfs/discard.c34
-rw-r--r--fs/btrfs/discard.h1
-rw-r--r--fs/btrfs/disk-io.c453
-rw-r--r--fs/btrfs/disk-io.h7
-rw-r--r--fs/btrfs/extent-io-tree.c37
-rw-r--r--fs/btrfs/extent-io-tree.h62
-rw-r--r--fs/btrfs/extent-tree.c186
-rw-r--r--fs/btrfs/extent-tree.h2
-rw-r--r--fs/btrfs/extent_io.c848
-rw-r--r--fs/btrfs/extent_io.h11
-rw-r--r--fs/btrfs/extent_map.c110
-rw-r--r--fs/btrfs/extent_map.h6
-rw-r--r--fs/btrfs/file-item.c90
-rw-r--r--fs/btrfs/file-item.h1
-rw-r--r--fs/btrfs/file.c20
-rw-r--r--fs/btrfs/free-space-cache.c122
-rw-r--r--fs/btrfs/free-space-cache.h2
-rw-r--r--fs/btrfs/free-space-tree.c3
-rw-r--r--fs/btrfs/fs.h5
-rw-r--r--fs/btrfs/inode-item.h16
-rw-r--r--fs/btrfs/inode.c592
-rw-r--r--fs/btrfs/ioctl.c35
-rw-r--r--fs/btrfs/locking.c5
-rw-r--r--fs/btrfs/lzo.c6
-rw-r--r--fs/btrfs/messages.c8
-rw-r--r--fs/btrfs/messages.h15
-rw-r--r--fs/btrfs/misc.h20
-rw-r--r--fs/btrfs/ordered-data.c364
-rw-r--r--fs/btrfs/ordered-data.h27
-rw-r--r--fs/btrfs/print-tree.c16
-rw-r--r--fs/btrfs/print-tree.h4
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/raid56.c49
-rw-r--r--fs/btrfs/raid56.h3
-rw-r--r--fs/btrfs/relocation.c61
-rw-r--r--fs/btrfs/relocation.h3
-rw-r--r--fs/btrfs/scrub.c147
-rw-r--r--fs/btrfs/send.c16
-rw-r--r--fs/btrfs/subpage.c97
-rw-r--r--fs/btrfs/subpage.h12
-rw-r--r--fs/btrfs/super.c22
-rw-r--r--fs/btrfs/tests/extent-io-tests.c16
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-checker.c156
-rw-r--r--fs/btrfs/tree-checker.h29
-rw-r--r--fs/btrfs/tree-log.c58
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/tree-mod-log.c257
-rw-r--r--fs/btrfs/volumes.c263
-rw-r--r--fs/btrfs/volumes.h79
-rw-r--r--fs/btrfs/zlib.c2
-rw-r--r--fs/btrfs/zoned.c159
-rw-r--r--fs/btrfs/zoned.h8
-rw-r--r--fs/btrfs/zstd.c2
-rw-r--r--fs/buffer.c263
-rw-r--r--fs/cachefiles/namei.c11
-rw-r--r--fs/ceph/file.c71
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/coda/file.c29
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/crypto/fscrypt_private.h2
-rw-r--r--fs/crypto/hooks.c10
-rw-r--r--fs/d_path.c1
-rw-r--r--fs/direct-io.c81
-rw-r--r--fs/dlm/config.c4
-rw-r--r--fs/dlm/lowcomms.c10
-rw-r--r--fs/ecryptfs/file.c27
-rw-r--r--fs/erofs/compress.h3
-rw-r--r--fs/erofs/data.c2
-rw-r--r--fs/erofs/decompressor.c8
-rw-r--r--fs/erofs/internal.h41
-rw-r--r--fs/erofs/super.c76
-rw-r--r--fs/erofs/utils.c86
-rw-r--r--fs/erofs/xattr.c670
-rw-r--r--fs/erofs/zdata.c269
-rw-r--r--fs/erofs/zmap.c75
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exfat/file.c2
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/file.c24
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/ioctl.c24
-rw-r--r--fs/ext4/namei.c17
-rw-r--r--fs/ext4/super.c27
-rw-r--r--fs/f2fs/file.c46
-rw-r--r--fs/f2fs/namei.c16
-rw-r--r--fs/f2fs/super.c12
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/file_table.c91
-rw-r--r--fs/fs-writeback.c16
-rw-r--r--fs/fs_context.c3
-rw-r--r--fs/fuse/file.c47
-rw-r--r--fs/gfs2/aops.c69
-rw-r--r--fs/gfs2/aops.h2
-rw-r--r--fs/gfs2/file.c10
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hostfs/hostfs.h1
-rw-r--r--fs/hostfs/hostfs_kern.c215
-rw-r--r--fs/hostfs/hostfs_user.c1
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hugetlbfs/inode.c13
-rw-r--r--fs/inode.c65
-rw-r--r--fs/internal.h48
-rw-r--r--fs/iomap/buffered-io.c15
-rw-r--r--fs/iomap/direct-io.c89
-rw-r--r--fs/jbd2/journal.c6
-rw-r--r--fs/jffs2/build.c5
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/xattr.c13
-rw-r--r--fs/jffs2/xattr.h4
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_logmgr.c12
-rw-r--r--fs/jfs/namei.c6
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/libfs.c41
-rw-r--r--fs/lockd/svc.c1
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/namei.c50
-rw-r--r--fs/namespace.c476
-rw-r--r--fs/netfs/iterator.c266
-rw-r--r--fs/nfs/blocklayout/dev.c10
-rw-r--r--fs/nfs/file.c29
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/export.c12
-rw-r--r--fs/nfsd/nfs3proc.c14
-rw-r--r--fs/nfsd/nfs3xdr.c11
-rw-r--r--fs/nfsd/nfs4xdr.c289
-rw-r--r--fs/nfsd/nfscache.c25
-rw-r--r--fs/nfsd/nfsctl.c116
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsproc.c14
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/nfsxdr.c11
-rw-r--r--fs/nfsd/trace.h259
-rw-r--r--fs/nfsd/vfs.c82
-rw-r--r--fs/nfsd/vfs.h9
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/page.c10
-rw-r--r--fs/nilfs2/segbuf.c6
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/super.c35
-rw-r--r--fs/no-block.c19
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/mft.c36
-rw-r--r--fs/ntfs/super.c4
-rw-r--r--fs/ntfs3/file.c34
-rw-r--r--fs/ocfs2/cluster/heartbeat.c7
-rw-r--r--fs/ocfs2/cluster/tcp.c38
-rw-r--r--fs/ocfs2/file.c43
-rw-r--r--fs/ocfs2/localalloc.c2
-rw-r--r--fs/ocfs2/ocfs2_trace.h5
-rw-r--r--fs/ocfs2/quota_local.c9
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/open.c90
-rw-r--r--fs/orangefs/file.c22
-rw-r--r--fs/overlayfs/file.c31
-rw-r--r--fs/overlayfs/overlayfs.h5
-rw-r--r--fs/pnode.c42
-rw-r--r--fs/pnode.h3
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/meminfo.c5
-rw-r--r--fs/proc/proc_sysctl.c246
-rw-r--r--fs/proc/task_mmu.c77
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/proc/vmcore.c4
-rw-r--r--fs/proc_namespace.c6
-rw-r--r--fs/pstore/blk.c4
-rw-r--r--fs/pstore/ram.c6
-rw-r--r--fs/pstore/ram_core.c2
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/readdir.c8
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/reiserfs/journal.c25
-rw-r--r--fs/reiserfs/reiserfs.h1
-rw-r--r--fs/reiserfs/xattr_security.c1
-rw-r--r--fs/remap_range.c5
-rw-r--r--fs/romfs/mmap-nommu.c2
-rw-r--r--fs/smb/client/cifsfs.c12
-rw-r--r--fs/smb/client/cifsfs.h3
-rw-r--r--fs/smb/client/file.c16
-rw-r--r--fs/smb/client/smb2ops.c4
-rw-r--r--fs/smb/client/smbdirect.c2
-rw-r--r--fs/smb/server/server.c33
-rw-r--r--fs/smb/server/smb2misc.c33
-rw-r--r--fs/smb/server/smb2pdu.c70
-rw-r--r--fs/smb/server/smbacl.c10
-rw-r--r--fs/smb/server/vfs.c117
-rw-r--r--fs/smb/server/vfs.h17
-rw-r--r--fs/smb/server/vfs_cache.c2
-rw-r--r--fs/splice.c345
-rw-r--r--fs/squashfs/block.c118
-rw-r--r--fs/squashfs/decompressor.c1
-rw-r--r--fs/squashfs/decompressor_multi_percpu.c1
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c17
-rw-r--r--fs/super.c72
-rw-r--r--fs/sysctls.c5
-rw-r--r--fs/sysv/dir.c22
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/sysv/itree.c4
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/namei.c14
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/userfaultfd.c62
-rw-r--r--fs/vboxsf/file.c2
-rw-r--r--fs/vboxsf/super.c2
-rw-r--r--fs/verity/Kconfig16
-rw-r--r--fs/verity/enable.c21
-rw-r--r--fs/verity/fsverity_private.h23
-rw-r--r--fs/verity/hash_algs.c139
-rw-r--r--fs/verity/measure.c37
-rw-r--r--fs/verity/open.c12
-rw-r--r--fs/verity/read_metadata.c4
-rw-r--r--fs/verity/signature.c8
-rw-r--r--fs/verity/verify.c164
-rw-r--r--fs/xfs/libxfs/xfs_btree.h2
-rw-r--r--fs/xfs/scrub/btree.h2
-rw-r--r--fs/xfs/xfs_file.c36
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_mount.h4
-rw-r--r--fs/xfs/xfs_super.c34
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/zonefs/file.c252
-rw-r--r--fs/zonefs/super.c11
-rw-r--r--fs/zonefs/zonefs.h2
-rw-r--r--include/acpi/acpi_bus.h2
-rw-r--r--include/acpi/acpixf.h1
-rw-r--r--include/acpi/actbl.h3
-rw-r--r--include/acpi/actbl3.h2
-rw-r--r--include/asm-generic/atomic.h3
-rw-r--r--include/asm-generic/bitops/atomic.h12
-rw-r--r--include/asm-generic/bitops/lock.h8
-rw-r--r--include/asm-generic/bug.h5
-rw-r--r--include/asm-generic/bugs.h11
-rw-r--r--include/asm-generic/percpu.h201
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/clocksource/hyperv_timer.h24
-rw-r--r--include/crypto/b128ops.h14
-rw-r--r--include/crypto/if_alg.h9
-rw-r--r--include/kunit/resource.h92
-rw-r--r--include/kunit/test.h34
-rw-r--r--include/linux/acpi.h9
-rw-r--r--include/linux/acpi_agdi.h13
-rw-r--r--include/linux/acpi_apmt.h19
-rw-r--r--include/linux/acpi_iort.h5
-rw-r--r--include/linux/amd-pstate.h4
-rw-r--r--include/linux/atomic/atomic-arch-fallback.h5153
-rw-r--r--include/linux/atomic/atomic-instrumented.h3555
-rw-r--r--include/linux/atomic/atomic-long.h2122
-rw-r--r--include/linux/audit.h2
-rw-r--r--include/linux/audit_arch.h2
-rw-r--r--include/linux/bio.h12
-rw-r--r--include/linux/blk-mq.h67
-rw-r--r--include/linux/blk_types.h4
-rw-r--r--include/linux/blkdev.h101
-rw-r--r--include/linux/blktrace_api.h6
-rw-r--r--include/linux/bpf.h7
-rw-r--r--include/linux/bpf_verifier.h52
-rw-r--r--include/linux/bpfilter.h1
-rw-r--r--include/linux/brcmphy.h66
-rw-r--r--include/linux/bsg.h2
-rw-r--r--include/linux/btf.h18
-rw-r--r--include/linux/buffer_head.h4
-rw-r--r--include/linux/cache.h6
-rw-r--r--include/linux/can/length.h302
-rw-r--r--include/linux/cdrom.h12
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/linux/compaction.h104
-rw-r--r--include/linux/compiler_attributes.h25
-rw-r--r--include/linux/context_tracking.h4
-rw-r--r--include/linux/context_tracking_state.h2
-rw-r--r--include/linux/cpu.h10
-rw-r--r--include/linux/cpufreq.h5
-rw-r--r--include/linux/cpuhotplug.h18
-rw-r--r--include/linux/cpumask.h2
-rw-r--r--include/linux/cpuset.h12
-rw-r--r--include/linux/delay.h1
-rw-r--r--include/linux/devfreq.h3
-rw-r--r--include/linux/device-mapper.h10
-rw-r--r--include/linux/device/driver.h2
-rw-r--r--include/linux/dma-map-ops.h61
-rw-r--r--include/linux/dma-mapping.h5
-rw-r--r--include/linux/dmar.h125
-rw-r--r--include/linux/efi.h16
-rw-r--r--include/linux/err.h48
-rw-r--r--include/linux/eventfd.h8
-rw-r--r--include/linux/fault-inject.h9
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/fortify-string.h161
-rw-r--r--include/linux/frontswap.h2
-rw-r--r--include/linux/fs.h90
-rw-r--r--include/linux/fsnotify.h4
-rw-r--r--include/linux/fsverity.h14
-rw-r--r--include/linux/gfp.h15
-rw-r--r--include/linux/gpio/driver.h8
-rw-r--r--include/linux/highmem.h2
-rw-r--r--include/linux/hugetlb.h33
-rw-r--r--include/linux/ieee80211.h287
-rw-r--r--include/linux/iio/iio.h2
-rw-r--r--include/linux/init.h19
-rw-r--r--include/linux/intel_rapl.h40
-rw-r--r--include/linux/io.h5
-rw-r--r--include/linux/io_uring.h18
-rw-r--r--include/linux/io_uring_types.h10
-rw-r--r--include/linux/irq.h59
-rw-r--r--include/linux/irqchip/mmp.h10
-rw-r--r--include/linux/irqchip/mxs.h11
-rw-r--r--include/linux/irqdesc.h3
-rw-r--r--include/linux/iscsi_ibft.h10
-rw-r--r--include/linux/jump_label.h2
-rw-r--r--include/linux/kallsyms.h17
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/kcov.h17
-rw-r--r--include/linux/key.h3
-rw-r--r--include/linux/kthread.h1
-rw-r--r--include/linux/leds.h58
-rw-r--r--include/linux/lockdep.h8
-rw-r--r--include/linux/lockdep_types.h8
-rw-r--r--include/linux/lsm_hook_defs.h1
-rw-r--r--include/linux/maple_tree.h130
-rw-r--r--include/linux/math.h22
-rw-r--r--include/linux/math64.h2
-rw-r--r--include/linux/mdio.h49
-rw-r--r--include/linux/mdio/mdio-regmap.h26
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h24
-rw-r--r--include/linux/memory_hotplug.h8
-rw-r--r--include/linux/mfd/axp20x.h32
-rw-r--r--include/linux/mfd/rk808.h417
-rw-r--r--include/linux/mfd/tps6594.h1020
-rw-r--r--include/linux/migrate.h20
-rw-r--r--include/linux/mlx5/device.h1
-rw-r--r--include/linux/mlx5/driver.h15
-rw-r--r--include/linux/mlx5/mlx5_ifc.h36
-rw-r--r--include/linux/mlx5/vport.h2
-rw-r--r--include/linux/mm.h273
-rw-r--r--include/linux/mm_inline.h14
-rw-r--r--include/linux/mm_types.h23
-rw-r--r--include/linux/mmc/card.h1
-rw-r--r--include/linux/mmc/sdio_ids.h3
-rw-r--r--include/linux/mmdebug.h14
-rw-r--r--include/linux/mmzone.h59
-rw-r--r--include/linux/module.h9
-rw-r--r--include/linux/mount.h3
-rw-r--r--include/linux/mroute.h11
-rw-r--r--include/linux/mroute6.h31
-rw-r--r--include/linux/mtd/blktrans.h2
-rw-r--r--include/linux/net.h10
-rw-r--r--include/linux/net_mm.h17
-rw-r--r--include/linux/netdevice.h39
-rw-r--r--include/linux/netfilter.h4
-rw-r--r--include/linux/netfs.h4
-rw-r--r--include/linux/netlink.h1
-rw-r--r--include/linux/nmi.h83
-rw-r--r--include/linux/nubus.h1
-rw-r--r--include/linux/nvme-fc-driver.h10
-rw-r--r--include/linux/olpc-ec.h2
-rw-r--r--include/linux/overflow.h18
-rw-r--r--include/linux/page-isolation.h23
-rw-r--r--include/linux/pagemap.h6
-rw-r--r--include/linux/pagevec.h67
-rw-r--r--include/linux/panic.h3
-rw-r--r--include/linux/parport.h2
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/pcs-altera-tse.h17
-rw-r--r--include/linux/pcs-lynx.h5
-rw-r--r--include/linux/pcs/pcs-xpcs.h9
-rw-r--r--include/linux/percpu-defs.h45
-rw-r--r--include/linux/percpu.h2
-rw-r--r--include/linux/perf/arm_pmu.h4
-rw-r--r--include/linux/perf_event.h16
-rw-r--r--include/linux/pgtable.h176
-rw-r--r--include/linux/phy.h55
-rw-r--r--include/linux/phylink.h122
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/pktcdvd.h1
-rw-r--r--include/linux/platform_data/spi-s3c64xx.h1
-rw-r--r--include/linux/proc_fs.h2
-rw-r--r--include/linux/ptp_clock_kernel.h11
-rw-r--r--include/linux/ramfs.h1
-rw-r--r--include/linux/rbtree_latch.h2
-rw-r--r--include/linux/rcupdate.h54
-rw-r--r--include/linux/ref_tracker.h25
-rw-r--r--include/linux/regmap.h33
-rw-r--r--include/linux/regulator/driver.h2
-rw-r--r--include/linux/regulator/mt6358-regulator.h10
-rw-r--r--include/linux/regulator/pca9450.h4
-rw-r--r--include/linux/root_dev.h9
-rw-r--r--include/linux/scatterlist.h84
-rw-r--r--include/linux/sched.h14
-rw-r--r--include/linux/sched/clock.h17
-rw-r--r--include/linux/sched/sd_flags.h5
-rw-r--r--include/linux/sched/signal.h2
-rw-r--r--include/linux/sched/topology.h2
-rw-r--r--include/linux/security.h6
-rw-r--r--include/linux/seqlock.h15
-rw-r--r--include/linux/sfp.h14
-rw-r--r--include/linux/skbuff.h128
-rw-r--r--include/linux/slab.h14
-rw-r--r--include/linux/slub_def.h12
-rw-r--r--include/linux/soc/qcom/geni-se.h4
-rw-r--r--include/linux/socket.h6
-rw-r--r--include/linux/spi/spi.h17
-rw-r--r--include/linux/splice.h6
-rw-r--r--include/linux/srcu.h8
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/string.h2
-rw-r--r--include/linux/sunrpc/svc.h34
-rw-r--r--include/linux/sunrpc/svc_rdma.h5
-rw-r--r--include/linux/sunrpc/xdr.h3
-rw-r--r--include/linux/suspend.h27
-rw-r--r--include/linux/swap.h29
-rw-r--r--include/linux/swapops.h17
-rw-r--r--include/linux/syscalls.h6
-rw-r--r--include/linux/sysctl.h45
-rw-r--r--include/linux/thread_info.h5
-rw-r--r--include/linux/time_namespace.h3
-rw-r--r--include/linux/types.h6
-rw-r--r--include/linux/uio.h25
-rw-r--r--include/linux/umh.h2
-rw-r--r--include/linux/userfaultfd_k.h6
-rw-r--r--include/linux/watch_queue.h3
-rw-r--r--include/linux/workqueue.h15
-rw-r--r--include/linux/zpool.h20
-rw-r--r--include/net/bonding.h4
-rw-r--r--include/net/cfg80211.h175
-rw-r--r--include/net/cfg802154.h3
-rw-r--r--include/net/devlink.h238
-rw-r--r--include/net/dsa.h26
-rw-r--r--include/net/flow.h3
-rw-r--r--include/net/flow_dissector.h23
-rw-r--r--include/net/gro.h27
-rw-r--r--include/net/gso.h109
-rw-r--r--include/net/ieee80211_radiotap.h2
-rw-r--r--include/net/ieee802154_netdev.h20
-rw-r--r--include/net/inet_common.h5
-rw-r--r--include/net/ip.h20
-rw-r--r--include/net/kcm.h2
-rw-r--r--include/net/mac80211.h88
-rw-r--r--include/net/macsec.h10
-rw-r--r--include/net/mctp.h4
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h18
-rw-r--r--include/net/netfilter/nf_flow_table.h4
-rw-r--r--include/net/netfilter/nf_tables.h34
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/phonet/phonet.h21
-rw-r--r--include/net/pkt_cls.h1
-rw-r--r--include/net/pkt_sched.h56
-rw-r--r--include/net/regulatory.h13
-rw-r--r--include/net/route.h6
-rw-r--r--include/net/rpl.h3
-rw-r--r--include/net/sch_generic.h14
-rw-r--r--include/net/scm.h72
-rw-r--r--include/net/sock.h13
-rw-r--r--include/net/tcp.h17
-rw-r--r--include/net/tls.h10
-rw-r--r--include/net/udp.h5
-rw-r--r--include/net/vxlan.h4
-rw-r--r--include/net/xdp_sock_drv.h4
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/net/xsk_buff_pool.h2
-rw-r--r--include/scsi/scsi_ioctl.h4
-rw-r--r--include/soc/imx/timer.h16
-rw-r--r--include/trace/events/block.h26
-rw-r--r--include/trace/events/btrfs.h39
-rw-r--r--include/trace/events/compaction.h11
-rw-r--r--include/trace/events/csd.h72
-rw-r--r--include/trace/events/mmflags.h4
-rw-r--r--include/trace/events/rpcrdma.h8
-rw-r--r--include/trace/events/sunrpc.h39
-rw-r--r--include/trace/events/timer.h6
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--include/uapi/asm-generic/socket.h3
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--include/uapi/linux/affs_hardblocks.h68
-rw-r--r--include/uapi/linux/auto_dev-ioctl.h2
-rw-r--r--include/uapi/linux/bpf.h31
-rw-r--r--include/uapi/linux/can.h1
-rw-r--r--include/uapi/linux/can/raw.h2
-rw-r--r--include/uapi/linux/capability.h5
-rw-r--r--include/uapi/linux/elf.h3
-rw-r--r--include/uapi/linux/eventfd.h11
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/io_uring.h16
-rw-r--r--include/uapi/linux/mdio.h24
-rw-r--r--include/uapi/linux/mman.h14
-rw-r--r--include/uapi/linux/mount.h3
-rw-r--r--include/uapi/linux/mptcp.h29
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h4
-rw-r--r--include/uapi/linux/nl80211.h30
-rw-r--r--include/uapi/linux/openvswitch.h1
-rw-r--r--include/uapi/linux/pkt_cls.h11
-rw-r--r--include/uapi/linux/pkt_sched.h10
-rw-r--r--include/uapi/linux/pktcdvd.h1
-rw-r--r--include/uapi/linux/ptp_clock.h3
-rw-r--r--include/uapi/linux/spi/spi.h3
-rw-r--r--include/uapi/linux/types.h4
-rw-r--r--include/uapi/linux/ublk_cmd.h33
-rw-r--r--include/uapi/linux/vfio.h9
-rw-r--r--include/xen/events.h3
-rw-r--r--include/xen/xen.h3
-rw-r--r--init/Kconfig10
-rw-r--r--init/do_mounts.c437
-rw-r--r--init/do_mounts.h14
-rw-r--r--init/do_mounts_initrd.c11
-rw-r--r--init/main.c43
-rw-r--r--io_uring/cancel.c5
-rw-r--r--io_uring/filetable.c11
-rw-r--r--io_uring/filetable.h28
-rw-r--r--io_uring/io_uring.c497
-rw-r--r--io_uring/io_uring.h17
-rw-r--r--io_uring/msg_ring.c4
-rw-r--r--io_uring/net.c77
-rw-r--r--io_uring/poll.c15
-rw-r--r--io_uring/poll.h2
-rw-r--r--io_uring/rsrc.c42
-rw-r--r--io_uring/rw.c6
-rw-r--r--io_uring/rw.h1
-rw-r--r--io_uring/tctx.c31
-rw-r--r--io_uring/timeout.c6
-rw-r--r--io_uring/uring_cmd.c16
-rw-r--r--kernel/Makefile5
-rw-r--r--kernel/audit.h2
-rw-r--r--kernel/bpf/bloom_filter.c3
-rw-r--r--kernel/bpf/bpf_local_storage.c3
-rw-r--r--kernel/bpf/bpf_lru_list.c21
-rw-r--r--kernel/bpf/bpf_lru_list.h7
-rw-r--r--kernel/bpf/bpf_struct_ops.c3
-rw-r--r--kernel/bpf/btf.c104
-rw-r--r--kernel/bpf/cgroup.c15
-rw-r--r--kernel/bpf/core.c8
-rw-r--r--kernel/bpf/cpumap.c4
-rw-r--r--kernel/bpf/cpumask.c38
-rw-r--r--kernel/bpf/devmap.c3
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c135
-rw-r--r--kernel/bpf/inode.c27
-rw-r--r--kernel/bpf/log.c3
-rw-r--r--kernel/bpf/lpm_trie.c3
-rw-r--r--kernel/bpf/memalloc.c31
-rw-r--r--kernel/bpf/preload/bpf_preload_kern.c4
-rw-r--r--kernel/bpf/queue_stack_maps.c4
-rw-r--r--kernel/bpf/reuseport_array.c3
-rw-r--r--kernel/bpf/stackmap.c3
-rw-r--r--kernel/bpf/syscall.c237
-rw-r--r--kernel/bpf/trampoline.c32
-rw-r--r--kernel/bpf/verifier.c1046
-rw-r--r--kernel/capability.c2
-rw-r--r--kernel/cgroup/cgroup-internal.h2
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/cgroup/cgroup.c104
-rw-r--r--kernel/cgroup/cpuset.c267
-rw-r--r--kernel/cgroup/legacy_freezer.c8
-rw-r--r--kernel/cgroup/misc.c1
-rw-r--r--kernel/cgroup/rdma.c2
-rw-r--r--kernel/cgroup/rstat.c26
-rw-r--r--kernel/context_tracking.c12
-rw-r--r--kernel/cpu.c402
-rw-r--r--kernel/dma/Kconfig7
-rw-r--r--kernel/dma/direct.c2
-rw-r--r--kernel/dma/direct.h3
-rw-r--r--kernel/events/core.c81
-rw-r--r--kernel/events/uprobes.c15
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/irq/chip.c17
-rw-r--r--kernel/irq/debugfs.c2
-rw-r--r--kernel/irq/internals.h13
-rw-r--r--kernel/irq/irqdesc.c77
-rw-r--r--kernel/irq/irqdomain.c2
-rw-r--r--kernel/irq/resend.c47
-rw-r--r--kernel/kallsyms.c95
-rw-r--r--kernel/kcov.c7
-rw-r--r--kernel/kexec_core.c93
-rw-r--r--kernel/kexec_file.c7
-rw-r--r--kernel/ksyms_common.c43
-rw-r--r--kernel/kthread.c14
-rw-r--r--kernel/locking/lock_events.h4
-rw-r--r--kernel/locking/lockdep.c118
-rw-r--r--kernel/locking/locktorture.c51
-rw-r--r--kernel/module/kallsyms.c28
-rw-r--r--kernel/module/main.c120
-rw-r--r--kernel/panic.c3
-rw-r--r--kernel/params.c2
-rw-r--r--kernel/pid_sysctl.h1
-rw-r--r--kernel/power/hibernate.c179
-rw-r--r--kernel/power/main.c33
-rw-r--r--kernel/power/power.h15
-rw-r--r--kernel/power/snapshot.c54
-rw-r--r--kernel/power/swap.c30
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/rcu/Kconfig18
-rw-r--r--kernel/rcu/rcu.h6
-rw-r--r--kernel/rcu/rcuscale.c199
-rw-r--r--kernel/rcu/tasks.h12
-rw-r--r--kernel/rcu/tree.c131
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_nocb.h52
-rw-r--r--kernel/rcu/tree_plugin.h4
-rw-r--r--kernel/sched/clock.c21
-rw-r--r--kernel/sched/core.c322
-rw-r--r--kernel/sched/cpufreq_schedutil.c3
-rw-r--r--kernel/sched/deadline.c124
-rw-r--r--kernel/sched/debug.c2
-rw-r--r--kernel/sched/fair.c329
-rw-r--r--kernel/sched/psi.c19
-rw-r--r--kernel/sched/sched.h107
-rw-r--r--kernel/sched/topology.c15
-rw-r--r--kernel/sched/wait.c7
-rw-r--r--kernel/signal.c23
-rw-r--r--kernel/smp.c43
-rw-r--r--kernel/smpboot.c163
-rw-r--r--kernel/softirq.c22
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c107
-rw-r--r--kernel/time/alarmtimer.c4
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/hrtimer.c3
-rw-r--r--kernel/time/posix-timers.c525
-rw-r--r--kernel/time/sched_clock.c24
-rw-r--r--kernel/time/tick-common.c13
-rw-r--r--kernel/time/tick-sched.c15
-rw-r--r--kernel/time/timekeeping.c4
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--kernel/trace/ftrace.c18
-rw-r--r--kernel/trace/trace.c10
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--kernel/trace/trace_events_inject.c4
-rw-r--r--kernel/trace/trace_events_user.c292
-rw-r--r--kernel/trace/trace_kprobe.c2
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/umh.c11
-rw-r--r--kernel/watch_queue.c12
-rw-r--r--kernel/watchdog.c356
-rw-r--r--kernel/watchdog_buddy.c113
-rw-r--r--kernel/watchdog_perf.c (renamed from kernel/watchdog_hld.c)105
-rw-r--r--kernel/workqueue.c335
-rw-r--r--kernel/workqueue_internal.h24
-rw-r--r--lib/Kconfig.debug131
-rw-r--r--lib/Kconfig.ubsan57
-rw-r--r--lib/Makefile4
-rw-r--r--lib/checksum_kunit.c334
-rw-r--r--lib/crypto/curve25519-hacl64.c2
-rw-r--r--lib/crypto/poly1305-donna64.c2
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/decompress_inflate.c2
-rw-r--r--lib/decompress_unxz.c2
-rw-r--r--lib/decompress_unzstd.c2
-rw-r--r--lib/devmem_is_allowed.c1
-rw-r--r--lib/devres.c2
-rw-r--r--lib/fortify_kunit.c14
-rw-r--r--lib/iov_iter.c466
-rw-r--r--lib/kobject.c3
-rw-r--r--lib/kunit/debugfs.c1
-rw-r--r--lib/kunit/executor_test.c11
-rw-r--r--lib/kunit/kunit-example-test.c56
-rw-r--r--lib/kunit/kunit-test.c88
-rw-r--r--lib/kunit/resource.c99
-rw-r--r--lib/kunit/test.c157
-rw-r--r--lib/maple_tree.c1600
-rw-r--r--lib/net_utils.c3
-rw-r--r--lib/overflow_kunit.c2
-rw-r--r--lib/raid6/neon.h22
-rw-r--r--lib/raid6/neon.uc1
-rw-r--r--lib/raid6/recov_neon.c8
-rw-r--r--lib/raid6/recov_neon_inner.c1
-rw-r--r--lib/ref_tracker.c179
-rw-r--r--lib/scatterlist.c269
-rw-r--r--lib/show_mem.c37
-rw-r--r--lib/strcat_kunit.c104
-rw-r--r--lib/string.c4
-rw-r--r--lib/string_helpers.c12
-rw-r--r--lib/test_bpf.c3
-rw-r--r--lib/test_maple_tree.c863
-rw-r--r--lib/test_ref_tracker.c2
-rw-r--r--lib/test_sysctl.c91
-rw-r--r--lib/ts_bm.c4
-rw-r--r--lib/ubsan.c3
-rw-r--r--lib/ubsan.h11
-rw-r--r--lib/zstd/common/zstd_deps.h18
-rw-r--r--mm/Kconfig16
-rw-r--r--mm/Makefile4
-rw-r--r--mm/backing-dev.c17
-rw-r--r--mm/cma.c4
-rw-r--r--mm/compaction.c334
-rw-r--r--mm/damon/core-test.h24
-rw-r--r--mm/damon/ops-common.c32
-rw-r--r--mm/damon/ops-common.h4
-rw-r--r--mm/damon/paddr.c6
-rw-r--r--mm/damon/vaddr.c26
-rw-r--r--mm/debug.c9
-rw-r--r--mm/debug_page_alloc.c59
-rw-r--r--mm/debug_vm_pgtable.c9
-rw-r--r--mm/dmapool.c10
-rw-r--r--mm/early_ioremap.c8
-rw-r--r--mm/fadvise.c17
-rw-r--r--mm/fail_page_alloc.c66
-rw-r--r--mm/filemap.c481
-rw-r--r--mm/frontswap.c10
-rw-r--r--mm/gup.c452
-rw-r--r--mm/gup_test.c27
-rw-r--r--mm/highmem.c12
-rw-r--r--mm/hmm.c6
-rw-r--r--mm/huge_memory.c56
-rw-r--r--mm/hugetlb.c126
-rw-r--r--mm/hugetlb_vmemmap.c17
-rw-r--r--mm/internal.h93
-rw-r--r--mm/kasan/common.c2
-rw-r--r--mm/kasan/generic.c76
-rw-r--r--mm/kasan/init.c9
-rw-r--r--mm/kasan/kasan.h159
-rw-r--r--mm/kasan/report.c44
-rw-r--r--mm/kasan/report_generic.c12
-rw-r--r--mm/kasan/report_hw_tags.c2
-rw-r--r--mm/kasan/report_sw_tags.c2
-rw-r--r--mm/kasan/shadow.c46
-rw-r--r--mm/kasan/sw_tags.c20
-rw-r--r--mm/kasan/tags.c2
-rw-r--r--mm/khugepaged.c126
-rw-r--r--mm/kmsan/core.c6
-rw-r--r--mm/kmsan/instrumentation.c2
-rw-r--r--mm/ksm.c38
-rw-r--r--mm/madvise.c150
-rw-r--r--mm/mapping_dirty_helpers.c38
-rw-r--r--mm/memblock.c42
-rw-r--r--mm/memcontrol.c253
-rw-r--r--mm/memfd.c9
-rw-r--r--mm/memory-failure.c45
-rw-r--r--mm/memory-tiers.c3
-rw-r--r--mm/memory.c378
-rw-r--r--mm/memory_hotplug.c42
-rw-r--r--mm/mempolicy.c28
-rw-r--r--mm/migrate.c382
-rw-r--r--mm/migrate_device.c46
-rw-r--r--mm/mincore.c11
-rw-r--r--mm/mlock.c10
-rw-r--r--mm/mm_init.c161
-rw-r--r--mm/mmap.c222
-rw-r--r--mm/mprotect.c89
-rw-r--r--mm/mremap.c35
-rw-r--r--mm/oom_kill.c8
-rw-r--r--mm/page-writeback.c6
-rw-r--r--mm/page_alloc.c1046
-rw-r--r--mm/page_io.c8
-rw-r--r--mm/page_isolation.c33
-rw-r--r--mm/page_owner.c2
-rw-r--r--mm/page_table_check.c6
-rw-r--r--mm/page_vma_mapped.c114
-rw-r--r--mm/pagewalk.c33
-rw-r--r--mm/percpu-internal.h11
-rw-r--r--mm/pgtable-generic.c58
-rw-r--r--mm/process_vm_access.c2
-rw-r--r--mm/ptdump.c2
-rw-r--r--mm/readahead.c1
-rw-r--r--mm/rmap.c36
-rw-r--r--mm/secretmem.c4
-rw-r--r--mm/shmem.c141
-rw-r--r--mm/show_mem.c429
-rw-r--r--mm/shrinker_debug.c39
-rw-r--r--mm/slab.c6
-rw-r--r--mm/slab.h58
-rw-r--r--mm/slab_common.c41
-rw-r--r--mm/slub.c139
-rw-r--r--mm/sparse-vmemmap.c8
-rw-r--r--mm/sparse.c12
-rw-r--r--mm/swap.c20
-rw-r--r--mm/swap_state.c87
-rw-r--r--mm/swapfile.c115
-rw-r--r--mm/truncate.c27
-rw-r--r--mm/userfaultfd.c12
-rw-r--r--mm/vmalloc.c147
-rw-r--r--mm/vmscan.c322
-rw-r--r--mm/vmstat.c18
-rw-r--r--mm/workingset.c158
-rw-r--r--mm/z3fold.c249
-rw-r--r--mm/zbud.c167
-rw-r--r--mm/zpool.c48
-rw-r--r--mm/zsmalloc.c408
-rw-r--r--mm/zswap.c239
-rw-r--r--net/Kconfig2
-rw-r--r--net/appletalk/ddp.c1
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/atm/svc.c1
-rw-r--r--net/ax25/af_ax25.c1
-rw-r--r--net/bpf/test_run.c204
-rw-r--r--net/bpfilter/bpfilter_kern.c2
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_private.h27
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/bcm.c1
-rw-r--r--net/can/isotp.c6
-rw-r--r--net/can/j1939/socket.c1
-rw-r--r--net/can/raw.c1
-rw-r--r--net/ceph/messenger_v1.c30
-rw-r--r--net/ceph/messenger_v2.c19
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c158
-rw-r--r--net/core/filter.c212
-rw-r--r--net/core/flow_dissector.c40
-rw-r--r--net/core/gro.c114
-rw-r--r--net/core/gso.c273
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/netdev-genl-gen.c2
-rw-r--r--net/core/netdev-genl-gen.h2
-rw-r--r--net/core/netpoll.c5
-rw-r--r--net/core/pktgen.c13
-rw-r--r--net/core/rtnetlink.c187
-rw-r--r--net/core/skbuff.c308
-rw-r--r--net/core/sock.c166
-rw-r--r--net/core/sock_map.c4
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/proto.c12
-rw-r--r--net/devlink/health.c2
-rw-r--r--net/devlink/leftover.c196
-rw-r--r--net/dsa/dsa.c26
-rw-r--r--net/dsa/port.c32
-rw-r--r--net/dsa/slave.c84
-rw-r--r--net/dsa/switch.c4
-rw-r--r--net/dsa/switch.h3
-rw-r--r--net/ethtool/ioctl.c15
-rw-r--r--net/ethtool/netlink.c12
-rw-r--r--net/handshake/genl.c2
-rw-r--r--net/handshake/genl.h2
-rw-r--r--net/hsr/hsr_device.c5
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/hsr/hsr_slave.c15
-rw-r--r--net/ieee802154/header_ops.c36
-rw-r--r--net/ieee802154/nl802154.c13
-rw-r--r--net/ieee802154/socket.c17
-rw-r--r--net/ieee802154/trace.h2
-rw-r--r--net/ipv4/af_inet.c56
-rw-r--r--net/ipv4/bpfilter/sockopt.c11
-rw-r--r--net/ipv4/esp4_offload.c4
-rw-r--r--net/ipv4/fou_nl.c2
-rw-r--r--net/ipv4/fou_nl.h2
-rw-r--r--net/ipv4/gre_offload.c1
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/ip_gre.c8
-rw-r--r--net/ipv4/ip_output.c170
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/ipmr.c63
-rw-r--r--net/ipv4/ping.c56
-rw-r--r--net/ipv4/raw.c26
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c19
-rw-r--r--net/ipv4/tcp.c334
-rw-r--r--net/ipv4/tcp_bpf.c73
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c26
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_offload.c8
-rw-r--r--net/ipv4/tcp_output.c195
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c375
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/udp_offload.c1
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/esp6_offload.c4
-rw-r--r--net/ipv6/exthdrs.c36
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c19
-rw-r--r--net/ipv6/ip6mr.c44
-rw-r--r--net/ipv6/raw.c17
-rw-r--r--net/ipv6/route.c25
-rw-r--r--net/ipv6/rpl.c7
-rw-r--r--net/ipv6/seg6_iptunnel.c3
-rw-r--r--net/ipv6/tcp_ipv6.c12
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp.c15
-rw-r--r--net/ipv6/udp_offload.c1
-rw-r--r--net/ipv6/xfrm6_input.c3
-rw-r--r--net/kcm/kcmsock.c339
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_core.h2
-rw-r--r--net/l2tp/l2tp_ip.c10
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/llc/af_llc.c1
-rw-r--r--net/mac80211/agg-tx.c14
-rw-r--r--net/mac80211/cfg.c102
-rw-r--r--net/mac80211/chan.c13
-rw-r--r--net/mac80211/debug.h8
-rw-r--r--net/mac80211/debugfs_netdev.c21
-rw-r--r--net/mac80211/debugfs_sta.c185
-rw-r--r--net/mac80211/driver-ops.h10
-rw-r--r--net/mac80211/eht.c5
-rw-r--r--net/mac80211/he.c3
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/ibss.c38
-rw-r--r--net/mac80211/ieee80211_i.h69
-rw-r--r--net/mac80211/iface.c48
-rw-r--r--net/mac80211/key.c8
-rw-r--r--net/mac80211/link.c57
-rw-r--r--net/mac80211/main.c7
-rw-r--r--net/mac80211/mesh.c40
-rw-r--r--net/mac80211/mesh.h19
-rw-r--r--net/mac80211/mesh_hwmp.c6
-rw-r--r--net/mac80211/mesh_plink.c37
-rw-r--r--net/mac80211/mesh_ps.c7
-rw-r--r--net/mac80211/mlme.c568
-rw-r--r--net/mac80211/ocb.c10
-rw-r--r--net/mac80211/offchannel.c4
-rw-r--r--net/mac80211/rx.c49
-rw-r--r--net/mac80211/scan.c95
-rw-r--r--net/mac80211/sta_info.c240
-rw-r--r--net/mac80211/status.c6
-rw-r--r--net/mac80211/tdls.c276
-rw-r--r--net/mac80211/trace.h10
-rw-r--r--net/mac80211/tx.c73
-rw-r--r--net/mac80211/util.c312
-rw-r--r--net/mac802154/ieee802154_i.h21
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mac802154/rx.c70
-rw-r--r--net/mac802154/scan.c68
-rw-r--r--net/mac802154/trace.h2
-rw-r--r--net/mctp/af_mctp.c1
-rw-r--r--net/mctp/route.c3
-rw-r--r--net/mpls/af_mpls.c1
-rw-r--r--net/mpls/mpls_gso.c1
-rw-r--r--net/mptcp/mib.c6
-rw-r--r--net/mptcp/mib.h18
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/mptcp/pm.c47
-rw-r--r--net/mptcp/pm_netlink.c143
-rw-r--r--net/mptcp/pm_userspace.c5
-rw-r--r--net/mptcp/protocol.c225
-rw-r--r--net/mptcp/protocol.h25
-rw-r--r--net/mptcp/sockopt.c153
-rw-r--r--net/mptcp/subflow.c25
-rw-r--r--net/ncsi/ncsi-rsp.c93
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipvs/Kconfig27
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c26
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c52
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c24
-rw-r--r--net/netfilter/nf_flow_table_ip.c232
-rw-r--r--net/netfilter/nf_nat_core.c92
-rw-r--r--net/netfilter/nf_tables_api.c449
-rw-r--r--net/netfilter/nfnetlink_osf.c1
-rw-r--r--net/netfilter/nfnetlink_queue.c1
-rw-r--r--net/netfilter/nft_bitwise.c2
-rw-r--r--net/netfilter/nft_byteorder.c6
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_exthdr.c110
-rw-r--r--net/netfilter/nft_flow_offload.c12
-rw-r--r--net/netfilter/nft_fwd_netdev.c2
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_immediate.c90
-rw-r--r--net/netfilter/nft_lookup.c23
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_payload.c3
-rw-r--r--net/netfilter/nft_range.c2
-rw-r--r--net/netfilter/nft_reject.c2
-rw-r--r--net/netfilter/nft_rt.c2
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c26
-rw-r--r--net/netfilter/nft_set_rbtree.c5
-rw-r--r--net/netfilter/nft_socket.c4
-rw-r--r--net/netfilter/nft_tproxy.c2
-rw-r--r--net/netfilter/nft_tunnel.c4
-rw-r--r--net/netfilter/nft_xfrm.c4
-rw-r--r--net/netfilter/xt_osf.c1
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/netlink/diag.c7
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/netrom/af_netrom.c1
-rw-r--r--net/nfc/llcp.h1
-rw-r--r--net/nfc/llcp_commands.c15
-rw-r--r--net/nfc/llcp_core.c49
-rw-r--r--net/nfc/llcp_sock.c18
-rw-r--r--net/nfc/netlink.c20
-rw-r--r--net/nfc/nfc.h1
-rw-r--r--net/nsh/nsh.c1
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/phonet/datagram.c11
-rw-r--r--net/phonet/pep.c11
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/qrtr/af_qrtr.c1
-rw-r--r--net/qrtr/ns.c2
-rw-r--r--net/rds/af_rds.c1
-rw-r--r--net/rds/tcp_send.c23
-rw-r--r--net/rose/af_rose.c1
-rw-r--r--net/rxrpc/af_rxrpc.c3
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/cls_flower.c132
-rw-r--r--net/sched/sch_cake.c1
-rw-r--r--net/sched/sch_htb.c7
-rw-r--r--net/sched/sch_netem.c62
-rw-r--r--net/sched/sch_taprio.c90
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sctp/offload.c1
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/socket.c26
-rw-r--r--net/sctp/stream_sched.c9
-rw-r--r--net/smc/af_smc.c29
-rw-r--r--net/smc/smc_stats.c2
-rw-r--r--net/smc/smc_stats.h1
-rw-r--r--net/smc/smc_tx.c19
-rw-r--r--net/smc/smc_tx.h2
-rw-r--r--net/socket.c89
-rw-r--r--net/sunrpc/svc.c51
-rw-r--r--net/sunrpc/svc_xprt.c26
-rw-r--r--net/sunrpc/svcsock.c76
-rw-r--r--net/sunrpc/xdr.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c36
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c24
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c62
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c18
-rw-r--r--net/tipc/bearer.c18
-rw-r--r--net/tipc/bearer.h4
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tls/tls.h8
-rw-r--r--net/tls/tls_device.c110
-rw-r--r--net/tls/tls_device_fallback.c2
-rw-r--r--net/tls/tls_main.c70
-rw-r--r--net/tls/tls_sw.c249
-rw-r--r--net/unix/Kconfig6
-rw-r--r--net/unix/af_unix.c226
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/wireless/core.c174
-rw-r--r--net/wireless/core.h14
-rw-r--r--net/wireless/nl80211.c119
-rw-r--r--net/wireless/pmsr.c4
-rw-r--r--net/wireless/rdev-ops.h27
-rw-r--r--net/wireless/reg.c20
-rw-r--r--net/wireless/scan.c1263
-rw-r--r--net/wireless/sme.c19
-rw-r--r--net/wireless/sysfs.c8
-rw-r--r--net/wireless/trace.h47
-rw-r--r--net/wireless/util.c110
-rw-r--r--net/wireless/wext-core.c6
-rw-r--r--net/wireless/wext-sme.c4
-rw-r--r--net/x25/af_x25.c1
-rw-r--r--net/xdp/xdp_umem.c2
-rw-r--r--net/xdp/xsk.c1
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xdp/xskmap.c4
-rw-r--r--net/xfrm/espintcp.c14
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_input.c8
-rw-r--r--net/xfrm/xfrm_interface_core.c55
-rw-r--r--net/xfrm/xfrm_ipcomp.c5
-rw-r--r--net/xfrm/xfrm_output.c1
-rw-r--r--net/xfrm/xfrm_policy.c14
-rw-r--r--rust/alloc/README.md3
-rw-r--r--rust/alloc/alloc.rs55
-rw-r--r--rust/alloc/boxed.rs446
-rw-r--r--rust/alloc/collections/mod.rs5
-rw-r--r--rust/alloc/lib.rs71
-rw-r--r--rust/alloc/raw_vec.rs16
-rw-r--r--rust/alloc/slice.rs445
-rw-r--r--rust/alloc/vec/drain.rs81
-rw-r--r--rust/alloc/vec/drain_filter.rs60
-rw-r--r--rust/alloc/vec/into_iter.rs125
-rw-r--r--rust/alloc/vec/is_zero.rs96
-rw-r--r--rust/alloc/vec/mod.rs464
-rw-r--r--rust/alloc/vec/set_len_on_drop.rs5
-rw-r--r--rust/alloc/vec/spec_extend.rs63
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/bindings/lib.rs1
-rw-r--r--rust/helpers.c7
-rw-r--r--rust/kernel/build_assert.rs2
-rw-r--r--rust/kernel/error.rs61
-rw-r--r--rust/kernel/init.rs5
-rw-r--r--rust/kernel/init/macros.rs85
-rw-r--r--rust/kernel/lib.rs4
-rw-r--r--rust/kernel/std_vendor.rs2
-rw-r--r--rust/kernel/str.rs22
-rw-r--r--rust/kernel/sync/arc.rs25
-rw-r--r--rust/kernel/task.rs10
-rw-r--r--rust/kernel/types.rs13
-rw-r--r--rust/macros/helpers.rs86
-rw-r--r--rust/macros/pin_data.rs168
-rw-r--r--rust/macros/quote.rs14
-rw-r--r--rust/uapi/lib.rs1
-rw-r--r--samples/bpf/tcp_basertt_kern.c2
-rw-r--r--samples/bpf/xdp1_kern.c2
-rw-r--r--samples/bpf/xdp2_kern.c2
-rw-r--r--samples/kmemleak/kmemleak-test.c2
-rw-r--r--scripts/Makefile.build2
-rw-r--r--scripts/Makefile.ubsan2
-rwxr-xr-xscripts/atomic/atomic-tbl.sh112
-rw-r--r--scripts/atomic/atomics.tbl2
-rwxr-xr-xscripts/atomic/fallbacks/acquire4
-rwxr-xr-xscripts/atomic/fallbacks/add_negative14
-rwxr-xr-xscripts/atomic/fallbacks/add_unless15
-rwxr-xr-xscripts/atomic/fallbacks/andnot6
-rw-r--r--scripts/atomic/fallbacks/cmpxchg3
-rwxr-xr-xscripts/atomic/fallbacks/dec6
-rwxr-xr-xscripts/atomic/fallbacks/dec_and_test14
-rwxr-xr-xscripts/atomic/fallbacks/dec_if_positive8
-rwxr-xr-xscripts/atomic/fallbacks/dec_unless_positive8
-rwxr-xr-xscripts/atomic/fallbacks/fence4
-rwxr-xr-xscripts/atomic/fallbacks/fetch_add_unless17
-rwxr-xr-xscripts/atomic/fallbacks/inc6
-rwxr-xr-xscripts/atomic/fallbacks/inc_and_test14
-rwxr-xr-xscripts/atomic/fallbacks/inc_not_zero13
-rwxr-xr-xscripts/atomic/fallbacks/inc_unless_negative8
-rwxr-xr-xscripts/atomic/fallbacks/read_acquire6
-rwxr-xr-xscripts/atomic/fallbacks/release4
-rwxr-xr-xscripts/atomic/fallbacks/set_release6
-rwxr-xr-xscripts/atomic/fallbacks/sub_and_test15
-rwxr-xr-xscripts/atomic/fallbacks/try_cmpxchg6
-rw-r--r--scripts/atomic/fallbacks/xchg3
-rwxr-xr-xscripts/atomic/gen-atomic-fallback.sh266
-rwxr-xr-xscripts/atomic/gen-atomic-instrumented.sh42
-rwxr-xr-xscripts/atomic/gen-atomic-long.sh38
-rw-r--r--scripts/atomic/kerneldoc/add13
-rw-r--r--scripts/atomic/kerneldoc/add_negative13
-rw-r--r--scripts/atomic/kerneldoc/add_unless18
-rw-r--r--scripts/atomic/kerneldoc/and13
-rw-r--r--scripts/atomic/kerneldoc/andnot13
-rw-r--r--scripts/atomic/kerneldoc/cmpxchg14
-rw-r--r--scripts/atomic/kerneldoc/dec12
-rw-r--r--scripts/atomic/kerneldoc/dec_and_test12
-rw-r--r--scripts/atomic/kerneldoc/dec_if_positive12
-rw-r--r--scripts/atomic/kerneldoc/dec_unless_positive12
-rw-r--r--scripts/atomic/kerneldoc/inc12
-rw-r--r--scripts/atomic/kerneldoc/inc_and_test12
-rw-r--r--scripts/atomic/kerneldoc/inc_not_zero12
-rw-r--r--scripts/atomic/kerneldoc/inc_unless_negative12
-rw-r--r--scripts/atomic/kerneldoc/or13
-rw-r--r--scripts/atomic/kerneldoc/read12
-rw-r--r--scripts/atomic/kerneldoc/set13
-rw-r--r--scripts/atomic/kerneldoc/sub13
-rw-r--r--scripts/atomic/kerneldoc/sub_and_test13
-rw-r--r--scripts/atomic/kerneldoc/try_cmpxchg15
-rw-r--r--scripts/atomic/kerneldoc/xchg13
-rw-r--r--scripts/atomic/kerneldoc/xor13
-rwxr-xr-xscripts/check-sysctl-docs10
-rwxr-xr-xscripts/checkpatch.pl24
-rw-r--r--scripts/gdb/linux/constants.py.in12
-rwxr-xr-xscripts/gfp-translate6
-rwxr-xr-xscripts/kernel-doc2
-rwxr-xr-xscripts/min-tool-version.sh8
-rw-r--r--scripts/mod/modpost.c5
-rw-r--r--scripts/orc_hash.sh16
-rwxr-xr-xscripts/pahole-flags.sh3
-rw-r--r--scripts/spelling.txt22
-rw-r--r--security/commoncap.c20
-rw-r--r--security/device_cgroup.c3
-rw-r--r--security/integrity/evm/evm_crypto.c2
-rw-r--r--security/integrity/evm/evm_main.c4
-rw-r--r--security/integrity/iint.c15
-rw-r--r--security/integrity/ima/ima_api.c40
-rw-r--r--security/integrity/ima/ima_main.c12
-rw-r--r--security/integrity/ima/ima_modsig.c3
-rw-r--r--security/integrity/ima/ima_policy.c3
-rw-r--r--security/keys/sysctl.c7
-rw-r--r--security/landlock/Kconfig2
-rw-r--r--security/lsm_audit.c2
-rw-r--r--security/safesetid/lsm.c2
-rw-r--r--security/security.c21
-rw-r--r--security/selinux/Makefile30
-rw-r--r--security/selinux/avc.c20
-rw-r--r--security/selinux/hooks.c78
-rw-r--r--security/selinux/ima.c2
-rw-r--r--security/selinux/include/audit.h2
-rw-r--r--security/selinux/include/avc.h3
-rw-r--r--security/selinux/include/ibpkey.h1
-rw-r--r--security/selinux/include/ima.h2
-rw-r--r--security/selinux/include/initial_sid_to_string.h3
-rw-r--r--security/selinux/include/security.h2
-rw-r--r--security/selinux/netlabel.c8
-rw-r--r--security/selinux/selinuxfs.c4
-rw-r--r--security/selinux/ss/avtab.c2
-rw-r--r--security/selinux/ss/avtab.h2
-rw-r--r--security/selinux/ss/conditional.c8
-rw-r--r--security/selinux/ss/conditional.h2
-rw-r--r--security/selinux/ss/context.h2
-rw-r--r--security/selinux/ss/policydb.c6
-rw-r--r--security/selinux/ss/policydb.h2
-rw-r--r--security/selinux/ss/services.c40
-rw-r--r--security/smack/smack.h1
-rw-r--r--security/smack/smack_lsm.c63
-rw-r--r--security/tomoyo/domain.c2
-rw-r--r--sound/pci/hda/patch_realtek.c2
-rw-r--r--sound/soc/codecs/Kconfig2
-rw-r--r--sound/soc/intel/boards/sof_sdw.c2
-rw-r--r--tools/arch/x86/include/asm/nops.h16
-rw-r--r--tools/arch/x86/kcpuid/.gitignore1
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst8
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst11
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool7
-rw-r--r--tools/bpf/bpftool/common.c9
-rw-r--r--tools/bpf/bpftool/feature.c24
-rw-r--r--tools/bpf/bpftool/iter.c2
-rw-r--r--tools/bpf/bpftool/link.c16
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/map.c19
-rw-r--r--tools/bpf/bpftool/prog.c53
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/resolve_btfids/Makefile4
-rw-r--r--tools/include/nolibc/Makefile19
-rw-r--r--tools/include/nolibc/arch-aarch64.h39
-rw-r--r--tools/include/nolibc/arch-arm.h74
-rw-r--r--tools/include/nolibc/arch-i386.h48
-rw-r--r--tools/include/nolibc/arch-loongarch.h49
-rw-r--r--tools/include/nolibc/arch-mips.h64
-rw-r--r--tools/include/nolibc/arch-riscv.h51
-rw-r--r--tools/include/nolibc/arch-s390.h15
-rw-r--r--tools/include/nolibc/arch-x86_64.h42
-rw-r--r--tools/include/nolibc/arch.h2
-rw-r--r--tools/include/nolibc/compiler.h25
-rw-r--r--tools/include/nolibc/nolibc.h2
-rw-r--r--tools/include/nolibc/stackprotector.h19
-rw-r--r--tools/include/nolibc/stdint.h24
-rw-r--r--tools/include/nolibc/stdio.h95
-rw-r--r--tools/include/nolibc/stdlib.h18
-rw-r--r--tools/include/nolibc/string.h4
-rw-r--r--tools/include/nolibc/sys.h131
-rw-r--r--tools/include/nolibc/types.h14
-rw-r--r--tools/include/nolibc/unistd.h15
-rw-r--r--tools/include/uapi/asm-generic/socket.h3
-rw-r--r--tools/include/uapi/linux/bpf.h31
-rw-r--r--tools/lib/bpf/bpf.c17
-rw-r--r--tools/lib/bpf/bpf.h18
-rw-r--r--tools/lib/bpf/bpf_helpers.h15
-rw-r--r--tools/lib/bpf/bpf_tracing.h3
-rw-r--r--tools/lib/bpf/btf.c2
-rw-r--r--tools/lib/bpf/btf_dump.c22
-rw-r--r--tools/lib/bpf/gen_loader.c14
-rw-r--r--tools/lib/bpf/libbpf.c154
-rw-r--r--tools/lib/bpf/libbpf.h18
-rw-r--r--tools/lib/bpf/libbpf.map5
-rw-r--r--tools/lib/bpf/libbpf_probes.c2
-rw-r--r--tools/lib/bpf/libbpf_version.h2
-rw-r--r--tools/lib/bpf/usdt.c5
-rw-r--r--tools/lib/subcmd/parse-options.h8
-rw-r--r--tools/lib/subcmd/subcmd-util.h5
-rw-r--r--tools/net/ynl/Makefile19
-rw-r--r--tools/net/ynl/Makefile.deps20
-rw-r--r--tools/net/ynl/generated/Makefile50
-rw-r--r--tools/net/ynl/generated/devlink-user.c721
-rw-r--r--tools/net/ynl/generated/devlink-user.h210
-rw-r--r--tools/net/ynl/generated/ethtool-user.c6353
-rw-r--r--tools/net/ynl/generated/ethtool-user.h5531
-rw-r--r--tools/net/ynl/generated/fou-user.c328
-rw-r--r--tools/net/ynl/generated/fou-user.h337
-rw-r--r--tools/net/ynl/generated/handshake-user.c331
-rw-r--r--tools/net/ynl/generated/handshake-user.h145
-rw-r--r--tools/net/ynl/generated/netdev-user.c200
-rw-r--r--tools/net/ynl/generated/netdev-user.h85
-rw-r--r--tools/net/ynl/lib/Makefile28
-rw-r--r--tools/net/ynl/lib/nlspec.py46
-rw-r--r--tools/net/ynl/lib/ynl.c901
-rw-r--r--tools/net/ynl/lib/ynl.h237
-rw-r--r--tools/net/ynl/lib/ynl.py137
-rw-r--r--tools/net/ynl/samples/.gitignore3
-rw-r--r--tools/net/ynl/samples/Makefile30
-rw-r--r--tools/net/ynl/samples/devlink.c60
-rw-r--r--tools/net/ynl/samples/ethtool.c65
-rw-r--r--tools/net/ynl/samples/netdev.c108
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py745
-rwxr-xr-xtools/net/ynl/ynl-regen.sh6
-rw-r--r--tools/objtool/Documentation/objtool.txt10
-rw-r--r--tools/objtool/arch/powerpc/include/arch/elf.h11
-rw-r--r--tools/objtool/arch/x86/decode.c6
-rw-r--r--tools/objtool/arch/x86/include/arch/elf.h11
-rw-r--r--tools/objtool/arch/x86/special.c14
-rw-r--r--tools/objtool/builtin-check.c5
-rw-r--r--tools/objtool/check.c643
-rw-r--r--tools/objtool/elf.c521
-rw-r--r--tools/objtool/include/objtool/builtin.h1
-rw-r--r--tools/objtool/include/objtool/cfi.h1
-rw-r--r--tools/objtool/include/objtool/elf.h309
-rw-r--r--tools/objtool/include/objtool/warn.h21
-rw-r--r--tools/objtool/noreturns.h46
-rw-r--r--tools/objtool/orc_gen.c8
-rw-r--r--tools/objtool/special.c4
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/amd-ibs-via-core-pmu.c71
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c2
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h1
-rw-r--r--tools/perf/trace/beauty/msg_flags.c6
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c2
-rw-r--r--tools/spi/spidev_test.c107
-rw-r--r--tools/testing/kunit/configs/all_tests.config2
-rw-r--r--tools/testing/kunit/configs/arch_uml.config3
-rw-r--r--tools/testing/kunit/kunit_kernel.py6
-rw-r--r--tools/testing/kunit/mypy.ini6
-rwxr-xr-xtools/testing/kunit/run_checks.py2
-rw-r--r--tools/testing/radix-tree/linux/init.h1
-rw-r--r--tools/testing/radix-tree/maple.c164
-rw-r--r--tools/testing/selftests/Makefile22
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c22
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c32
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c3
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c86
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6483
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/bench.c15
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c14
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_loop.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c14
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage.c12
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c8
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c10
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c15
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_strncmp.c11
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c21
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh26
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c182
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h107
-rw-r--r--tools/testing/selftests/bpf/config4
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c268
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_map_resize.c227
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_multi.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs_extable.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c312
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c121
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h4
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c13
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h6
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c64
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c308
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c337
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c26
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c4
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c3
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_fail.c9
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c3
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c17
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c5
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c5
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog.c145
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c18
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c26
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c10
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c58
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c659
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c79
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c536
-rw-r--r--tools/testing/selftests/bpf/progs/vrf_socket_lookup.c89
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c4
-rw-r--r--tools/testing/selftests/bpf/test_progs.c113
-rw-r--r--tools/testing/selftests/bpf/test_progs.h1
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c194
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c268
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c143
-rw-r--r--tools/testing/selftests/bpf/veristat.c9
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c47
-rw-r--r--tools/testing/selftests/bpf/xdp_metadata.h1
-rw-r--r--tools/testing/selftests/bpf/xsk.h5
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c771
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h31
-rw-r--r--tools/testing/selftests/cachestat/.gitignore2
-rw-r--r--tools/testing/selftests/cachestat/Makefile8
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c269
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c9
-rw-r--r--tools/testing/selftests/clone3/clone3.c5
-rw-r--r--tools/testing/selftests/cpufreq/config8
-rw-r--r--tools/testing/selftests/damon/config7
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh5
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh41
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc34
-rw-r--r--tools/testing/selftests/kselftest/runner.sh11
-rw-r--r--tools/testing/selftests/kselftest_harness.h6
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c53
-rw-r--r--tools/testing/selftests/landlock/config9
-rw-r--r--tools/testing/selftests/landlock/config.um1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c387
-rw-r--r--tools/testing/selftests/lib.mk40
-rw-r--r--tools/testing/selftests/media_tests/video_device_test.c111
-rw-r--r--tools/testing/selftests/mm/.gitignore3
-rw-r--r--tools/testing/selftests/mm/Makefile18
-rw-r--r--tools/testing/selftests/mm/cow.c37
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c459
-rw-r--r--tools/testing/selftests/mm/hugepage-shm.c4
-rw-r--r--tools/testing/selftests/mm/hugepage-vmemmap.c4
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c8
-rw-r--r--tools/testing/selftests/mm/khugepaged.c11
-rw-r--r--tools/testing/selftests/mm/madv_populate.c7
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c4
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c12
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/migration.c5
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c1
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c1
-rw-r--r--tools/testing/selftests/mm/mlock2.h8
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c10
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c4
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c4
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h3
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h20
-rw-r--r--tools/testing/selftests/mm/protection_keys.c13
-rw-r--r--tools/testing/selftests/mm/run_vmtests.sh10
-rw-r--r--tools/testing/selftests/mm/uffd-common.c59
-rw-r--r--tools/testing/selftests/mm/uffd-common.h5
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c10
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c16
-rw-r--r--tools/testing/selftests/mm/vm_util.c86
-rw-r--r--tools/testing/selftests/mm/vm_util.h5
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile3
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c430
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh114
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile2
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh3
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh24
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_cfm.sh206
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh350
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh741
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c120
-rw-r--r--tools/testing/selftests/net/nettest.c46
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh1
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nolocalbypass.sh240
-rw-r--r--tools/testing/selftests/net/tls.c155
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh32
-rw-r--r--tools/testing/selftests/nolibc/.gitignore1
-rw-r--r--tools/testing/selftests/nolibc/Makefile35
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c231
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c3
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/ptp/testptp.c29
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh7
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh115
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/filter.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json25
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c177
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c88
-rw-r--r--tools/testing/selftests/user_events/perf_test.c82
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c4
-rw-r--r--tools/workqueue/wq_monitor.py168
-rw-r--r--virt/kvm/async_pf.c3
-rw-r--r--virt/kvm/kvm_main.c33
3913 files changed, 165352 insertions, 60541 deletions
diff --git a/.gitattributes b/.gitattributes
index c9ba5bfc4036..2325c529e185 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2,3 +2,4 @@
*.[ch] diff=cpp
*.dts diff=dts
*.dts[io] diff=dts
+*.rs diff=rust
diff --git a/.mailmap b/.mailmap
index 650689d00930..4d71480e193f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -70,6 +70,8 @@ Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
+Ben Dooks <ben-linux@fluff.org> <ben.dooks@simtec.co.uk>
+Ben Dooks <ben-linux@fluff.org> <ben.dooks@sifive.com>
Ben Gardner <bgardner@wabtec.com>
Ben M Cahill <ben.m.cahill@intel.com>
Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net>
@@ -181,6 +183,8 @@ Henrik Rydberg <rydberg@bitmath.org>
Herbert Xu <herbert@gondor.apana.org.au>
Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
+J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
+J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
Jacob Shin <Jacob.Shin@amd.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
diff --git a/CREDITS b/CREDITS
index de7e4dbbc599..8b4882024635 100644
--- a/CREDITS
+++ b/CREDITS
@@ -383,6 +383,12 @@ E: tomas@nocrew.org
W: http://tomas.nocrew.org/
D: dsp56k device driver
+N: Srivatsa S. Bhat
+E: srivatsa@csail.mit.edu
+D: Maintainer of Generic Paravirt-Ops subsystem
+D: Maintainer of VMware hypervisor interface
+D: Maintainer of VMware virtual PTP clock driver (ptp_vmw)
+
N: Ross Biro
E: ross.biro@gmail.com
D: Original author of the Linux networking code
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
index 646540950e38..78b62a23b14a 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
@@ -13,6 +13,11 @@ Description:
Specifies the duration of the LED blink in milliseconds.
Defaults to 50 ms.
+ With hw_control ON, the interval value MUST be set to the
+ default value and cannot be changed.
+ Trying to set any value in this specific mode will return
+ an EINVAL error.
+
What: /sys/class/leds/<led>/link
Date: Dec 2017
KernelVersion: 4.16
@@ -39,6 +44,9 @@ Description:
If set to 1, the LED will blink for the milliseconds specified
in interval to signal transmission.
+ With hw_control ON, the blink interval is controlled by hardware
+ and won't reflect the value set in interval.
+
What: /sys/class/leds/<led>/rx
Date: Dec 2017
KernelVersion: 4.16
@@ -50,3 +58,84 @@ Description:
If set to 1, the LED will blink for the milliseconds specified
in interval to signal reception.
+
+ With hw_control ON, the blink interval is controlled by hardware
+ and won't reflect the value set in interval.
+
+What: /sys/class/leds/<led>/hw_control
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Communicate whether the LED trigger modes are driven by hardware
+ or software fallback is used.
+
+ If 0, the LED is using software fallback to blink.
+
+ If 1, the LED is using hardware control to blink and signal the
+ requested modes.
+
+What: /sys/class/leds/<led>/link_10
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 10Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 10Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/link_100
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 100Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 100Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/link_1000
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link speed state of 1000Mbps of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link state
+ speed of 1000Mbps of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/half_duplex
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link half duplex state of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link half
+ duplex state of the named network device.
+ Setting this value also immediately changes the LED state.
+
+What: /sys/class/leds/<led>/full_duplex
+Date: Jun 2023
+KernelVersion: 6.5
+Contact: linux-leds@vger.kernel.org
+Description:
+ Signal the link full duplex state of the named network device.
+
+ If set to 0 (default), the LED's normal state is off.
+
+ If set to 1, the LED's normal state reflects the link full
+ duplex state of the named network device.
+ Setting this value also immediately changes the LED state.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index f54867cadb0f..ecd585ca2d50 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -670,7 +670,7 @@ Description: Preferred MTE tag checking mode
"async" Prefer asynchronous mode
================ ==============================================
- See also: Documentation/arm64/memory-tagging-extension.rst
+ See also: Documentation/arch/arm64/memory-tagging-extension.rst
What: /sys/devices/system/cpu/nohz_full
Date: Apr 2015
diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst
index 49387d823619..f3b605285a87 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
@@ -2071,41 +2071,7 @@ call.
Because RCU avoids interrupting idle CPUs, it is illegal to execute an
RCU read-side critical section on an idle CPU. (Kernels built with
-``CONFIG_PROVE_RCU=y`` will splat if you try it.) The RCU_NONIDLE()
-macro and ``_rcuidle`` event tracing is provided to work around this
-restriction. In addition, rcu_is_watching() may be used to test
-whether or not it is currently legal to run RCU read-side critical
-sections on this CPU. I learned of the need for diagnostics on the one
-hand and RCU_NONIDLE() on the other while inspecting idle-loop code.
-Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite
-heavily in the idle loop. However, there are some restrictions on the
-code placed within RCU_NONIDLE():
-
-#. Blocking is prohibited. In practice, this is not a serious
- restriction given that idle tasks are prohibited from blocking to
- begin with.
-#. Although nesting RCU_NONIDLE() is permitted, they cannot nest
- indefinitely deeply. However, given that they can be nested on the
- order of a million deep, even on 32-bit systems, this should not be a
- serious restriction. This nesting limit would probably be reached
- long after the compiler OOMed or the stack overflowed.
-#. Any code path that enters RCU_NONIDLE() must sequence out of that
- same RCU_NONIDLE(). For example, the following is grossly
- illegal:
-
- ::
-
- 1 RCU_NONIDLE({
- 2 do_something();
- 3 goto bad_idea; /* BUG!!! */
- 4 do_something_else();});
- 5 bad_idea:
-
-
- It is just as illegal to transfer control into the middle of
- RCU_NONIDLE()'s argument. Yes, in theory, you could transfer in
- as long as you also transferred out, but in practice you could also
- expect to get sharply worded review comments.
+``CONFIG_PROVE_RCU=y`` will splat if you try it.)
It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU
running in userspace. RCU must therefore track ``nohz_full`` userspace
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 8eddef28d3a1..e488c8e557a9 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -1117,7 +1117,6 @@ All: lockdep-checked RCU utility APIs::
RCU_LOCKDEP_WARN
rcu_sleep_check
- RCU_NONIDLE
All: Unchecked RCU-protected pointer access::
diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
index bb5032a99234..6fdb495ac466 100644
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -508,9 +508,6 @@ cache_miss_collisions
cache miss, but raced with a write and data was already present (usually 0
since the synchronization for cache misses was rewritten)
-cache_readaheads
- Count of times readahead occurred.
-
Sysfs - cache set
~~~~~~~~~~~~~~~~~
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 47d1d7d932a8..fabaad3fd9c2 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -297,7 +297,7 @@ Lock order is as follows::
Page lock (PG_locked bit of page->flags)
mm->page_table_lock or split pte_lock
- lock_page_memcg (memcg->move_lock)
+ folio_memcg_lock (memcg->move_lock)
mapping->i_pages lock
lruvec->lru_lock.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index e592a9364473..4ef890191196 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1580,6 +1580,13 @@ PAGE_SIZE multiple when read back.
Healthy workloads are not expected to reach this limit.
+ memory.swap.peak
+ A read-only single value file which exists on non-root
+ cgroups.
+
+ The max swap usage recorded for the cgroup and its
+ descendants since the creation of the cgroup.
+
memory.swap.max
A read-write single value file which exists on non-root
cgroups. The default is "max".
@@ -2022,31 +2029,33 @@ that attribute:
no-change
Do not modify the I/O priority class.
- none-to-rt
- For requests that do not have an I/O priority class (NONE),
- change the I/O priority class into RT. Do not modify
- the I/O priority class of other requests.
+ promote-to-rt
+ For requests that have a non-RT I/O priority class, change it into RT.
+ Also change the priority level of these requests to 4. Do not modify
+ the I/O priority of requests that have priority class RT.
restrict-to-be
For requests that do not have an I/O priority class or that have I/O
- priority class RT, change it into BE. Do not modify the I/O priority
- class of requests that have priority class IDLE.
+ priority class RT, change it into BE. Also change the priority level
+ of these requests to 0. Do not modify the I/O priority class of
+ requests that have priority class IDLE.
idle
Change the I/O priority class of all requests into IDLE, the lowest
I/O priority class.
+ none-to-rt
+ Deprecated. Just an alias for promote-to-rt.
+
The following numerical values are associated with the I/O priority policies:
-+-------------+---+
-| no-change | 0 |
-+-------------+---+
-| none-to-rt | 1 |
-+-------------+---+
-| rt-to-be | 2 |
-+-------------+---+
-| all-to-idle | 3 |
-+-------------+---+
++----------------+---+
+| no-change | 0 |
++----------------+---+
+| rt-to-be | 2 |
++----------------+---+
+| all-to-idle | 3 |
++----------------+---+
The numerical value that corresponds to each I/O priority class is as follows:
@@ -2062,9 +2071,13 @@ The numerical value that corresponds to each I/O priority class is as follows:
The algorithm to set the I/O priority class for a request is as follows:
-- Translate the I/O priority class policy into a number.
-- Change the request I/O priority class into the maximum of the I/O priority
- class policy number and the numerical I/O priority class.
+- If I/O priority class policy is promote-to-rt, change the request I/O
+ priority class to IOPRIO_CLASS_RT and change the request I/O priority
+ level to 4.
+- If I/O priority class policy is not promote-to-rt, translate the I/O priority
+ class policy into a number, then change the request I/O priority class
+ into the maximum of the I/O priority class policy number and the numerical
+ I/O priority class.
PID
---
@@ -2437,7 +2450,7 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
res_b 10
misc.current
- A read-only flat-keyed file shown in the non-root cgroups. It shows
+ A read-only flat-keyed file shown in all cgroups. It shows
the current usage of the resources in the cgroup and its children.::
$ cat misc.current
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e5bab29685f..d172651ed914 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -304,7 +304,7 @@
EL0 is indicated by /sys/devices/system/cpu/aarch32_el0
and hot-unplug operations may be restricted.
- See Documentation/arm64/asymmetric-32bit.rst for more
+ See Documentation/arch/arm64/asymmetric-32bit.rst for more
information.
amd_iommu= [HW,X86-64]
@@ -429,6 +429,9 @@
arm64.nosme [ARM64] Unconditionally disable Scalable Matrix
Extension support
+ arm64.nomops [ARM64] Unconditionally disable Memory Copy and Memory
+ Set instructions support
+
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse
@@ -818,20 +821,6 @@
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
- cpu0_hotplug [X86] Turn on CPU0 hotplug feature when
- CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
- Some features depend on CPU0. Known dependencies are:
- 1. Resume from suspend/hibernate depends on CPU0.
- Suspend/hibernate will fail if CPU0 is offline and you
- need to online CPU0 before suspend/hibernate.
- 2. PIC interrupts also depend on CPU0. CPU0 can't be
- removed if a PIC interrupt is detected.
- It's said poweroff/reboot may depend on CPU0 on some
- machines although I haven't seen such issues so far
- after CPU0 is offline on a few tested machines.
- If the dependencies are under your control, you can
- turn on cpu0_hotplug.
-
cpuidle.off=1 [CPU_IDLE]
disable the cpuidle sub-system
@@ -852,6 +841,12 @@
on every CPU online, such as boot, and resume from suspend.
Default: 10000
+ cpuhp.parallel=
+ [SMP] Enable/disable parallel bringup of secondary CPUs
+ Format: <bool>
+ Default is enabled if CONFIG_HOTPLUG_PARALLEL=y. Otherwise
+ the parameter has no effect.
+
crash_kexec_post_notifiers
Run kdump after running panic-notifiers and dumping
kmsg. This only for the users who doubt kdump always
@@ -2117,6 +2112,16 @@
disable
Do not enable intel_pstate as the default
scaling driver for the supported processors
+ active
+ Use intel_pstate driver to bypass the scaling
+ governors layer of cpufreq and provide its own
+ algorithms for p-state selection. There are two
+ P-state selection algorithms provided by
+ intel_pstate in the active mode: powersave and
+ performance. The way they both operate depends
+ on whether or not the hardware managed P-states
+ (HWP) feature has been enabled in the processor
+ and possibly on the processor model.
passive
Use intel_pstate as a scaling driver, but configure it
to work with generic cpufreq governors (instead of
@@ -2551,12 +2556,13 @@
If the value is 0 (the default), KVM will pick a period based
on the ratio, such that a page is zapped after 1 hour on average.
- kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
- Default is 1 (enabled)
+ kvm-amd.nested= [KVM,AMD] Control nested virtualization feature in
+ KVM/SVM. Default is 1 (enabled).
- kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
- for all guests.
- Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
+ kvm-amd.npt= [KVM,AMD] Control KVM's use of Nested Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for NPT.
kvm-arm.mode=
[KVM,ARM] Select one of KVM/arm64's modes of operation.
@@ -2602,30 +2608,33 @@
Format: <integer>
Default: 5
- kvm-intel.ept= [KVM,Intel] Disable extended page tables
- (virtualized MMU) support on capable Intel chips.
- Default is 1 (enabled)
+ kvm-intel.ept= [KVM,Intel] Control KVM's use of Extended Page Tables,
+ a.k.a. Two-Dimensional Page Tables. Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for EPT.
kvm-intel.emulate_invalid_guest_state=
- [KVM,Intel] Disable emulation of invalid guest state.
- Ignored if kvm-intel.enable_unrestricted_guest=1, as
- guest state is never invalid for unrestricted guests.
- This param doesn't apply to nested guests (L2), as KVM
- never emulates invalid L2 guest state.
- Default is 1 (enabled)
+ [KVM,Intel] Control whether to emulate invalid guest
+ state. Ignored if kvm-intel.enable_unrestricted_guest=1,
+ as guest state is never invalid for unrestricted
+ guests. This param doesn't apply to nested guests (L2),
+ as KVM never emulates invalid L2 guest state.
+ Default is 1 (enabled).
kvm-intel.flexpriority=
- [KVM,Intel] Disable FlexPriority feature (TPR shadow).
- Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of FlexPriority feature
+ (TPR shadow). Default is 1 (enabled). Disabled by KVM if
+ hardware lacks support for it.
kvm-intel.nested=
- [KVM,Intel] Enable VMX nesting (nVMX).
- Default is 0 (disabled)
+ [KVM,Intel] Control nested virtualization feature in
+ KVM/VMX. Default is 1 (enabled).
kvm-intel.unrestricted_guest=
- [KVM,Intel] Disable unrestricted guest feature
- (virtualized real and unpaged mode) on capable
- Intel chips. Default is 1 (enabled)
+ [KVM,Intel] Control KVM's use of unrestricted guest
+ feature (virtualized real and unpaged mode). Default
+ is 1 (enabled). Disabled by KVM if EPT is disabled or
+ hardware lacks support for it.
kvm-intel.vmentry_l1d_flush=[KVM,Intel] Mitigation for L1 Terminal Fault
CVE-2018-3620.
@@ -2639,9 +2648,10 @@
Default is cond (do L1 cache flush in specific instances)
- kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
- feature (tagged TLBs) on capable Intel chips.
- Default is 1 (enabled)
+ kvm-intel.vpid= [KVM,Intel] Control KVM's use of Virtual Processor
+ Identification feature (tagged TLBs). Default is 1
+ (enabled). Disabled by KVM if hardware lacks support
+ for it.
l1d_flush= [X86,INTEL]
Control mitigation for L1D based snooping vulnerability.
@@ -3423,6 +3433,10 @@
[HW] Make the MicroTouch USB driver use raw coordinates
('y', default) or cooked coordinates ('n')
+ mtrr=debug [X86]
+ Enable printing debug information related to MTRR
+ registers at boot time.
+
mtrr_chunk_size=nn[KMG] [X86]
used for mtrr cleanup. It is largest continuous chunk
that could hold holes aka. UC entries.
@@ -4736,43 +4750,6 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
- rcutree.use_softirq= [KNL]
- If set to zero, move all RCU_SOFTIRQ processing to
- per-CPU rcuc kthreads. Defaults to a non-zero
- value, meaning that RCU_SOFTIRQ is used by default.
- Specify rcutree.use_softirq=0 to use rcuc kthreads.
-
- But note that CONFIG_PREEMPT_RT=y kernels disable
- this kernel boot parameter, forcibly setting it
- to zero.
-
- rcutree.rcu_fanout_exact= [KNL]
- Disable autobalancing of the rcu_node combining
- tree. This is used by rcutorture, and might
- possibly be useful for architectures having high
- cache-to-cache transfer latencies.
-
- rcutree.rcu_fanout_leaf= [KNL]
- Change the number of CPUs assigned to each
- leaf rcu_node structure. Useful for very
- large systems, which will choose the value 64,
- and for NUMA systems with large remote-access
- latencies, which will choose a value aligned
- with the appropriate hardware boundaries.
-
- rcutree.rcu_min_cached_objs= [KNL]
- Minimum number of objects which are cached and
- maintained per one CPU. Object size is equal
- to PAGE_SIZE. The cache allows to reduce the
- pressure to page allocator, also it makes the
- whole algorithm to behave better in low memory
- condition.
-
- rcutree.rcu_delay_page_cache_fill_msec= [KNL]
- Set the page-cache refill delay (in milliseconds)
- in response to low-memory conditions. The range
- of permitted values is in the range 0:100000.
-
rcutree.jiffies_till_first_fqs= [KNL]
Set delay from grace-period initialization to
first attempt to force quiescent states.
@@ -4811,21 +4788,6 @@
When RCU_NOCB_CPU is set, also adjust the
priority of NOCB callback kthreads.
- rcutree.rcu_divisor= [KNL]
- Set the shift-right count to use to compute
- the callback-invocation batch limit bl from
- the number of callbacks queued on this CPU.
- The result will be bounded below by the value of
- the rcutree.blimit kernel parameter. Every bl
- callbacks, the softirq handler will exit in
- order to allow the CPU to do other work.
-
- Please note that this callback-invocation batch
- limit applies only to non-offloaded callback
- invocation. Offloaded callbacks are instead
- invoked in the context of an rcuoc kthread, which
- scheduler will preempt as it does any other task.
-
rcutree.nocb_nobypass_lim_per_jiffy= [KNL]
On callback-offloaded (rcu_nocbs) CPUs,
RCU reduces the lock contention that would
@@ -4839,14 +4801,6 @@
the ->nocb_bypass queue. The definition of "too
many" is supplied by this kernel boot parameter.
- rcutree.rcu_nocb_gp_stride= [KNL]
- Set the number of NOCB callback kthreads in
- each group, which defaults to the square root
- of the number of CPUs. Larger numbers reduce
- the wakeup overhead on the global grace-period
- kthread, but increases that same overhead on
- each group's NOCB grace-period kthread.
-
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
batch limiting is disabled.
@@ -4864,6 +4818,56 @@
on rcutree.qhimark at boot time and to zero to
disable more aggressive help enlistment.
+ rcutree.rcu_delay_page_cache_fill_msec= [KNL]
+ Set the page-cache refill delay (in milliseconds)
+ in response to low-memory conditions. The range
+ of permitted values is in the range 0:100000.
+
+ rcutree.rcu_divisor= [KNL]
+ Set the shift-right count to use to compute
+ the callback-invocation batch limit bl from
+ the number of callbacks queued on this CPU.
+ The result will be bounded below by the value of
+ the rcutree.blimit kernel parameter. Every bl
+ callbacks, the softirq handler will exit in
+ order to allow the CPU to do other work.
+
+ Please note that this callback-invocation batch
+ limit applies only to non-offloaded callback
+ invocation. Offloaded callbacks are instead
+ invoked in the context of an rcuoc kthread, which
+ scheduler will preempt as it does any other task.
+
+ rcutree.rcu_fanout_exact= [KNL]
+ Disable autobalancing of the rcu_node combining
+ tree. This is used by rcutorture, and might
+ possibly be useful for architectures having high
+ cache-to-cache transfer latencies.
+
+ rcutree.rcu_fanout_leaf= [KNL]
+ Change the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very
+ large systems, which will choose the value 64,
+ and for NUMA systems with large remote-access
+ latencies, which will choose a value aligned
+ with the appropriate hardware boundaries.
+
+ rcutree.rcu_min_cached_objs= [KNL]
+ Minimum number of objects which are cached and
+ maintained per one CPU. Object size is equal
+ to PAGE_SIZE. The cache allows to reduce the
+ pressure to page allocator, also it makes the
+ whole algorithm to behave better in low memory
+ condition.
+
+ rcutree.rcu_nocb_gp_stride= [KNL]
+ Set the number of NOCB callback kthreads in
+ each group, which defaults to the square root
+ of the number of CPUs. Larger numbers reduce
+ the wakeup overhead on the global grace-period
+ kthread, but increases that same overhead on
+ each group's NOCB grace-period kthread.
+
rcutree.rcu_kick_kthreads= [KNL]
Cause the grace-period kthread to get an extra
wake_up() if it sleeps three times longer than
@@ -4871,6 +4875,13 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
+ rcutree.rcu_resched_ns= [KNL]
+ Limit the time spent invoking a batch of RCU
+ callbacks to the specified number of nanoseconds.
+ By default, this limit is checked only once
+ every 32 callbacks in order to limit the pain
+ inflicted by local_clock() overhead.
+
rcutree.rcu_unlock_delay= [KNL]
In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
this specifies an rcu_read_unlock()-time delay
@@ -4885,6 +4896,16 @@
rcu_node tree with an eye towards determining
why a new grace period has not yet started.
+ rcutree.use_softirq= [KNL]
+ If set to zero, move all RCU_SOFTIRQ processing to
+ per-CPU rcuc kthreads. Defaults to a non-zero
+ value, meaning that RCU_SOFTIRQ is used by default.
+ Specify rcutree.use_softirq=0 to use rcuc kthreads.
+
+ But note that CONFIG_PREEMPT_RT=y kernels disable
+ this kernel boot parameter, forcibly setting it
+ to zero.
+
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
@@ -5087,8 +5108,17 @@
rcutorture.stall_cpu_block= [KNL]
Sleep while stalling if set. This will result
- in warnings from preemptible RCU in addition
- to any other stall-related activity.
+ in warnings from preemptible RCU in addition to
+ any other stall-related activity. Note that
+ in kernels built with CONFIG_PREEMPTION=n and
+ CONFIG_PREEMPT_COUNT=y, this parameter will
+ cause the CPU to pass through a quiescent state.
+ Given CONFIG_PREEMPTION=n, this will suppress
+ RCU CPU stall warnings, but will instead result
+ in scheduling-while-atomic splats.
+
+ Use of this module parameter results in splats.
+
rcutorture.stall_cpu_holdoff= [KNL]
Time to wait (s) after boot before inducing stall.
@@ -5452,7 +5482,12 @@
port and the regular usb controller gets disabled.
root= [KNL] Root filesystem
- See name_to_dev_t comment in init/do_mounts.c.
+ Usually this is a block device specifier of some kind,
+ see the early_lookup_bdev comment in
+ block/early-lookup.c for details.
+ Alternatively this can be "ram" for the legacy initial
+ ramdisk, "nfs" and "cifs" for root on a network file
+ system, or "mtd" and "ubi" for mounting from raw flash.
rootdelay= [KNL] Delay (in seconds) to pause before attempting to
mount the root filesystem
@@ -6563,6 +6598,12 @@
unknown_nmi_panic
[X86] Cause panic on unknown NMI.
+ unwind_debug [X86-64]
+ Enable unwinder debug output. This can be
+ useful for debugging certain unwinder error
+ conditions, including corrupt stacks and
+ bad/missing unwinder metadata.
+
usbcore.authorized_default=
[USB] Default USB device authorization:
(default -1 = authorized except for wireless USB,
@@ -6931,6 +6972,18 @@
it can be updated at runtime by writing to the
corresponding sysfs file.
+ workqueue.cpu_intensive_thresh_us=
+ Per-cpu work items which run for longer than this
+ threshold are automatically considered CPU intensive
+ and excluded from concurrency management to prevent
+ them from noticeably delaying other per-cpu work
+ items. Default is 10000 (10ms).
+
+ If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
+ will report the work functions which violate this
+ threshold repeatedly. They are likely good
+ candidates for using WQ_UNBOUND workqueues instead.
+
workqueue.disable_numa
By default, all work items queued to unbound
workqueues are affine to the NUMA nodes they're
diff --git a/Documentation/admin-guide/mm/damon/start.rst b/Documentation/admin-guide/mm/damon/start.rst
index 9f88afc734da..7aa0071ff1c3 100644
--- a/Documentation/admin-guide/mm/damon/start.rst
+++ b/Documentation/admin-guide/mm/damon/start.rst
@@ -119,9 +119,9 @@ set size has chronologically changed.::
Data Access Pattern Aware Memory Management
===========================================
-Below three commands make every memory region of size >=4K that doesn't
-accessed for >=60 seconds in your workload to be swapped out. ::
+The command below causes every memory region of size >=4K that has not been accessed for
+>=60 seconds in your workload to be swapped out. ::
- $ echo "#min-size max-size min-acc max-acc min-age max-age action" > test_scheme
- $ echo "4K max 0 0 60s max pageout" >> test_scheme
- $ damo schemes -c test_scheme <pid of your workload>
+ $ sudo damo schemes --damos_access_rate 0 0 --damos_sz_region 4K max \
+ --damos_age 60s max --damos_action pageout \
+ <pid of your workload>
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index 9b823fec974d..2d495fa85a0e 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -10,9 +10,8 @@ DAMON provides below interfaces for different users.
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
system administrators who want a just-working human-friendly interface.
Using this, users can use the DAMON’s major features in a human-friendly way.
- It may not be highly tuned for special cases, though. It supports both
- virtual and physical address spaces monitoring. For more detail, please
- refer to its `usage document
+ It may not be highly tuned for special cases, though. For more detail,
+ please refer to its `usage document
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
- *sysfs interface.*
:ref:`This <sysfs_interface>` is for privileged user space programmers who
@@ -20,11 +19,7 @@ DAMON provides below interfaces for different users.
features by reading from and writing to special sysfs files. Therefore,
you can write and use your personalized DAMON sysfs wrapper programs that
reads/writes the sysfs files instead of you. The `DAMON user space tool
- <https://github.com/awslabs/damo>`_ is one example of such programs. It
- supports both virtual and physical address spaces monitoring. Note that this
- interface provides only simple :ref:`statistics <damos_stats>` for the
- monitoring results. For detailed monitoring results, DAMON provides a
- :ref:`tracepoint <tracepoint>`.
+ <https://github.com/awslabs/damo>`_ is one example of such programs.
- *debugfs interface. (DEPRECATED!)*
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
<sysfs_interface>`. This is deprecated, so users should move to the
@@ -139,7 +134,7 @@ scheme of the kdamond. Writing ``clear_schemes_tried_regions`` to ``state``
file clears the DAMON-based operating scheme action tried regions directory for
each DAMON-based operation scheme of the kdamond. For details of the
DAMON-based operation scheme action tried regions directory, please refer to
-:ref:tried_regions section <sysfs_schemes_tried_regions>`.
+:ref:`tried_regions section <sysfs_schemes_tried_regions>`.
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
@@ -259,12 +254,9 @@ be equal or smaller than ``start`` of directory ``N+1``.
contexts/<N>/schemes/
---------------------
-For usual DAMON-based data access aware memory management optimizations, users
-would normally want the system to apply a memory management action to a memory
-region of a specific access pattern. DAMON receives such formalized operation
-schemes from the user and applies those to the target memory regions. Users
-can get and set the schemes by reading from and writing to files under this
-directory.
+The directory for DAMON-based Operation Schemes (:ref:`DAMOS
+<damon_design_damos>`). Users can get and set the schemes by reading from and
+writing to files under this directory.
In the beginning, this directory has only one file, ``nr_schemes``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
@@ -277,12 +269,12 @@ In each scheme directory, five directories (``access_pattern``, ``quotas``,
``watermarks``, ``filters``, ``stats``, and ``tried_regions``) and one file
(``action``) exist.
-The ``action`` file is for setting and getting what action you want to apply to
-memory regions having specific access pattern of the interest. The keywords
-that can be written to and read from the file and their meaning are as below.
+The ``action`` file is for setting and getting the scheme's :ref:`action
+<damon_design_damos_action>`. The keywords that can be written to and read
+from the file and their meaning are as below.
Note that support of each action depends on the running DAMON operations set
-`implementation <sysfs_contexts>`.
+:ref:`implementation <sysfs_contexts>`.
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
Supported by ``vaddr`` and ``fvaddr`` operations set.
@@ -304,32 +296,21 @@ Note that support of each action depends on the running DAMON operations set
schemes/<N>/access_pattern/
---------------------------
-The target access pattern of each DAMON-based operation scheme is constructed
-with three ranges including the size of the region in bytes, number of
-monitored accesses per aggregate interval, and number of aggregated intervals
-for the age of the region.
+The directory for the target access :ref:`pattern
+<damon_design_damos_access_pattern>` of the given DAMON-based operation scheme.
Under the ``access_pattern`` directory, three directories (``sz``,
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
exist. You can set and get the access pattern for the given scheme by writing
to and reading from the ``min`` and ``max`` files under ``sz``,
-``nr_accesses``, and ``age`` directories, respectively.
+``nr_accesses``, and ``age`` directories, respectively. Note that the ``min``
+and the ``max`` form a closed interval.
schemes/<N>/quotas/
-------------------
-Optimal ``target access pattern`` for each ``action`` is workload dependent, so
-not easy to find. Worse yet, setting a scheme of some action too aggressive
-can cause severe overhead. To avoid such overhead, users can limit time and
-size quota for each scheme. In detail, users can ask DAMON to try to use only
-up to specific time (``time quota``) for applying the action, and to apply the
-action to only up to specific amount (``size quota``) of memory regions having
-the target access pattern within a given time interval (``reset interval``).
-
-When the quota limit is expected to be exceeded, DAMON prioritizes found memory
-regions of the ``target access pattern`` based on their size, access frequency,
-and age. For personalized prioritization, users can set the weights for the
-three properties.
+The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
+DAMON-based operation scheme.
Under ``quotas`` directory, three files (``ms``, ``bytes``,
``reset_interval_ms``) and one directory (``weights``) having three files
@@ -337,23 +318,26 @@ Under ``quotas`` directory, three files (``ms``, ``bytes``,
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
``reset interval`` in milliseconds by writing the values to the three files,
-respectively. You can also set the prioritization weights for size, access
-frequency, and age in per-thousand unit by writing the values to the three
-files under the ``weights`` directory.
+respectively. Then, DAMON tries to use only up to ``time quota`` milliseconds
+for applying the ``action`` to memory regions of the ``access_pattern``, and to
+apply the action to only up to ``bytes`` bytes of memory regions within the
+``reset_interval_ms``. Setting both ``ms`` and ``bytes`` zero disables the
+quota limits.
+
+You can also set the :ref:`prioritization weights
+<damon_design_damos_quotas_prioritization>` for size, access frequency, and age
+in per-thousand unit by writing the values to the three files under the
+``weights`` directory.
schemes/<N>/watermarks/
-----------------------
-To allow easy activation and deactivation of each scheme based on system
-status, DAMON provides a feature called watermarks. The feature receives five
-values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
-``metric`` is the system metric such as free memory ratio that can be measured.
-If the metric value of the system is higher than the value in ``high`` or lower
-than ``low`` at the memoent, the scheme is deactivated. If the value is lower
-than ``mid``, the scheme is activated.
+The directory for the :ref:`watermarks <damon_design_damos_watermarks>` of the
+given DAMON-based operation scheme.
Under the watermarks directory, five files (``metric``, ``interval_us``,
-``high``, ``mid``, and ``low``) for setting each value exist. You can set and
+``high``, ``mid``, and ``low``) for setting the metric, the time interval
+between checks of the metric, and the three watermarks exist. You can set and
get the five values by writing to the files, respectively.
Keywords and meanings of those that can be written to the ``metric`` file are
@@ -367,12 +351,8 @@ The ``interval`` should written in microseconds unit.
schemes/<N>/filters/
--------------------
-Users could know something more than the kernel for specific types of memory.
-In the case, users could do their own management for the memory and hence
-doesn't want DAMOS bothers that. Users could limit DAMOS by setting the access
-pattern of the scheme and/or the monitoring regions for the purpose, but that
-can be inefficient in some cases. In such cases, users could set non-access
-pattern driven filters using files in this directory.
+The directory for the :ref:`filters <damon_design_damos_filters>` of the given
+DAMON-based operation scheme.
In the beginning, this directory has only one file, ``nr_filters``. Writing a
number (``N``) to the file creates the number of child directories named ``0``
@@ -432,13 +412,17 @@ starting from ``0`` under this directory. Each directory contains files
exposing detailed information about each of the memory region that the
corresponding scheme's ``action`` has tried to be applied under this directory,
during next :ref:`aggregation interval <sysfs_monitoring_attrs>`. The
-information includes address range, ``nr_accesses``, , and ``age`` of the
-region.
+information includes address range, ``nr_accesses``, and ``age`` of the region.
The directories will be removed when another special keyword,
``clear_schemes_tried_regions``, is written to the relevant
``kdamonds/<N>/state`` file.
+The expected usage of this directory is investigations of schemes' behaviors,
+and query-like efficient data access monitoring results retrievals. For the
+latter use case, in particular, users can set the ``action`` as ``stat`` and
+set the ``access pattern`` to the pattern that they want to query.
+
tried_regions/<N>/
------------------
@@ -600,15 +584,10 @@ update.
Schemes
-------
-For usual DAMON-based data access aware memory management optimizations, users
-would simply want the system to apply a memory management action to a memory
-region of a specific access pattern. DAMON receives such formalized operation
-schemes from the user and applies those to the target processes.
-
-Users can get and set the schemes by reading from and writing to ``schemes``
-debugfs file. Reading the file also shows the statistics of each scheme. To
-the file, each of the schemes should be represented in each line in below
-form::
+Users can get and set the DAMON-based operation :ref:`schemes
+<damon_design_damos>` by reading from and writing to ``schemes`` debugfs file.
+Reading the file also shows the statistics of each scheme. To the file, each
+of the schemes should be represented in each line in below form::
<target access pattern> <action> <quota> <watermarks>
@@ -617,8 +596,9 @@ You can disable schemes by simply writing an empty string to the file.
Target Access Pattern
~~~~~~~~~~~~~~~~~~~~~
-The ``<target access pattern>`` is constructed with three ranges in below
-form::
+The target access :ref:`pattern <damon_design_damos_access_pattern>` of the
+scheme. The ``<target access pattern>`` is constructed with three ranges in
+below form::
min-size max-size min-acc max-acc min-age max-age
@@ -631,9 +611,9 @@ closed interval.
Action
~~~~~~
-The ``<action>`` is a predefined integer for memory management actions, which
-DAMON will apply to the regions having the target access pattern. The
-supported numbers and their meanings are as below.
+The ``<action>`` is a predefined integer for memory management :ref:`actions
+<damon_design_damos_action>`. The supported numbers and their meanings are as
+below.
- 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``. Ignored if
``target`` is ``paddr``.
@@ -649,10 +629,8 @@ supported numbers and their meanings are as below.
Quota
~~~~~
-Optimal ``target access pattern`` for each ``action`` is workload dependent, so
-not easy to find. Worse yet, setting a scheme of some action too aggressive
-can cause severe overhead. To avoid such overhead, users can limit time and
-size quota for the scheme via the ``<quota>`` in below form::
+Users can set the :ref:`quotas <damon_design_damos_quotas>` of the given scheme
+via the ``<quota>`` in below form::
<ms> <sz> <reset interval> <priority weights>
@@ -662,19 +640,17 @@ the action to memory regions of the ``target access pattern`` within the
``<sz>`` bytes of memory regions within the ``<reset interval>``. Setting both
``<ms>`` and ``<sz>`` zero disables the quota limits.
-When the quota limit is expected to be exceeded, DAMON prioritizes found memory
-regions of the ``target access pattern`` based on their size, access frequency,
-and age. For personalized prioritization, users can set the weights for the
-three properties in ``<priority weights>`` in below form::
+For the :ref:`prioritization <damon_design_damos_quotas_prioritization>`, users
+can set the weights for the three properties in ``<priority weights>`` in below
+form::
<size weight> <access frequency weight> <age weight>
Watermarks
~~~~~~~~~~
-Some schemes would need to run based on current value of the system's specific
-metrics like free memory ratio. For such cases, users can specify watermarks
-for the condition.::
+Users can specify :ref:`watermarks <damon_design_damos_watermarks>` of the
+given scheme via ``<watermarks>`` in below form::
<metric> <check interval> <high mark> <middle mark> <low mark>
@@ -797,10 +773,12 @@ root directory only.
Tracepoint for Monitoring Results
=================================
-DAMON provides the monitoring results via a tracepoint,
-``damon:damon_aggregated``. While the monitoring is turned on, you could
-record the tracepoint events and show results using tracepoint supporting tools
-like ``perf``. For example::
+Users can get the monitoring results via the :ref:`tried_regions
+<sysfs_schemes_tried_regions>` or a tracepoint, ``damon:damon_aggregated``.
+While the tried regions directory is useful for getting a snapshot, the
+tracepoint is useful for getting a full record of the results. While the
+monitoring is turned on, you could record the tracepoint events and show
+results using tracepoint supporting tools like ``perf``. For example::
# echo on > monitor_on
# perf record -e damon:damon_aggregated &
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 546979360513..e0174d20809a 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -56,14 +56,14 @@ Example usage of perf::
For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same
as PMU v1, but some new functions are added to the hardware.
-(a) L3C PMU supports filtering by core/thread within the cluster which can be
+1. L3C PMU supports filtering by core/thread within the cluster which can be
specified as a bitmap::
$# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
This will only count the operations from core/thread 0 and 1 in this cluster.
-(b) Tracetag allow the user to chose to count only read, write or atomic
+2. Tracetag allows the user to choose to count only read, write or atomic
operations via the tt_req parameter in perf. The default value counts all
operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101
represents write operations, 3'b110 represents atomic store operations and
@@ -73,14 +73,16 @@ represents write operations, 3'b110 represents atomic store operations and
This will only count the read operations in this cluster.
-(c) Datasrc allows the user to check where the data comes from. It is 5 bits.
+3. Datasrc allows the user to check where the data comes from. It is 5 bits.
Some important codes are as follows:
-5'b00001: comes from L3C in this die;
-5'b01000: comes from L3C in the cross-die;
-5'b01001: comes from L3C which is in another socket;
-5'b01110: comes from the local DDR;
-5'b01111: comes from the cross-die DDR;
-5'b10000: comes from cross-socket DDR;
+
+- 5'b00001: comes from L3C in this die;
+- 5'b01000: comes from L3C in the cross-die;
+- 5'b01001: comes from L3C which is in another socket;
+- 5'b01110: comes from the local DDR;
+- 5'b01111: comes from the cross-die DDR;
+- 5'b10000: comes from cross-socket DDR;
+
etc, it is mainly helpful to find that the data source is nearest from the CPU
cores. If datasrc_cfg is used in the multi-chips, the datasrc_skt shall be
configured in perf command::
@@ -88,15 +90,25 @@ configured in perf command::
$# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
-(d)Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+4. Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
clusters (SICL) containing multiple I/O clusters (ICLs). Each CCL/ICL in the
SoC has a unique ID. Each ID is 11bits, include a 6-bit SCCL-ID and 5-bit
CCL/ICL-ID. For I/O die, the ICL-ID is followed by:
-5'b00000: I/O_MGMT_ICL;
-5'b00001: Network_ICL;
-5'b00011: HAC_ICL;
-5'b10000: PCIe_ICL;
+
+- 5'b00000: I/O_MGMT_ICL;
+- 5'b00001: Network_ICL;
+- 5'b00011: HAC_ICL;
+- 5'b10000: PCIe_ICL;
+
+5. uring_channel: UC PMU events 0x47~0x59 support filtering by tx request
+uring channel. It is 2 bits. Some important codes are as follows:
+
+- 2'b11: count the events which are sent to the uring_ext (MATA) channel;
+- 2'b01: is the same as 2'b11;
+- 2'b10: count the events which are sent to the uring (non-MATA) channel;
+- 2'b00: default value, count the events which are sent to both the uring and
+ uring_ext channels;
Users could configure IDs to count data coming from specific CCL/ICL, by setting
srcid_cmd & srcid_msk, and data destined for specific CCL/ICL by setting
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d85d90f5d000..3800fab1619b 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -949,7 +949,7 @@ user space can read performance monitor counter registers directly.
The default value is 0 (access disabled).
-See Documentation/arm64/perf.rst for more information.
+See Documentation/arch/arm64/perf.rst for more information.
pid_max
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 466c560b0c30..4877563241f3 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -386,8 +386,8 @@ Default : 0 (for compatibility reasons)
txrehash
--------
-Controls default hash rethink behaviour on listening socket when SO_TXREHASH
-option is set to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
+Controls default hash rethink behaviour on socket when SO_TXREHASH option is set
+to SOCK_TXREHASH_DEFAULT (i. e. not overridden by setsockopt).
If set to 1 (default), hash rethink is performed on listening socket.
If set to 0, hash rethink is not performed.
diff --git a/Documentation/arm/arm.rst b/Documentation/arch/arm/arm.rst
index 99d660fdf73f..99d660fdf73f 100644
--- a/Documentation/arm/arm.rst
+++ b/Documentation/arch/arm/arm.rst
diff --git a/Documentation/arm/booting.rst b/Documentation/arch/arm/booting.rst
index 5974e37b3d20..5974e37b3d20 100644
--- a/Documentation/arm/booting.rst
+++ b/Documentation/arch/arm/booting.rst
diff --git a/Documentation/arm/cluster-pm-race-avoidance.rst b/Documentation/arch/arm/cluster-pm-race-avoidance.rst
index aa58603d3f28..aa58603d3f28 100644
--- a/Documentation/arm/cluster-pm-race-avoidance.rst
+++ b/Documentation/arch/arm/cluster-pm-race-avoidance.rst
diff --git a/Documentation/arm/features.rst b/Documentation/arch/arm/features.rst
index 7414ec03dd15..7414ec03dd15 100644
--- a/Documentation/arm/features.rst
+++ b/Documentation/arch/arm/features.rst
diff --git a/Documentation/arm/firmware.rst b/Documentation/arch/arm/firmware.rst
index efd844baec1d..efd844baec1d 100644
--- a/Documentation/arm/firmware.rst
+++ b/Documentation/arch/arm/firmware.rst
diff --git a/Documentation/arm/google/chromebook-boot-flow.rst b/Documentation/arch/arm/google/chromebook-boot-flow.rst
index 36da77684bba..36da77684bba 100644
--- a/Documentation/arm/google/chromebook-boot-flow.rst
+++ b/Documentation/arch/arm/google/chromebook-boot-flow.rst
diff --git a/Documentation/arm/index.rst b/Documentation/arch/arm/index.rst
index fd43502ae924..fd43502ae924 100644
--- a/Documentation/arm/index.rst
+++ b/Documentation/arch/arm/index.rst
diff --git a/Documentation/arm/interrupts.rst b/Documentation/arch/arm/interrupts.rst
index 2ae70e0e9732..2ae70e0e9732 100644
--- a/Documentation/arm/interrupts.rst
+++ b/Documentation/arch/arm/interrupts.rst
diff --git a/Documentation/arm/ixp4xx.rst b/Documentation/arch/arm/ixp4xx.rst
index a57235616294..a57235616294 100644
--- a/Documentation/arm/ixp4xx.rst
+++ b/Documentation/arch/arm/ixp4xx.rst
diff --git a/Documentation/arm/kernel_mode_neon.rst b/Documentation/arch/arm/kernel_mode_neon.rst
index 9bfb71a2a9b9..9bfb71a2a9b9 100644
--- a/Documentation/arm/kernel_mode_neon.rst
+++ b/Documentation/arch/arm/kernel_mode_neon.rst
diff --git a/Documentation/arm/kernel_user_helpers.rst b/Documentation/arch/arm/kernel_user_helpers.rst
index eb6f3d916622..eb6f3d916622 100644
--- a/Documentation/arm/kernel_user_helpers.rst
+++ b/Documentation/arch/arm/kernel_user_helpers.rst
diff --git a/Documentation/arm/keystone/knav-qmss.rst b/Documentation/arch/arm/keystone/knav-qmss.rst
index 7f7638d80b42..7f7638d80b42 100644
--- a/Documentation/arm/keystone/knav-qmss.rst
+++ b/Documentation/arch/arm/keystone/knav-qmss.rst
diff --git a/Documentation/arm/keystone/overview.rst b/Documentation/arch/arm/keystone/overview.rst
index cd90298c493c..cd90298c493c 100644
--- a/Documentation/arm/keystone/overview.rst
+++ b/Documentation/arch/arm/keystone/overview.rst
diff --git a/Documentation/arm/marvell.rst b/Documentation/arch/arm/marvell.rst
index 3d369a566038..3d369a566038 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arch/arm/marvell.rst
diff --git a/Documentation/arm/mem_alignment.rst b/Documentation/arch/arm/mem_alignment.rst
index aa22893b62bc..aa22893b62bc 100644
--- a/Documentation/arm/mem_alignment.rst
+++ b/Documentation/arch/arm/mem_alignment.rst
diff --git a/Documentation/arm/memory.rst b/Documentation/arch/arm/memory.rst
index 0cb1e2938823..0cb1e2938823 100644
--- a/Documentation/arm/memory.rst
+++ b/Documentation/arch/arm/memory.rst
diff --git a/Documentation/arm/microchip.rst b/Documentation/arch/arm/microchip.rst
index e721d855f2c9..e721d855f2c9 100644
--- a/Documentation/arm/microchip.rst
+++ b/Documentation/arch/arm/microchip.rst
diff --git a/Documentation/arm/netwinder.rst b/Documentation/arch/arm/netwinder.rst
index 8eab66caa2ac..8eab66caa2ac 100644
--- a/Documentation/arm/netwinder.rst
+++ b/Documentation/arch/arm/netwinder.rst
diff --git a/Documentation/arm/nwfpe/index.rst b/Documentation/arch/arm/nwfpe/index.rst
index 3c4d2f9aa10e..3c4d2f9aa10e 100644
--- a/Documentation/arm/nwfpe/index.rst
+++ b/Documentation/arch/arm/nwfpe/index.rst
diff --git a/Documentation/arm/nwfpe/netwinder-fpe.rst b/Documentation/arch/arm/nwfpe/netwinder-fpe.rst
index cbb320960fc4..cbb320960fc4 100644
--- a/Documentation/arm/nwfpe/netwinder-fpe.rst
+++ b/Documentation/arch/arm/nwfpe/netwinder-fpe.rst
diff --git a/Documentation/arm/nwfpe/notes.rst b/Documentation/arch/arm/nwfpe/notes.rst
index 102e55af8439..102e55af8439 100644
--- a/Documentation/arm/nwfpe/notes.rst
+++ b/Documentation/arch/arm/nwfpe/notes.rst
diff --git a/Documentation/arm/nwfpe/nwfpe.rst b/Documentation/arch/arm/nwfpe/nwfpe.rst
index 35cd90dacbff..35cd90dacbff 100644
--- a/Documentation/arm/nwfpe/nwfpe.rst
+++ b/Documentation/arch/arm/nwfpe/nwfpe.rst
diff --git a/Documentation/arm/nwfpe/todo.rst b/Documentation/arch/arm/nwfpe/todo.rst
index 393f11b14540..393f11b14540 100644
--- a/Documentation/arm/nwfpe/todo.rst
+++ b/Documentation/arch/arm/nwfpe/todo.rst
diff --git a/Documentation/arm/omap/dss.rst b/Documentation/arch/arm/omap/dss.rst
index a40c4d9c717a..a40c4d9c717a 100644
--- a/Documentation/arm/omap/dss.rst
+++ b/Documentation/arch/arm/omap/dss.rst
diff --git a/Documentation/arm/omap/index.rst b/Documentation/arch/arm/omap/index.rst
index 8b365b212e49..8b365b212e49 100644
--- a/Documentation/arm/omap/index.rst
+++ b/Documentation/arch/arm/omap/index.rst
diff --git a/Documentation/arm/omap/omap.rst b/Documentation/arch/arm/omap/omap.rst
index f440c0f4613f..f440c0f4613f 100644
--- a/Documentation/arm/omap/omap.rst
+++ b/Documentation/arch/arm/omap/omap.rst
diff --git a/Documentation/arm/omap/omap_pm.rst b/Documentation/arch/arm/omap/omap_pm.rst
index a335e4c8ce2c..a335e4c8ce2c 100644
--- a/Documentation/arm/omap/omap_pm.rst
+++ b/Documentation/arch/arm/omap/omap_pm.rst
diff --git a/Documentation/arm/porting.rst b/Documentation/arch/arm/porting.rst
index bd21958bdb2d..bd21958bdb2d 100644
--- a/Documentation/arm/porting.rst
+++ b/Documentation/arch/arm/porting.rst
diff --git a/Documentation/arm/pxa/mfp.rst b/Documentation/arch/arm/pxa/mfp.rst
index ac34e5d7ee44..ac34e5d7ee44 100644
--- a/Documentation/arm/pxa/mfp.rst
+++ b/Documentation/arch/arm/pxa/mfp.rst
diff --git a/Documentation/arm/sa1100/assabet.rst b/Documentation/arch/arm/sa1100/assabet.rst
index a761e128fb08..a761e128fb08 100644
--- a/Documentation/arm/sa1100/assabet.rst
+++ b/Documentation/arch/arm/sa1100/assabet.rst
diff --git a/Documentation/arm/sa1100/cerf.rst b/Documentation/arch/arm/sa1100/cerf.rst
index 7fa71b609bf9..7fa71b609bf9 100644
--- a/Documentation/arm/sa1100/cerf.rst
+++ b/Documentation/arch/arm/sa1100/cerf.rst
diff --git a/Documentation/arm/sa1100/index.rst b/Documentation/arch/arm/sa1100/index.rst
index c9aed43280ff..c9aed43280ff 100644
--- a/Documentation/arm/sa1100/index.rst
+++ b/Documentation/arch/arm/sa1100/index.rst
diff --git a/Documentation/arm/sa1100/lart.rst b/Documentation/arch/arm/sa1100/lart.rst
index 94c0568d1095..94c0568d1095 100644
--- a/Documentation/arm/sa1100/lart.rst
+++ b/Documentation/arch/arm/sa1100/lart.rst
diff --git a/Documentation/arm/sa1100/serial_uart.rst b/Documentation/arch/arm/sa1100/serial_uart.rst
index ea983642b9be..ea983642b9be 100644
--- a/Documentation/arm/sa1100/serial_uart.rst
+++ b/Documentation/arch/arm/sa1100/serial_uart.rst
diff --git a/Documentation/arm/samsung/bootloader-interface.rst b/Documentation/arch/arm/samsung/bootloader-interface.rst
index a56f325dae78..a56f325dae78 100644
--- a/Documentation/arm/samsung/bootloader-interface.rst
+++ b/Documentation/arch/arm/samsung/bootloader-interface.rst
diff --git a/Documentation/arm/samsung/clksrc-change-registers.awk b/Documentation/arch/arm/samsung/clksrc-change-registers.awk
index 7be1b8aa7cd9..7be1b8aa7cd9 100755
--- a/Documentation/arm/samsung/clksrc-change-registers.awk
+++ b/Documentation/arch/arm/samsung/clksrc-change-registers.awk
diff --git a/Documentation/arm/samsung/gpio.rst b/Documentation/arch/arm/samsung/gpio.rst
index 27fae0d50361..27fae0d50361 100644
--- a/Documentation/arm/samsung/gpio.rst
+++ b/Documentation/arch/arm/samsung/gpio.rst
diff --git a/Documentation/arm/samsung/index.rst b/Documentation/arch/arm/samsung/index.rst
index 8142cce3d23e..8142cce3d23e 100644
--- a/Documentation/arm/samsung/index.rst
+++ b/Documentation/arch/arm/samsung/index.rst
diff --git a/Documentation/arm/samsung/overview.rst b/Documentation/arch/arm/samsung/overview.rst
index 8b15a190169b..8b15a190169b 100644
--- a/Documentation/arm/samsung/overview.rst
+++ b/Documentation/arch/arm/samsung/overview.rst
diff --git a/Documentation/arm/setup.rst b/Documentation/arch/arm/setup.rst
index 8e12ef3fb9a7..8e12ef3fb9a7 100644
--- a/Documentation/arm/setup.rst
+++ b/Documentation/arch/arm/setup.rst
diff --git a/Documentation/arm/spear/overview.rst b/Documentation/arch/arm/spear/overview.rst
index 1a77f6b213b6..1a77f6b213b6 100644
--- a/Documentation/arm/spear/overview.rst
+++ b/Documentation/arch/arm/spear/overview.rst
diff --git a/Documentation/arm/sti/overview.rst b/Documentation/arch/arm/sti/overview.rst
index ae16aced800f..ae16aced800f 100644
--- a/Documentation/arm/sti/overview.rst
+++ b/Documentation/arch/arm/sti/overview.rst
diff --git a/Documentation/arm/sti/stih407-overview.rst b/Documentation/arch/arm/sti/stih407-overview.rst
index 027e75bc7b7c..027e75bc7b7c 100644
--- a/Documentation/arm/sti/stih407-overview.rst
+++ b/Documentation/arch/arm/sti/stih407-overview.rst
diff --git a/Documentation/arm/sti/stih418-overview.rst b/Documentation/arch/arm/sti/stih418-overview.rst
index b563c1f4fe5a..b563c1f4fe5a 100644
--- a/Documentation/arm/sti/stih418-overview.rst
+++ b/Documentation/arch/arm/sti/stih418-overview.rst
diff --git a/Documentation/arm/stm32/overview.rst b/Documentation/arch/arm/stm32/overview.rst
index 85cfc8410798..85cfc8410798 100644
--- a/Documentation/arm/stm32/overview.rst
+++ b/Documentation/arch/arm/stm32/overview.rst
diff --git a/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
index 2945e0e33104..2945e0e33104 100644
--- a/Documentation/arm/stm32/stm32-dma-mdma-chaining.rst
+++ b/Documentation/arch/arm/stm32/stm32-dma-mdma-chaining.rst
diff --git a/Documentation/arm/stm32/stm32f429-overview.rst b/Documentation/arch/arm/stm32/stm32f429-overview.rst
index a7ebe8ea6697..a7ebe8ea6697 100644
--- a/Documentation/arm/stm32/stm32f429-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f429-overview.rst
diff --git a/Documentation/arm/stm32/stm32f746-overview.rst b/Documentation/arch/arm/stm32/stm32f746-overview.rst
index 78befddc7740..78befddc7740 100644
--- a/Documentation/arm/stm32/stm32f746-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f746-overview.rst
diff --git a/Documentation/arm/stm32/stm32f769-overview.rst b/Documentation/arch/arm/stm32/stm32f769-overview.rst
index e482980ddf21..e482980ddf21 100644
--- a/Documentation/arm/stm32/stm32f769-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32f769-overview.rst
diff --git a/Documentation/arm/stm32/stm32h743-overview.rst b/Documentation/arch/arm/stm32/stm32h743-overview.rst
index 4e15f1a42730..4e15f1a42730 100644
--- a/Documentation/arm/stm32/stm32h743-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32h743-overview.rst
diff --git a/Documentation/arm/stm32/stm32h750-overview.rst b/Documentation/arch/arm/stm32/stm32h750-overview.rst
index 0e51235c9547..0e51235c9547 100644
--- a/Documentation/arm/stm32/stm32h750-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32h750-overview.rst
diff --git a/Documentation/arm/stm32/stm32mp13-overview.rst b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
index 3bb9492dad49..3bb9492dad49 100644
--- a/Documentation/arm/stm32/stm32mp13-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp13-overview.rst
diff --git a/Documentation/arm/stm32/stm32mp151-overview.rst b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
index f42a2ac309c0..f42a2ac309c0 100644
--- a/Documentation/arm/stm32/stm32mp151-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp151-overview.rst
diff --git a/Documentation/arm/stm32/stm32mp157-overview.rst b/Documentation/arch/arm/stm32/stm32mp157-overview.rst
index f62fdc8e7d8d..f62fdc8e7d8d 100644
--- a/Documentation/arm/stm32/stm32mp157-overview.rst
+++ b/Documentation/arch/arm/stm32/stm32mp157-overview.rst
diff --git a/Documentation/arm/sunxi.rst b/Documentation/arch/arm/sunxi.rst
index b85d1e2f2d47..b85d1e2f2d47 100644
--- a/Documentation/arm/sunxi.rst
+++ b/Documentation/arch/arm/sunxi.rst
diff --git a/Documentation/arm/sunxi/clocks.rst b/Documentation/arch/arm/sunxi/clocks.rst
index 23bd03f3e21f..23bd03f3e21f 100644
--- a/Documentation/arm/sunxi/clocks.rst
+++ b/Documentation/arch/arm/sunxi/clocks.rst
diff --git a/Documentation/arm/swp_emulation.rst b/Documentation/arch/arm/swp_emulation.rst
index 6a608a9c3715..6a608a9c3715 100644
--- a/Documentation/arm/swp_emulation.rst
+++ b/Documentation/arch/arm/swp_emulation.rst
diff --git a/Documentation/arm/tcm.rst b/Documentation/arch/arm/tcm.rst
index 1dc6c39220f9..1dc6c39220f9 100644
--- a/Documentation/arm/tcm.rst
+++ b/Documentation/arch/arm/tcm.rst
diff --git a/Documentation/arm/uefi.rst b/Documentation/arch/arm/uefi.rst
index baebe688a006..baebe688a006 100644
--- a/Documentation/arm/uefi.rst
+++ b/Documentation/arch/arm/uefi.rst
diff --git a/Documentation/arm/vfp/release-notes.rst b/Documentation/arch/arm/vfp/release-notes.rst
index c6b04937cee3..c6b04937cee3 100644
--- a/Documentation/arm/vfp/release-notes.rst
+++ b/Documentation/arch/arm/vfp/release-notes.rst
diff --git a/Documentation/arm/vlocks.rst b/Documentation/arch/arm/vlocks.rst
index a40a1742110b..a40a1742110b 100644
--- a/Documentation/arm/vlocks.rst
+++ b/Documentation/arch/arm/vlocks.rst
diff --git a/Documentation/arm64/acpi_object_usage.rst b/Documentation/arch/arm64/acpi_object_usage.rst
index 484ef9676653..1da22200fdf8 100644
--- a/Documentation/arm64/acpi_object_usage.rst
+++ b/Documentation/arch/arm64/acpi_object_usage.rst
@@ -17,16 +17,37 @@ For ACPI on arm64, tables also fall into the following categories:
- Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
- - Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IBFT,
- IORT, MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT,
- STAO, TCPA, TPM2, UEFI, XENV
+ - Optional: AGDI, BGRT, CEDT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT,
+ HMAT, IBFT, IORT, MCHI, MPAM, MPST, MSCT, NFIT, PMTT, PPTT, RASF, SBST,
+ SDEI, SLIT, SPMI, SRAT, STAO, TCPA, TPM2, UEFI, XENV
- - Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT, MSDM, OEMx,
- PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+ - Not supported: AEST, APMT, BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT,
+ MSDM, OEMx, PDTT, PSDT, RAS2, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
====== ========================================================================
Table Usage for ARMv8 Linux
====== ========================================================================
+AEST Signature Reserved (signature == "AEST")
+
+ **Arm Error Source Table**
+
+ This table informs the OS of any error nodes in the system that are
+ compliant with the Arm RAS architecture.
+
+AGDI Signature Reserved (signature == "AGDI")
+
+ **Arm Generic diagnostic Dump and Reset Device Interface Table**
+
+ This table describes a non-maskable event, that is used by the platform
+ firmware, to request the OS to generate a diagnostic dump and reset the device.
+
+APMT Signature Reserved (signature == "APMT")
+
+ **Arm Performance Monitoring Table**
+
+ This table describes the properties of PMU support implemented by
+ components in the system.
+
BERT Section 18.3 (signature == "BERT")
**Boot Error Record Table**
@@ -47,6 +68,13 @@ BGRT Section 5.2.22 (signature == "BGRT")
Optional, not currently supported, with no real use-case for an
ARM server.
+CEDT Signature Reserved (signature == "CEDT")
+
+ **CXL Early Discovery Table**
+
+ This table allows the OS to discover any CXL Host Bridges and the Host
+ Bridge registers.
+
CPEP Section 5.2.18 (signature == "CPEP")
**Corrected Platform Error Polling table**
@@ -184,6 +212,15 @@ HEST Section 18.3.2 (signature == "HEST")
Must be supplied if RAS support is provided by the platform. It
is recommended this table be supplied.
+HMAT Section 5.2.28 (signature == "HMAT")
+
+ **Heterogeneous Memory Attribute Table**
+
+ This table describes the memory attributes, such as memory side cache
+ attributes and bandwidth and latency details, related to Memory Proximity
+ Domains. The OS uses this information to optimize the system memory
+ configuration.
+
HPET Signature Reserved (signature == "HPET")
**High Precision Event timer Table**
@@ -241,6 +278,13 @@ MCHI Signature Reserved (signature == "MCHI")
Optional, not currently supported.
+MPAM Signature Reserved (signature == "MPAM")
+
+ **Memory Partitioning And Monitoring table**
+
+ This table allows the OS to discover the MPAM controls implemented by
+ the subsystems.
+
MPST Section 5.2.21 (signature == "MPST")
**Memory Power State Table**
@@ -281,18 +325,39 @@ PCCT Section 14.1 (signature == "PCCT)
Recommended for use on arm64; use of PCC is recommended when using CPPC
to control performance and power for platform processors.
+PDTT Section 5.2.29 (signature == "PDTT")
+
+ **Platform Debug Trigger Table**
+
+ This table describes PCC channels used to gather debug logs of
+ non-architectural features.
+
+
PMTT Section 5.2.21.12 (signature == "PMTT")
**Platform Memory Topology Table**
Optional, not currently supported.
+PPTT Section 5.2.30 (signature == "PPTT")
+
+ **Processor Properties Topology Table**
+
+ This table provides the processor and cache topology.
+
PSDT Section 5.2.11.3 (signature == "PSDT")
**Persistent System Description Table**
Obsolete table, will not be supported.
+RAS2 Section 5.2.21 (signature == "RAS2")
+
+ **RAS Features 2 table**
+
+ This table provides interfaces for the RAS capabilities implemented in
+ the platform.
+
RASF Section 5.2.20 (signature == "RASF")
**RAS Feature table**
@@ -318,6 +383,12 @@ SBST Section 5.2.14 (signature == "SBST")
Optional, not currently supported.
+SDEI Signature Reserved (signature == "SDEI")
+
+ **Software Delegated Exception Interface table**
+
+ This table advertises the presence of the SDEI interface.
+
SLIC Signature Reserved (signature == "SLIC")
**Software LIcensing table**
diff --git a/Documentation/arm64/amu.rst b/Documentation/arch/arm64/amu.rst
index 01f2de2b0450..01f2de2b0450 100644
--- a/Documentation/arm64/amu.rst
+++ b/Documentation/arch/arm64/amu.rst
diff --git a/Documentation/arm64/arm-acpi.rst b/Documentation/arch/arm64/arm-acpi.rst
index 47ecb9930dde..94274a8d84cf 100644
--- a/Documentation/arm64/arm-acpi.rst
+++ b/Documentation/arch/arm64/arm-acpi.rst
@@ -1,40 +1,41 @@
-=====================
-ACPI on ARMv8 Servers
-=====================
-
-ACPI can be used for ARMv8 general purpose servers designed to follow
-the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
-Base Boot Requirements) [1] specifications. Please note that the SBBR
-can be retrieved simply by visiting [1], but the SBSA is currently only
-available to those with an ARM login due to ARM IP licensing concerns.
-
-The ARMv8 kernel implements the reduced hardware model of ACPI version
+===================
+ACPI on Arm systems
+===================
+
+ACPI can be used for Armv8 and Armv9 systems designed to follow
+the BSA (Arm Base System Architecture) [0] and BBR (Arm
+Base Boot Requirements) [1] specifications. Both BSA and BBR are publicly
+accessible documents.
+Arm Servers, in addition to being BSA compliant, comply with a set
+of rules defined in SBSA (Server Base System Architecture) [2].
+
+The Arm kernel implements the reduced hardware model of ACPI version
5.1 or later. Links to the specification and all external documents
it refers to are managed by the UEFI Forum. The specification is
available at http://www.uefi.org/specifications and documents referenced
by the specification can be found via http://www.uefi.org/acpi.
-If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+If an Arm system does not meet the requirements of the BSA and BBR,
or cannot be described using the mechanisms defined in the required ACPI
specifications, then ACPI may not be a good fit for the hardware.
While the documents mentioned above set out the requirements for building
-industry-standard ARMv8 servers, they also apply to more than one operating
+industry-standard Arm systems, they also apply to more than one operating
system. The purpose of this document is to describe the interaction between
-ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and Linux only, on an Arm system -- that is, what Linux expects of
ACPI and what ACPI can expect of Linux.
-Why ACPI on ARM?
+Why ACPI on Arm?
----------------
Before examining the details of the interface between ACPI and Linux, it is
useful to understand why ACPI is being used. Several technologies already
exist in Linux for describing non-enumerable hardware, after all. In this
-section we summarize a blog post [2] from Grant Likely that outlines the
-reasoning behind ACPI on ARMv8 servers. Actually, we snitch a good portion
+section we summarize a blog post [3] from Grant Likely that outlines the
+reasoning behind ACPI on Arm systems. Actually, we snitch a good portion
of the summary text almost directly, to be honest.
-The short form of the rationale for ACPI on ARM is:
+The short form of the rationale for ACPI on Arm is:
- ACPI’s byte code (AML) allows the platform to encode hardware behavior,
while DT explicitly does not support this. For hardware vendors, being
@@ -47,7 +48,7 @@ The short form of the rationale for ACPI on ARM is:
- In the enterprise server environment, ACPI has established bindings (such
as for RAS) which are currently used in production systems. DT does not.
- Such bindings could be defined in DT at some point, but doing so means ARM
+ Such bindings could be defined in DT at some point, but doing so means Arm
and x86 would end up using completely different code paths in both firmware
and the kernel.
@@ -108,7 +109,7 @@ recent version of the kernel.
Relationship with Device Tree
-----------------------------
-ACPI support in drivers and subsystems for ARMv8 should never be mutually
+ACPI support in drivers and subsystems for Arm should never be mutually
exclusive with DT support at compile time.
At boot time the kernel will only use one description method depending on
@@ -121,11 +122,11 @@ time).
Booting using ACPI tables
-------------------------
-The only defined method for passing ACPI tables to the kernel on ARMv8
+The only defined method for passing ACPI tables to the kernel on Arm
is via the UEFI system configuration table. Just so it is explicit, this
means that ACPI is only supported on platforms that boot via UEFI.
-When an ARMv8 system boots, it can either have DT information, ACPI tables,
+When an Arm system boots, it can either have DT information, ACPI tables,
or in some very unusual cases, both. If no command line parameters are used,
the kernel will try to use DT for device enumeration; if there is no DT
present, the kernel will try to use ACPI tables, but only if they are present.
@@ -169,7 +170,7 @@ hardware reduced mode must be set to zero.
For the ACPI core to operate properly, and in turn provide the information
the kernel needs to configure devices, it expects to find the following
-tables (all section numbers refer to the ACPI 6.1 specification):
+tables (all section numbers refer to the ACPI 6.5 specification):
- RSDP (Root System Description Pointer), section 5.2.5
@@ -184,20 +185,76 @@ tables (all section numbers refer to the ACPI 6.1 specification):
- GTDT (Generic Timer Description Table), section 5.2.24
+ - PPTT (Processor Properties Topology Table), section 5.2.30
+
+ - DBG2 (DeBuG port table 2), section 5.2.6, specifically Table 5-6.
+
+ - APMT (Arm Performance Monitoring unit Table), section 5.2.6, specifically Table 5-6.
+
+ - AGDI (Arm Generic diagnostic Dump and Reset Device Interface Table), section 5.2.6, specifically Table 5-6.
+
- If PCI is supported, the MCFG (Memory mapped ConFiGuration
- Table), section 5.2.6, specifically Table 5-31.
+ Table), section 5.2.6, specifically Table 5-6.
- If booting without a console=<device> kernel parameter is
supported, the SPCR (Serial Port Console Redirection table),
- section 5.2.6, specifically Table 5-31.
+ section 5.2.6, specifically Table 5-6.
- If necessary to describe the I/O topology, SMMUs and GIC ITSs,
the IORT (Input Output Remapping Table, section 5.2.6, specifically
- Table 5-31).
+ Table 5-6).
+
+ - If NUMA is supported, the following tables are required:
+
+ - SRAT (System Resource Affinity Table), section 5.2.16
+
+ - SLIT (System Locality distance Information Table), section 5.2.17
+
+ - If NUMA is supported, and the system contains heterogeneous memory,
+ the HMAT (Heterogeneous Memory Attribute Table), section 5.2.28.
+
+ - If the ACPI Platform Error Interfaces are required, the following
+ tables are conditionally required:
+
+ - BERT (Boot Error Record Table, section 18.3.1)
+
+ - EINJ (Error INJection table, section 18.6.1)
+
+ - ERST (Error Record Serialization Table, section 18.5)
+
+ - HEST (Hardware Error Source Table, section 18.3.2)
+
+ - SDEI (Software Delegated Exception Interface table, section 5.2.6,
+ specifically Table 5-6)
+
+ - AEST (Arm Error Source Table, section 5.2.6,
+ specifically Table 5-6)
+
+ - RAS2 (ACPI RAS2 feature table, section 5.2.21)
+
+ - If the system contains controllers using PCC channel, the
+ PCCT (Platform Communications Channel Table), section 14.1
+
+ - If the system contains a controller to capture board-level system state,
+ and communicates with the host via PCC, the PDTT (Platform Debug Trigger
+ Table), section 5.2.29.
+
+ - If NVDIMM is supported, the NFIT (NVDIMM Firmware Interface Table), section 5.2.26
+
+ - If video framebuffer is present, the BGRT (Boot Graphics Resource Table), section 5.2.23
+
+ - If IPMI is implemented, the SPMI (Server Platform Management Interface),
+ section 5.2.6, specifically Table 5-6.
+
+ - If the system contains a CXL Host Bridge, the CEDT (CXL Early Discovery
+ Table), section 5.2.6, specifically Table 5-6.
+
+ - If the system supports MPAM, the MPAM (Memory Partitioning And Monitoring table), section 5.2.6,
+ specifically Table 5-6.
+
+ - If the system lacks persistent storage, the IBFT (iSCSI Boot Firmware
+ Table), section 5.2.6, specifically Table 5-6.
- - If NUMA is supported, the SRAT (System Resource Affinity Table)
- and SLIT (System Locality distance Information Table), sections
- 5.2.16 and 5.2.17, respectively.
If the above tables are not all present, the kernel may or may not be
able to boot properly since it may not be able to configure all of the
@@ -269,16 +326,14 @@ Drivers should look for device properties in the _DSD object ONLY; the _DSD
object is described in the ACPI specification section 6.2.5, but this only
describes how to define the structure of an object returned via _DSD, and
how specific data structures are defined by specific UUIDs. Linux should
-only use the _DSD Device Properties UUID [5]:
+only use the _DSD Device Properties UUID [4]:
- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
- - https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
-
-The UEFI Forum provides a mechanism for registering device properties [4]
-so that they may be used across all operating systems supporting ACPI.
-Device properties that have not been registered with the UEFI Forum should
-not be used.
+Common device properties can be registered by creating a pull request to [4] so
+that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum can be used
+but not as "uefi-" common properties.
Before creating new device properties, check to be sure that they have not
been defined before and either registered in the Linux kernel documentation
@@ -306,7 +361,7 @@ process.
Once registration and review have been completed, the kernel provides an
interface for looking up device properties in a manner independent of
-whether DT or ACPI is being used. This API should be used [6]; it can
+whether DT or ACPI is being used. This API should be used [5]; it can
eliminate some duplication of code paths in driver probing functions and
discourage divergence between DT bindings and ACPI device properties.
@@ -448,15 +503,15 @@ ASWG
----
The ACPI specification changes regularly. During the year 2014, for instance,
version 5.1 was released and version 6.0 substantially completed, with most of
-the changes being driven by ARM-specific requirements. Proposed changes are
+the changes being driven by Arm-specific requirements. Proposed changes are
presented and discussed in the ASWG (ACPI Specification Working Group) which
is a part of the UEFI Forum. The current version of the ACPI specification
-is 6.1 release in January 2016.
+is 6.5, released in August 2022.
Participation in this group is open to all UEFI members. Please see
http://www.uefi.org/workinggroup for details on group membership.
-It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+It is the intent of the Arm ACPI kernel code to follow the ACPI specification
as closely as possible, and to only implement functionality that complies with
the released standards from UEFI ASWG. As a practical matter, there will be
vendors that provide bad ACPI tables or violate the standards in some way.
@@ -470,12 +525,12 @@ likely be willing to assist in submitting ECRs.
Linux Code
----------
-Individual items specific to Linux on ARM, contained in the Linux
+Individual items specific to Linux on Arm, contained in the Linux
source code, are in the list that follows:
ACPI_OS_NAME
This macro defines the string to be returned when
- an ACPI method invokes the _OS method. On ARM64
+ an ACPI method invokes the _OS method. On Arm
systems, this macro will be "Linux" by default.
The command line parameter acpi_os=<string>
can be used to set it to some other value. The
@@ -485,36 +540,28 @@ ACPI_OS_NAME
ACPI Objects
------------
Detailed expectations for ACPI tables and object are listed in the file
-Documentation/arm64/acpi_object_usage.rst.
+Documentation/arch/arm64/acpi_object_usage.rst.
References
----------
-[0] http://silver.arm.com
- document ARM-DEN-0029, or newer:
- "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+[0] https://developer.arm.com/documentation/den0094/latest
+ Document Arm-DEN-0094: "Arm Base System Architecture", version 1.0C, dated 6 Oct 2022
+
+[1] https://developer.arm.com/documentation/den0044/latest
+ Document Arm-DEN-0044: "Arm Base Boot Requirements", version 2.0G, dated 15 Apr 2022
-[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
- Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
- Software on ARM Platforms", dated 16 Aug 2014
+[2] https://developer.arm.com/documentation/den0029/latest
+ Document Arm-DEN-0029: "Arm Server Base System Architecture", version 7.1, dated 06 Oct 2022
-[2] http://www.secretlab.ca/archives/151,
+[3] http://www.secretlab.ca/archives/151,
10 Jan 2015, Copyright (c) 2015,
Linaro Ltd., written by Grant Likely.
-[3] AMD ACPI for Seattle platform documentation
- http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
-
-
-[4] http://www.uefi.org/acpi
- please see the link for the "ACPI _DSD Device
- Property Registry Instructions"
-
-[5] http://www.uefi.org/acpi
- please see the link for the "_DSD (Device
- Specific Data) Implementation Guide"
+[4] _DSD (Device Specific Data) Implementation Guide
+ https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf
-[6] Kernel code for the unified device
+[5] Kernel code for the unified device
property interface can be found in
include/linux/property.h and drivers/base/property.c.
diff --git a/Documentation/arm64/asymmetric-32bit.rst b/Documentation/arch/arm64/asymmetric-32bit.rst
index 64a0b505da7d..64a0b505da7d 100644
--- a/Documentation/arm64/asymmetric-32bit.rst
+++ b/Documentation/arch/arm64/asymmetric-32bit.rst
diff --git a/Documentation/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index ffeccdd6bdac..b57776a68f15 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -379,6 +379,38 @@ Before jumping into the kernel, the following conditions must be met:
- SMCR_EL2.EZT0 (bit 30) must be initialised to 0b1.
+ For CPUs with Memory Copy and Memory Set instructions (FEAT_MOPS):
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.MSCEn (bit 11) must be initialised to 0b1.
+
+ For CPUs with the Extended Translation Control Register feature (FEAT_TCR2):
+
+ - If EL3 is present:
+
+ - SCR_EL3.TCR2En (bit 43) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HCRX_EL2.TCR2En (bit 14) must be initialised to 0b1.
+
+ For CPUs with the Stage 1 Permission Indirection Extension feature (FEAT_S1PIE):
+
+ - If EL3 is present:
+
+ - SCR_EL3.PIEn (bit 45) must be initialised to 0b1.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - HFGRTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nPIR_EL1 (bit 58) must be initialised to 0b1.
+
+ - HFGRTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+
+ - HFGWTR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1.
+
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
enter the kernel in the same exception level. Where the values documented
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arch/arm64/cpu-feature-registers.rst
index c7adc7897df6..4e4625f2455f 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arch/arm64/cpu-feature-registers.rst
@@ -288,6 +288,8 @@ infrastructure:
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
+ | MOPS | [19-16] | y |
+ +------------------------------+---------+---------+
| RPRES | [7-4] | y |
+------------------------------+---------+---------+
| WFXT | [3-0] | y |
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index 83e57e4d38e2..8c8addb4194c 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -102,7 +102,7 @@ HWCAP_ASIMDHP
HWCAP_CPUID
EL0 access to certain ID registers is available, to the extent
- described by Documentation/arm64/cpu-feature-registers.rst.
+ described by Documentation/arch/arm64/cpu-feature-registers.rst.
These ID registers may imply the availability of features.
@@ -163,12 +163,12 @@ HWCAP_SB
HWCAP_PACA
Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
ID_AA64ISAR1_EL1.API == 0b0001, as described by
- Documentation/arm64/pointer-authentication.rst.
+ Documentation/arch/arm64/pointer-authentication.rst.
HWCAP_PACG
Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
- Documentation/arm64/pointer-authentication.rst.
+ Documentation/arch/arm64/pointer-authentication.rst.
HWCAP2_DCPODP
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
@@ -226,7 +226,7 @@ HWCAP2_BTI
HWCAP2_MTE
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
- by Documentation/arm64/memory-tagging-extension.rst.
+ by Documentation/arch/arm64/memory-tagging-extension.rst.
HWCAP2_ECV
Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
@@ -239,11 +239,11 @@ HWCAP2_RPRES
HWCAP2_MTE3
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
- by Documentation/arm64/memory-tagging-extension.rst.
+ by Documentation/arch/arm64/memory-tagging-extension.rst.
HWCAP2_SME
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
- by Documentation/arm64/sme.rst.
+ by Documentation/arch/arm64/sme.rst.
HWCAP2_SME_I16I64
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
@@ -302,6 +302,9 @@ HWCAP2_SMEB16B16
HWCAP2_SMEF16F16
Functionality implied by ID_AA64SMFR0_EL1.F16F16 == 0b1
+HWCAP2_MOPS
+ Functionality implied by ID_AA64ISAR2_EL1.MOPS == 0b0001.
+
4. Unused AT_HWCAP bits
-----------------------
diff --git a/Documentation/arm64/features.rst b/Documentation/arch/arm64/features.rst
index dfa4cb3cd3ef..dfa4cb3cd3ef 100644
--- a/Documentation/arm64/features.rst
+++ b/Documentation/arch/arm64/features.rst
diff --git a/Documentation/arm64/hugetlbpage.rst b/Documentation/arch/arm64/hugetlbpage.rst
index a110124c11e3..a110124c11e3 100644
--- a/Documentation/arm64/hugetlbpage.rst
+++ b/Documentation/arch/arm64/hugetlbpage.rst
diff --git a/Documentation/arm64/index.rst b/Documentation/arch/arm64/index.rst
index ae21f8118830..d08e924204bf 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arch/arm64/index.rst
@@ -15,11 +15,13 @@ ARM64 Architecture
cpu-feature-registers
elf_hwcaps
hugetlbpage
+ kdump
legacy_instructions
memory
memory-tagging-extension
perf
pointer-authentication
+ ptdump
silicon-errata
sme
sve
diff --git a/Documentation/arm64/kasan-offsets.sh b/Documentation/arch/arm64/kasan-offsets.sh
index 2dc5f9e18039..2dc5f9e18039 100644
--- a/Documentation/arm64/kasan-offsets.sh
+++ b/Documentation/arch/arm64/kasan-offsets.sh
diff --git a/Documentation/arch/arm64/kdump.rst b/Documentation/arch/arm64/kdump.rst
new file mode 100644
index 000000000000..56a89f45df28
--- /dev/null
+++ b/Documentation/arch/arm64/kdump.rst
@@ -0,0 +1,92 @@
+=======================================
+crashkernel memory reservation on arm64
+=======================================
+
+Author: Baoquan He <bhe@redhat.com>
+
+Kdump mechanism is used to capture a corrupted kernel vmcore so that
+it can be subsequently analyzed. In order to do this, a preliminarily
+reserved memory is needed to pre-load the kdump kernel and boot such
+kernel if corruption happens.
+
+That reserved memory for kdump is adapted to be able to minimally
+accommodate the kdump kernel and the user space programs needed for the
+vmcore collection.
+
+Kernel parameter
+================
+
+Through the kernel parameters below, memory can be reserved accordingly
+during the early stage of the first kernel booting so that a continuous
+large chunk of memory can be found. The low memory reservation needs to
+be considered if the crashkernel is reserved from the high memory area.
+
+- crashkernel=size@offset
+- crashkernel=size
+- crashkernel=size,high crashkernel=size,low
+
+Low memory and high memory
+==========================
+
+For kdump reservations, low memory is the memory area under a specific
+limit, usually decided by the accessible address bits of the DMA-capable
+devices needed by the kdump kernel to run. Those devices not related to
+vmcore dumping can be ignored. On arm64, the low memory upper bound is
+not fixed: it is 1G on the RPi4 platform but 4G on most other systems.
+On special kernels built with CONFIG_ZONE_(DMA|DMA32) disabled, the
+whole system RAM is low memory. Outside of the low memory described
+above, the rest of system RAM is considered high memory.
+
+Implementation
+==============
+
+1) crashkernel=size@offset
+--------------------------
+
+The crashkernel memory must be reserved at the user-specified region or
+fail if already occupied.
+
+
+2) crashkernel=size
+-------------------
+
+The crashkernel memory region will be reserved in any available position
+according to the search order:
+
+Firstly, the kernel searches the low memory area for an available region
+with the specified size.
+
+If searching for low memory fails, the kernel falls back to searching
+the high memory area for an available region of the specified size. If
+the reservation in high memory succeeds, a default size reservation in
+the low memory will be done. Currently the default size is 128M,
+sufficient for the low memory needs of the kdump kernel.
+
+Note: crashkernel=size is the recommended option for crashkernel kernel
+reservations. The user would not need to know the system memory layout
+for a specific platform.
+
+3) crashkernel=size,high crashkernel=size,low
+---------------------------------------------
+
+crashkernel=size,(high|low) are an important supplement to
+crashkernel=size. They allow the user to specify how much memory needs
+to be allocated from the high memory and low memory respectively. On
+many systems the low memory is precious and crashkernel reservations
+from this area should be kept to a minimum.
+
+To reserve memory for crashkernel=size,high, searching is first
+attempted from the high memory region. If the reservation succeeds, the
+low memory reservation will be done subsequently.
+
+If reservation from the high memory failed, the kernel falls back to
+searching the low memory with the specified size in crashkernel=,high.
+If it succeeds, no further reservation for low memory is needed.
+
+Notes:
+
+- If crashkernel=,low is not specified, the default low memory
+ reservation will be done automatically.
+
+- If crashkernel=0,low is specified, it means that the low memory
+ reservation is omitted intentionally.
diff --git a/Documentation/arm64/legacy_instructions.rst b/Documentation/arch/arm64/legacy_instructions.rst
index 54401b22cb8f..54401b22cb8f 100644
--- a/Documentation/arm64/legacy_instructions.rst
+++ b/Documentation/arch/arm64/legacy_instructions.rst
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arch/arm64/memory-tagging-extension.rst
index dbae47bba25e..679725030731 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arch/arm64/memory-tagging-extension.rst
@@ -221,7 +221,7 @@ programs should not retry in case of a non-zero system call return.
``NT_ARM_TAGGED_ADDR_CTRL`` allow ``ptrace()`` access to the tagged
address ABI control and MTE configuration of a process as per the
``prctl()`` options described in
-Documentation/arm64/tagged-address-abi.rst and above. The corresponding
+Documentation/arch/arm64/tagged-address-abi.rst and above. The corresponding
``regset`` is 1 element of 8 bytes (``sizeof(long))``).
Core dump support
diff --git a/Documentation/arm64/memory.rst b/Documentation/arch/arm64/memory.rst
index 2a641ba7be3b..55a55f30eed8 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arch/arm64/memory.rst
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
0000000000000000 0000ffffffffffff 256TB user
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
[ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
- ffff800000000000 ffff800007ffffff 128MB modules
- ffff800008000000 fffffbffefffffff 124TB vmalloc
+ ffff800000000000 ffff80007fffffff 2GB modules
+ ffff800080000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
0000000000000000 000fffffffffffff 4PB user
fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
[fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
- ffff800000000000 ffff800007ffffff 128MB modules
- ffff800008000000 fffffbffefffffff 124TB vmalloc
+ ffff800000000000 ffff80007fffffff 2GB modules
+ ffff800080000000 fffffbffefffffff 124TB vmalloc
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
diff --git a/Documentation/arm64/perf.rst b/Documentation/arch/arm64/perf.rst
index 1f87b57c2332..1f87b57c2332 100644
--- a/Documentation/arm64/perf.rst
+++ b/Documentation/arch/arm64/perf.rst
diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arch/arm64/pointer-authentication.rst
index e5dad2e40aa8..e5dad2e40aa8 100644
--- a/Documentation/arm64/pointer-authentication.rst
+++ b/Documentation/arch/arm64/pointer-authentication.rst
diff --git a/Documentation/arch/arm64/ptdump.rst b/Documentation/arch/arm64/ptdump.rst
new file mode 100644
index 000000000000..5dcfc5d7cddf
--- /dev/null
+++ b/Documentation/arch/arm64/ptdump.rst
@@ -0,0 +1,96 @@
+======================
+Kernel page table dump
+======================
+
+ptdump is a debugfs interface that provides a detailed dump of the
+kernel page tables. It offers a comprehensive overview of the kernel
+virtual memory layout as well as the attributes associated with the
+various regions in a human-readable format. It is useful to dump the
+kernel page tables to verify permissions and memory types. Examining the
+page table entries and permissions helps identify potential security
+vulnerabilities such as mappings with overly permissive access rights or
+improper memory protections.
+
+Memory hotplug allows dynamic expansion or contraction of available
+memory without requiring a system reboot. To maintain the consistency
+and integrity of the memory management data structures, arm64 makes use
+of the ``mem_hotplug_lock`` semaphore in write mode. Additionally, in
+read mode, ``mem_hotplug_lock`` supports an efficient implementation of
+``get_online_mems()`` and ``put_online_mems()``. These protect the
+offlining of memory being accessed by the ptdump code.
+
+In order to dump the kernel page tables, enable the following
+configurations and mount debugfs::
+
+ CONFIG_GENERIC_PTDUMP=y
+ CONFIG_PTDUMP_CORE=y
+ CONFIG_PTDUMP_DEBUGFS=y
+
+ mount -t debugfs nodev /sys/kernel/debug
+ cat /sys/kernel/debug/kernel_page_tables
+
+On analysing the output of ``cat /sys/kernel/debug/kernel_page_tables``
+one can derive information about the virtual address range of the entry,
+followed by size of the memory region covered by this entry, the
+hierarchical structure of the page tables and finally the attributes
+associated with each page. The page attributes provide information about
+access permissions, execution capability, type of mapping such as leaf
+level PTE or block level PGD, PMD and PUD, and access status of a page
+within the kernel memory. Assessing these attributes can assist in
+understanding the memory layout, access patterns and security
+characteristics of the kernel pages.
+
+Kernel virtual memory layout example::
+
+ start address end address size attributes
+ +---------------------------------------------------------------------------------------+
+ | ---[ Linear Mapping start ]---------------------------------------------------------- |
+ | .................. |
+ | 0xfff0000000000000-0xfff0000000210000 2112K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED |
+ | 0xfff0000000210000-0xfff0000001c00000 26560K PTE ro NX SHD AF UXN MEM/NORMAL |
+ | .................. |
+ | ---[ Linear Mapping end ]------------------------------------------------------------ |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Modules start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xffff800000000000-0xffff800008000000 128M PTE |
+ | .................. |
+ | ---[ Modules end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmalloc() area ]---------------------------------------------------------------- |
+ | .................. |
+ | 0xffff800008010000-0xffff800008200000 1984K PTE ro x SHD AF UXN MEM/NORMAL |
+ | 0xffff800008200000-0xffff800008e00000 12M PTE ro x SHD AF CON UXN MEM/NORMAL |
+ | .................. |
+ | ---[ vmalloc() end ]----------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ Fixmap start ]------------------------------------------------------------------ |
+ | .................. |
+ | 0xfffffbfffdb80000-0xfffffbfffdb90000 64K PTE ro x SHD AF UXN MEM/NORMAL |
+ | 0xfffffbfffdb90000-0xfffffbfffdba0000 64K PTE ro NX SHD AF UXN MEM/NORMAL |
+ | .................. |
+ | ---[ Fixmap end ]-------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ PCI I/O start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xfffffbfffe800000-0xfffffbffff800000 16M PTE |
+ | .................. |
+ | ---[ PCI I/O end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+ | ---[ vmemmap start ]----------------------------------------------------------------- |
+ | .................. |
+ | 0xfffffc0002000000-0xfffffc0002200000 2M PTE RW NX SHD AF UXN MEM/NORMAL |
+ | 0xfffffc0002200000-0xfffffc0020000000 478M PTE |
+ | .................. |
+ | ---[ vmemmap end ]------------------------------------------------------------------- |
+ +---------------------------------------------------------------------------------------+
+
+``cat /sys/kernel/debug/kernel_page_tables`` output::
+
+ 0xfff0000001c00000-0xfff0000080000000 2020M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000080000000-0xfff0000800000000 30G PMD
+ 0xfff0000800000000-0xfff0000800700000 7M PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000800700000-0xfff0000800710000 64K PTE ro NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000800710000-0xfff0000880000000 2089920K PTE RW NX SHD AF UXN MEM/NORMAL-TAGGED
+ 0xfff0000880000000-0xfff0040000000000 4062G PMD
+ 0xfff0040000000000-0xffff800000000000 3964T PGD
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 9e311bc43e05..d6430ade349d 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -214,3 +214,7 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
+----------------+-----------------+-----------------+-----------------------------+
+
++----------------+-----------------+-----------------+-----------------------------+
+| ASR | ASR8601 | #8601001 | N/A |
++----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
index 1c43ea12eb4f..ba529a1dc606 100644
--- a/Documentation/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@@ -465,4 +465,4 @@ References
[2] arch/arm64/include/uapi/asm/ptrace.h
AArch64 Linux ptrace ABI definitions
-[3] Documentation/arm64/cpu-feature-registers.rst
+[3] Documentation/arch/arm64/cpu-feature-registers.rst
diff --git a/Documentation/arm64/sve.rst b/Documentation/arch/arm64/sve.rst
index 1b90a30382ac..0d9a426e9f85 100644
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arch/arm64/sve.rst
@@ -606,7 +606,7 @@ References
[2] arch/arm64/include/uapi/asm/ptrace.h
AArch64 Linux ptrace ABI definitions
-[3] Documentation/arm64/cpu-feature-registers.rst
+[3] Documentation/arch/arm64/cpu-feature-registers.rst
[4] ARM IHI0055C
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arch/arm64/tagged-address-abi.rst
index 540a1d4fc6c9..fe24a3f158c5 100644
--- a/Documentation/arm64/tagged-address-abi.rst
+++ b/Documentation/arch/arm64/tagged-address-abi.rst
@@ -107,7 +107,7 @@ following behaviours are guaranteed:
A definition of the meaning of tagged pointers on AArch64 can be found
-in Documentation/arm64/tagged-pointers.rst.
+in Documentation/arch/arm64/tagged-pointers.rst.
3. AArch64 Tagged Address ABI Exceptions
-----------------------------------------
diff --git a/Documentation/arm64/tagged-pointers.rst b/Documentation/arch/arm64/tagged-pointers.rst
index 19d284b70384..81b6c2a770dd 100644
--- a/Documentation/arm64/tagged-pointers.rst
+++ b/Documentation/arch/arm64/tagged-pointers.rst
@@ -22,7 +22,7 @@ Passing tagged addresses to the kernel
All interpretation of userspace memory addresses by the kernel assumes
an address tag of 0x00, unless the application enables the AArch64
Tagged Address ABI explicitly
-(Documentation/arm64/tagged-address-abi.rst).
+(Documentation/arch/arm64/tagged-address-abi.rst).
This includes, but is not limited to, addresses found in:
diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst
index 80ee31016584..8458b88e9b79 100644
--- a/Documentation/arch/index.rst
+++ b/Documentation/arch/index.rst
@@ -10,8 +10,8 @@ implementation.
:maxdepth: 2
arc/index
- ../arm/index
- ../arm64/index
+ arm/index
+ arm64/index
ia64/index
../loongarch/index
m68k/index
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index 387ccbcb558f..cb05d90111b4 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -287,6 +287,13 @@ Removing a directory will move all tasks and cpus owned by the group it
represents to the parent. Removing one of the created CTRL_MON groups
will automatically remove all MON groups below it.
+Moving MON group directories to a new parent CTRL_MON group is supported
+for the purpose of changing the resource allocations of a MON group
+without impacting its monitoring data or assigned tasks. This operation
+is not allowed for MON groups which monitor CPUs. No other move
+operation is currently allowed other than simply renaming a CTRL_MON or
+MON group.
+
All groups contain the following files:
"tasks":
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 6d7770793fab..07433915aa41 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -238,11 +238,8 @@ The following is the breakdown for each field in struct ``bpf_iter_reg``.
that the kernel function cond_resched() is called to avoid other kernel
subsystem (e.g., rcu) misbehaving.
* - seq_info
- - Specifies certain action requests in the kernel BPF iterator
- infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
- that the kernel function cond_resched() is called to avoid other kernel
- subsystem (e.g., rcu) misbehaving.
-
+ - Specifies the set of seq operations for the BPF iterator and helpers to
+ initialize/free the private data for the corresponding ``seq_file``.
`Click here
<https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 41efd8874eeb..3139c7c02e79 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -351,14 +351,15 @@ In addition to the above kfuncs, there is also a set of read-only kfuncs that
can be used to query the contents of cpumasks.
.. kernel-doc:: kernel/bpf/cpumask.c
- :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_test_cpu
+ :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and
+ bpf_cpumask_test_cpu
.. kernel-doc:: kernel/bpf/cpumask.c
:identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
bpf_cpumask_empty bpf_cpumask_full
.. kernel-doc:: kernel/bpf/cpumask.c
- :identifiers: bpf_cpumask_any bpf_cpumask_any_and
+ :identifiers: bpf_cpumask_any_distribute bpf_cpumask_any_and_distribute
----
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 492980ece1ab..6644842cd3ea 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -163,13 +163,13 @@ BPF_MUL 0x20 dst \*= src
BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0
BPF_OR 0x40 dst \|= src
BPF_AND 0x50 dst &= src
-BPF_LSH 0x60 dst <<= src
-BPF_RSH 0x70 dst >>= src
+BPF_LSH 0x60 dst <<= (src & mask)
+BPF_RSH 0x70 dst >>= (src & mask)
BPF_NEG 0x80 dst = ~src
BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst
BPF_XOR 0xa0 dst ^= src
BPF_MOV 0xb0 dst = src
-BPF_ARSH 0xc0 sign extending shift right
+BPF_ARSH 0xc0 sign extending dst >>= (src & mask)
BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below)
======== ===== ==========================================================
@@ -204,6 +204,9 @@ for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result
interpreted as an unsigned 64-bit value. There are no instructions for
signed division or modulo.
+Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31)
+for 32-bit operations.
+
Byte swap instructions
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ea2516374d92..0d2647fb358d 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,7 +100,7 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
-2.2.2 __uninit Annotation
+2.2.3 __uninit Annotation
-------------------------
This annotation is used to indicate that the argument will be treated as
@@ -117,6 +117,27 @@ Here, the dynptr will be treated as an uninitialized dynptr. Without this
annotation, the verifier will reject the program if the dynptr passed in is
not initialized.
+2.2.4 __opt Annotation
+-------------------------
+
+This annotation is used to indicate that the buffer associated with an __sz or __szk
+argument may be null. If the function is passed a nullptr in place of the buffer,
+the verifier will not check that length is appropriate for the buffer. The kfunc is
+responsible for checking if this buffer is null before using it.
+
+An example is given below::
+
+ __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk)
+ {
+ ...
+ }
+
+Here, the buffer may be null. If buffer is not null, it is at least of size buffer__szk.
+Either way, the returned buffer is either NULL, or of size buffer__szk. Without this
+annotation, the verifier will reject the program if a null pointer is passed in with
+a nonzero size.
+
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -206,23 +227,49 @@ absolutely no ABI stability guarantees.
As mentioned above, a nested pointer obtained from walking a trusted pointer is
no longer trusted, with one exception. If a struct type has a field that is
-guaranteed to be valid as long as its parent pointer is trusted, the
-``BTF_TYPE_SAFE_NESTED`` macro can be used to express that to the verifier as
-follows:
+guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
+as its parent pointer is valid, the following macros can be used to express
+that to the verifier:
+
+* ``BTF_TYPE_SAFE_TRUSTED``
+* ``BTF_TYPE_SAFE_RCU``
+* ``BTF_TYPE_SAFE_RCU_OR_NULL``
+
+For example,
+
+.. code-block:: c
+
+ BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+ };
+
+or
.. code-block:: c
- BTF_TYPE_SAFE_NESTED(struct task_struct) {
+ BTF_TYPE_SAFE_RCU(struct task_struct) {
const cpumask_t *cpus_ptr;
+ struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
};
In other words, you must:
-1. Wrap the trusted pointer type in the ``BTF_TYPE_SAFE_NESTED`` macro.
+1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.
-2. Specify the type and name of the trusted nested field. This field must match
+2. Specify the type and name of the valid nested field. This field must match
the field in the original type definition exactly.
+A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
+that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
+is emitted in the ``type_is_trusted()`` function as follows:
+
+.. code-block:: c
+
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+
2.4.5 KF_SLEEPABLE flag
-----------------------
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
index ca8957d5b671..e4a777a6a3a2 100644
--- a/Documentation/bpf/llvm_reloc.rst
+++ b/Documentation/bpf/llvm_reloc.rst
@@ -48,7 +48,7 @@ the code with ``llvm-objdump -dr test.o``::
14: 0f 10 00 00 00 00 00 00 r0 += r1
15: 95 00 00 00 00 00 00 00 exit
-There are four relations in the above for four ``LD_imm64`` instructions.
+There are four relocations in the above for four ``LD_imm64`` instructions.
The following ``llvm-readelf -r test.o`` shows the binary values of the four
relocations::
@@ -79,14 +79,16 @@ The following is the symbol table with ``llvm-readelf -s test.o``::
The 6th entry is global variable ``g1`` with value 0.
Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
-for global variable ``g2`` which has a symbol value 4, the offset
-from the start of ``.data`` section.
-
-The third and fourth relocations refers to static variables ``l1``
-and ``l2``. From ``.rel.text`` section above, it is not clear
-which symbols they really refers to as they both refers to
+has a type of ``R_BPF_64_64`` and refers to entry 7 in the symbol table.
+The second relocation resolves to global variable ``g2`` which has a symbol
+value 4. The symbol value represents the offset from the start of ``.data``
+section where the initial value of the global variable ``g2`` is stored.
+
+The third and fourth relocations refer to static variables ``l1``
+and ``l2``. From the ``.rel.text`` section above, it is not clear
+to which symbols they really refer as they both refer to
symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type
-and represents a section. So for static variable or function,
+and represents a section. So for a static variable or function,
the section offset is written to the original insn
buffer, which is called ``A`` (addend). Looking at
above insn ``7`` and ``11``, they have section offset ``8`` and ``12``.
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
index 8669426264c6..d2343952f2cb 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -1,5 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0-only
.. Copyright (C) 2022 Red Hat, Inc.
+.. Copyright (C) 2022-2023 Isovalent, Inc.
===============================================
BPF_MAP_TYPE_HASH, with PERCPU and LRU Variants
@@ -29,7 +30,16 @@ will automatically evict the least recently used entries when the hash
table reaches capacity. An LRU hash maintains an internal LRU list that
is used to select elements for eviction. This internal LRU list is
shared across CPUs but it is possible to request a per CPU LRU list with
-the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.
+the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``. The
+following table outlines the properties of LRU maps depending on the
+map type and the flags used to create the map.
+
+======================== ========================= ================================
+Flag ``BPF_MAP_TYPE_LRU_HASH`` ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
+======================== ========================= ================================
+**BPF_F_NO_COMMON_LRU** Per-CPU LRU, global map Per-CPU LRU, per-cpu map
+**!BPF_F_NO_COMMON_LRU** Global LRU, global map Global LRU, per-cpu map
+======================== ========================= ================================
Usage
=====
@@ -206,3 +216,44 @@ Userspace walking the map elements from the map declared above:
cur_key = &next_key;
}
}
+
+Internals
+=========
+
+This section of the document is targeted at Linux developers and describes
+aspects of the map implementations that are not considered stable ABI. The
+following details are subject to change in future versions of the kernel.
+
+``BPF_MAP_TYPE_LRU_HASH`` and variants
+--------------------------------------
+
+Updating elements in LRU maps may trigger eviction behaviour when the capacity
+of the map is reached. There are various steps that the update algorithm
+attempts in order to enforce the LRU property which have increasing impacts on
+other CPUs involved in the following operation attempts:
+
+- Attempt to use CPU-local state to batch operations
+- Attempt to fetch free nodes from global lists
+- Attempt to pull any node from a global list and remove it from the hashmap
+- Attempt to pull any node from any CPU's list and remove it from the hashmap
+
+This algorithm is described visually in the following diagram. See the
+description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
+the corresponding operations:
+
+.. kernel-figure:: map_lru_hash_update.dot
+ :alt: Diagram outlining the LRU eviction steps taken during map update.
+
+ LRU hash eviction during map update for ``BPF_MAP_TYPE_LRU_HASH`` and
+ variants. See the dot file source for kernel function name code references.
+
+Map updates start from the oval in the top right "begin ``bpf_map_update()``"
+and progress through the graph towards the bottom where the result may be
+either a successful update or a failure with various error codes. The key in
+the top right provides indicators for which locks may be involved in specific
+operations. This is intended as a visual hint for reasoning about how map
+contention may impact update operations, though the map type and flags may
+impact the actual contention on those locks, based on the logic described in
+the table above. For instance, if the map is created with type
+``BPF_MAP_TYPE_LRU_PERCPU_HASH`` and flags ``BPF_F_NO_COMMON_LRU`` then all map
+properties would be per-cpu.
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
new file mode 100644
index 000000000000..a0fee349d29c
--- /dev/null
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2022-2023 Isovalent, Inc.
+digraph {
+ node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
+ graph [splines=ortho, nodesep=1]
+
+ subgraph cluster_key {
+ label = "Key\n(locks held during operation)";
+ rankdir = TB;
+
+ remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
+ hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
+ lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
+ local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
+ no_lock [shape=rectangle,label="no locks held"]
+ }
+
+ begin [shape=oval,label="begin\nbpf_map_update()"]
+
+ // Nodes below with an 'fn_' prefix are roughly labeled by the C function
+ // names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
+ // Number suffixes and errno suffixes handle subsections of the corresponding
+ // logic in the function as of the writing of this dot.
+
+ // cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
+ local_freelist_check [shape=diamond,fillcolor=1,
+ label="Local freelist\nnode available?"];
+ use_local_node [shape=rectangle,
+ label="Use node owned\nby this CPU"]
+
+ // cf. bpf_lru_pop_free()
+ common_lru_check [shape=diamond,
+ label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+ fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
+ label="Flush local pending,
+ Rotate Global list, move
+ LOCAL_FREE_TARGET
+ from global -> local"]
+ // Also corresponds to:
+ // fn__local_list_flush()
+ // fn_bpf_lru_list_rotate()
+ fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
+ label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+
+ fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
+ label="Shrink inactive list
+ up to remaining
+ LOCAL_FREE_TARGET
+ (global LRU -> local)"]
+ fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
+ label="> 0 entries in\nlocal free list?"]
+ fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
+ label="Steal one node from
+ inactive, or if empty,
+ from active global list"]
+ fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
+ label="Try to remove\nnode from hashtab"]
+
+ local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
+ common_lru_check2 [shape=diamond,
+ label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
+
+ subgraph cluster_remote_lock {
+ label = "Iterate through CPUs\n(start from current)";
+ style = dashed;
+ rankdir=LR;
+
+ local_freelist_check5 [shape=diamond,fillcolor=4,
+ label="Steal a node from\nper-cpu freelist?"]
+ local_freelist_check6 [shape=rectangle,fillcolor=4,
+ label="Steal a node from
+ (1) Unreferenced pending, or
+ (2) Any pending node"]
+ local_freelist_check7 [shape=rectangle,fillcolor=3,
+ label="Try to remove\nnode from hashtab"]
+ fn_htab_lru_map_update_elem [shape=diamond,
+ label="Stole node\nfrom remote\nCPU?"]
+ fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
+ // Also corresponds to:
+ // use_local_node()
+ // fn__local_list_pop_pending()
+ }
+
+ fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
+ label="Use node that was\nnot recently referenced"]
+ local_freelist_check4 [shape=rectangle,
+ label="Use node that was\nactively referenced\nin global list"]
+ fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
+ fn_htab_lru_map_update_elem3 [shape=rectangle,
+ label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
+ fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
+ label="Update hashmap\nwith new element"]
+ fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
+ fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
+ fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
+ fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
+
+ begin -> local_freelist_check
+ local_freelist_check -> use_local_node [xlabel="Y"]
+ local_freelist_check -> common_lru_check [xlabel="N"]
+ common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
+ common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+ fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
+ fn___bpf_lru_node_move_to_free ->
+ fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
+ fn___bpf_lru_node_move_to_free ->
+ fn___bpf_lru_list_shrink_inactive [xlabel="N"]
+ fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
+ fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
+ fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
+ fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
+ fn___bpf_lru_list_shrink3 -> local_freelist_check2
+ local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
+ local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
+ common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
+ common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
+ local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
+ local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
+ local_freelist_check6 -> local_freelist_check7
+ local_freelist_check7 -> fn_htab_lru_map_update_elem
+
+ fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem3 [xlabel = "Y"]
+ fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2 [xlabel = "N"]
+ fn_htab_lru_map_update_elem2 ->
+ fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
+ fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
+ fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
+
+ use_local_node -> fn_htab_lru_map_update_elem4
+ fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
+ local_freelist_check4 -> fn_htab_lru_map_update_elem4
+
+ fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
+ fn_htab_lru_map_update_elem4 ->
+ fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
+
+ // Create invisible pad nodes to line up various nodes
+ pad0 [style=invis]
+ pad1 [style=invis]
+ pad2 [style=invis]
+ pad3 [style=invis]
+ pad4 [style=invis]
+
+ // Line up the key with the top of the graph
+ no_lock -> local_lock [style=invis]
+ local_lock -> lru_lock [style=invis]
+ lru_lock -> hash_lock [style=invis]
+ hash_lock -> remote_lock [style=invis]
+ remote_lock -> local_freelist_check5 [style=invis]
+ remote_lock -> fn___bpf_lru_list_shrink [style=invis]
+
+ // Line up return code nodes at the bottom of the graph
+ fn_htab_lru_map_update_elem -> pad0 [style=invis]
+ pad0 -> pad1 [style=invis]
+ pad1 -> pad2 [style=invis]
+ //pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
+ fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem5 [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_EBUSY [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_EEXIST [style=invis]
+ pad3 -> fn_htab_lru_map_update_elem_ENOENT [style=invis]
+
+ // Reduce diagram width by forcing some nodes to appear above others
+ local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
+ common_lru_check2 -> pad4 [style=invis]
+ pad4 -> local_freelist_check5 [style=invis]
+}
diff --git a/Documentation/bpf/map_sockmap.rst b/Documentation/bpf/map_sockmap.rst
index cc92047c6630..2d630686a00b 100644
--- a/Documentation/bpf/map_sockmap.rst
+++ b/Documentation/bpf/map_sockmap.rst
@@ -240,11 +240,11 @@ offsets into ``msg``, respectively.
If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only
parse data that the (``data``, ``data_end``) pointers have already consumed.
For ``sendmsg()`` hooks this is likely the first scatterlist element. But for
-calls relying on the ``sendpage`` handler (e.g., ``sendfile()``) this will be
-the range (**0**, **0**) because the data is shared with user space and by
-default the objective is to avoid allowing user space to modify data while (or
-after) BPF verdict is being decided. This helper can be used to pull in data
-and to set the start and end pointers to given values. Data will be copied if
+calls relying on MSG_SPLICE_PAGES (e.g., ``sendfile()``) this will be the
+range (**0**, **0**) because the data is shared with user space and by default
+the objective is to avoid allowing user space to modify data while (or after)
+BPF verdict is being decided. This helper can be used to pull in data and to
+set the start and end pointers to given values. Data will be copied if
necessary (i.e., if data was not linear and if start and end pointers do not
point to the same chunk).
diff --git a/Documentation/bpf/prog_cgroup_sockopt.rst b/Documentation/bpf/prog_cgroup_sockopt.rst
index 172f957204bf..1226a94af07a 100644
--- a/Documentation/bpf/prog_cgroup_sockopt.rst
+++ b/Documentation/bpf/prog_cgroup_sockopt.rst
@@ -98,10 +98,65 @@ can access only the first ``PAGE_SIZE`` of that data. So it has to options:
indicates that the kernel should use BPF's trimmed ``optval``.
When the BPF program returns with the ``optlen`` greater than
-``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno.
+``PAGE_SIZE``, the userspace will receive the original kernel
+buffers without any modifications that the BPF program might have
+applied.
Example
=======
+The recommended way to handle BPF programs is as follows:
+
+.. code-block:: c
+
+ SEC("cgroup/getsockopt")
+ int getsockopt(struct bpf_sockopt *ctx)
+ {
+ /* Custom socket option. */
+ if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+ ctx->retval = 0;
+ optval[0] = ...;
+ ctx->optlen = 1;
+ return 1;
+ }
+
+ /* Modify kernel's socket option. */
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ ctx->retval = 0;
+ optval[0] = ...;
+ ctx->optlen = 1;
+ return 1;
+ }
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > PAGE_SIZE)
+ ctx->optlen = 0;
+
+ return 1;
+ }
+
+ SEC("cgroup/setsockopt")
+ int setsockopt(struct bpf_sockopt *ctx)
+ {
+ /* Custom socket option. */
+ if (ctx->level == MY_SOL && ctx->optname == MY_OPTNAME) {
+ /* do something */
+ ctx->optlen = -1;
+ return 1;
+ }
+
+ /* Modify kernel's socket option. */
+ if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
+ optval[0] = ...;
+ return 1;
+ }
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > PAGE_SIZE)
+ ctx->optlen = 0;
+
+ return 1;
+ }
+
See ``tools/testing/selftests/bpf/progs/sockopt_sk.c`` for an example
of BPF program that handles socket options.
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 37314afd1ac8..d4fdf6a3875a 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -74,6 +74,7 @@ if major >= 3:
"__percpu",
"__rcu",
"__user",
+ "__force",
# include/linux/compiler_attributes.h:
"__alias",
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index f75778d37488..e6f5bc39cf5c 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -127,17 +127,8 @@ bring CPU4 back online::
$ echo 1 > /sys/devices/system/cpu/cpu4/online
smpboot: Booting Node 0 Processor 4 APIC 0x1
-The CPU is usable again. This should work on all CPUs. CPU0 is often special
-and excluded from CPU hotplug. On X86 the kernel option
-*CONFIG_BOOTPARAM_HOTPLUG_CPU0* has to be enabled in order to be able to
-shutdown CPU0. Alternatively the kernel command option *cpu0_hotplug* can be
-used. Some known dependencies of CPU0:
-
-* Resume from hibernate/suspend. Hibernate/suspend will fail if CPU0 is offline.
-* PIC interrupts. CPU0 can't be removed if a PIC interrupt is detected.
-
-Please let Fenghua Yu <fenghua.yu@intel.com> know if you find any dependencies
-on CPU0.
+The CPU is usable again. This should work on all CPUs, but CPU0 is often special
+and excluded from CPU hotplug.
The CPU hotplug coordination
============================
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 9b3f3e5f5a95..f2bcc5a7ea43 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -96,6 +96,12 @@ Command-line Parsing
.. kernel-doc:: lib/cmdline.c
:export:
+Error Pointers
+--------------
+
+.. kernel-doc:: include/linux/err.h
+ :internal:
+
Sorting
-------
@@ -412,3 +418,15 @@ Read-Copy Update (RCU)
.. kernel-doc:: include/linux/rcu_sync.h
.. kernel-doc:: kernel/rcu/sync.c
+
+.. kernel-doc:: kernel/rcu/tasks.h
+
+.. kernel-doc:: kernel/rcu/tree_stall.h
+
+.. kernel-doc:: include/linux/rcupdate_trace.h
+
+.. kernel-doc:: include/linux/rcupdate_wait.h
+
+.. kernel-doc:: include/linux/rcuref.h
+
+.. kernel-doc:: include/linux/rcutree.h
diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index 9fb0b1080d3b..d3c1f6d8c0e0 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -112,6 +112,12 @@ pages:
This also leads to limitations: there are only 31-10==21 bits available for a
counter that increments 10 bits at a time.
+* Because of that limitation, special handling is applied to the zero pages
+ when using FOLL_PIN. We only pretend to pin a zero page - we don't alter its
+ refcount or pincount at all (it is permanent, so there's no need). The
+ unpinning functions also don't do anything to a zero page. This is
+ transparent to the caller.
+
* Callers must specifically request "dma-pinned tracking of pages". In other
words, just calling get_user_pages() will not suffice; a new set of functions,
pin_user_page() and related, must be used.
diff --git a/Documentation/core-api/this_cpu_ops.rst b/Documentation/core-api/this_cpu_ops.rst
index 5cb8b883ae83..91acbcf30e9b 100644
--- a/Documentation/core-api/this_cpu_ops.rst
+++ b/Documentation/core-api/this_cpu_ops.rst
@@ -53,7 +53,6 @@ preemption and interrupts::
this_cpu_add_return(pcp, val)
this_cpu_xchg(pcp, nval)
this_cpu_cmpxchg(pcp, oval, nval)
- this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
this_cpu_sub(pcp, val)
this_cpu_inc(pcp)
this_cpu_dec(pcp)
@@ -242,7 +241,6 @@ safe::
__this_cpu_add_return(pcp, val)
__this_cpu_xchg(pcp, nval)
__this_cpu_cmpxchg(pcp, oval, nval)
- __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
__this_cpu_sub(pcp, val)
__this_cpu_inc(pcp)
__this_cpu_dec(pcp)
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 8ec4d6270b24..a4c9b9d1905f 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -348,6 +348,37 @@ Guidelines
level of locality in wq operations and work item execution.
+Monitoring
+==========
+
+Use tools/workqueue/wq_monitor.py to monitor workqueue operations: ::
+
+ $ tools/workqueue/wq_monitor.py events
+ total infl CPUtime CPUhog CMwake mayday rescued
+ events 18545 0 6.1 0 5 - -
+ events_highpri 8 0 0.0 0 0 - -
+ events_long 3 0 0.0 0 0 - -
+ events_unbound 38306 0 0.1 - - - -
+ events_freezable 0 0 0.0 0 0 - -
+ events_power_efficient 29598 0 0.2 0 0 - -
+ events_freezable_power_ 10 0 0.0 0 0 - -
+ sock_diag_events 0 0 0.0 0 0 - -
+
+ total infl CPUtime CPUhog CMwake mayday rescued
+ events 18548 0 6.1 0 5 - -
+ events_highpri 8 0 0.0 0 0 - -
+ events_long 3 0 0.0 0 0 - -
+ events_unbound 38322 0 0.1 - - - -
+ events_freezable 0 0 0.0 0 0 - -
+ events_power_efficient 29603 0 0.2 0 0 - -
+ events_freezable_power_ 10 0 0.0 0 0 - -
+ sock_diag_events 0 0 0.0 0 0 - -
+
+ ...
+
+See the command's help message for more info.
+
+
Debugging
=========
@@ -387,6 +418,7 @@ the stack trace of the offending worker thread. ::
The work item's function should be trivially visible in the stack
trace.
+
Non-reentrance Conditions
=========================
diff --git a/Documentation/crypto/async-tx-api.rst b/Documentation/crypto/async-tx-api.rst
index bfc773991bdc..27c146b54d71 100644
--- a/Documentation/crypto/async-tx-api.rst
+++ b/Documentation/crypto/async-tx-api.rst
@@ -66,7 +66,7 @@ features surfaced as a result:
::
struct dma_async_tx_descriptor *
- async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
+ async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
3.2 Supported operations
------------------------
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index e66916a483cd..f4acf9c2e90f 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -107,9 +107,12 @@ effectively disables ``panic_on_warn`` for KASAN reports.
Alternatively, independent of ``panic_on_warn``, the ``kasan.fault=`` boot
parameter can be used to control panic and reporting behaviour:
-- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
- report or also panic the kernel (default: ``report``). The panic happens even
- if ``kasan_multi_shot`` is enabled.
+- ``kasan.fault=report``, ``=panic``, or ``=panic_on_write`` controls whether
+ to only print a KASAN report, panic the kernel, or panic the kernel on
+ invalid writes only (default: ``report``). The panic happens even if
+ ``kasan_multi_shot`` is enabled. Note that when using asynchronous mode of
+ Hardware Tag-Based KASAN, ``kasan.fault=panic_on_write`` always panics on
+ asynchronously checked accesses (including reads).
Software and Hardware Tag-Based KASAN modes (see the section about various
modes below) support altering stack trace collection behavior:
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index 12b575b76b20..deede972f254 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -36,6 +36,7 @@ Running the selftests (hotplug tests are run in limited mode)
To build the tests::
+ $ make headers
$ make -C tools/testing/selftests
To run the tests::
@@ -168,6 +169,28 @@ the `-t` option for specific single tests. Either can be used multiple times::
For other features see the script usage output, seen with the `-h` option.
+Timeout for selftests
+=====================
+
+Selftests are designed to be quick, so a default timeout of 45 seconds is
+used for each test. Tests can override the default timeout by adding
+a settings file in their directory and setting a timeout variable there to
+the desired upper timeout for the test. Only a few tests override
+the timeout with a value higher than 45 seconds; selftests strive to keep
+it that way. Timeouts in selftests are not considered fatal because the
+system under which a test runs may change and this can also modify the
+expected time it takes to run a test. If you have control over the systems
+which will run the tests you can configure a test runner on those systems to
+use a greater or lower timeout on the command line with the `-o` or
+the `--override-timeout` argument. For example, to use 165 seconds instead
+one would use::
+
+ $ ./run_kselftest.sh --override-timeout 165
+
+You can look at the TAP output to see if you ran into the timeout. Test
+runners which know a test must run under a specific time can then optionally
+treat these timeouts as fatal.
+
Packaging selftests
===================
diff --git a/Documentation/dev-tools/kunit/architecture.rst b/Documentation/dev-tools/kunit/architecture.rst
index e95ab05342bb..f335f883f8f6 100644
--- a/Documentation/dev-tools/kunit/architecture.rst
+++ b/Documentation/dev-tools/kunit/architecture.rst
@@ -119,9 +119,9 @@ All expectations/assertions are formatted as:
terminated immediately.
- Assertions call the function:
- ``void __noreturn kunit_abort(struct kunit *)``.
+ ``void __noreturn __kunit_abort(struct kunit *)``.
- - ``kunit_abort`` calls the function:
+ - ``__kunit_abort`` calls the function:
``void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)``.
- ``kunit_try_catch_throw`` calls the function:
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
index c736613c9b19..a98235326bab 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -250,15 +250,20 @@ Now we are ready to write the test cases.
};
kunit_test_suite(misc_example_test_suite);
+ MODULE_LICENSE("GPL");
+
2. Add the following lines to ``drivers/misc/Kconfig``:
.. code-block:: kconfig
config MISC_EXAMPLE_TEST
tristate "Test for my example" if !KUNIT_ALL_TESTS
- depends on MISC_EXAMPLE && KUNIT=y
+ depends on MISC_EXAMPLE && KUNIT
default KUNIT_ALL_TESTS
+Note: If your test does not support being built as a loadable module (which is
+discouraged), replace tristate by bool, and depend on KUNIT=y instead of KUNIT.
+
3. Add the following lines to ``drivers/misc/Makefile``:
.. code-block:: make
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 9faf2b4153fc..c27e1646ecd9 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -121,6 +121,12 @@ there's an allocation error.
``return`` so they only work from the test function. In KUnit, we stop the
current kthread on failure, so you can call them from anywhere.
+.. note::
+ Warning: There is an exception to the above rule. You shouldn't use assertions
+ in the suite's exit() function, or in the free function for a resource. These
+ run when a test is shutting down, and an assertion here prevents further
+ cleanup code from running, potentially leading to a memory leak.
+
Customizing error messages
--------------------------
@@ -160,7 +166,12 @@ many similar tests. In order to reduce duplication in these closely related
tests, most unit testing frameworks (including KUnit) provide the concept of a
*test suite*. A test suite is a collection of test cases for a unit of code
with optional setup and teardown functions that run before/after the whole
-suite and/or every test case. For example:
+suite and/or every test case.
+
+.. note::
+ A test case will only run if it is associated with a test suite.
+
+For example:
.. code-block:: c
@@ -190,7 +201,10 @@ after everything else. ``kunit_test_suite(example_test_suite)`` registers the
test suite with the KUnit test framework.
.. note::
- A test case will only run if it is associated with a test suite.
+ The ``exit`` and ``suite_exit`` functions will run even if ``init`` or
+ ``suite_init`` fail. Make sure that they can handle any inconsistent
+ state which may result from ``init`` or ``suite_init`` encountering errors
+ or exiting early.
``kunit_test_suite(...)`` is a macro which tells the linker to put the
specified test suite in a special linker section so that it can be run by KUnit
@@ -601,6 +615,57 @@ For example:
KUNIT_ASSERT_STREQ(test, buffer, "");
}
+Registering Cleanup Actions
+---------------------------
+
+If you need to perform some cleanup beyond simple use of ``kunit_kzalloc``,
+you can register a custom "deferred action", which is a cleanup function
+run when the test exits (whether cleanly, or via a failed assertion).
+
+Actions are simple functions with no return value, and a single ``void*``
+context argument, and fulfill the same role as "cleanup" functions in Python
+and Go tests, "defer" statements in languages which support them, and
+(in some cases) destructors in RAII languages.
+
+These are very useful for unregistering things from global lists, closing
+files or other resources, or freeing resources.
+
+For example:
+
+.. code-block:: C
+
+ static void cleanup_device(void *ctx)
+ {
+ struct device *dev = (struct device *)ctx;
+
+ device_unregister(dev);
+ }
+
+ void example_device_test(struct kunit *test)
+ {
+ struct my_device dev;
+
+ device_register(&dev);
+
+ kunit_add_action(test, &cleanup_device, &dev);
+ }
+
+Note that, for functions like device_unregister which only accept a single
+pointer-sized argument, it's possible to directly cast that function to
+a ``kunit_action_t`` rather than writing a wrapper function, for example:
+
+.. code-block:: C
+
+ kunit_add_action(test, (kunit_action_t *)&device_unregister, &dev);
+
+``kunit_add_action`` can fail if, for example, the system is out of memory.
+You can use ``kunit_add_action_or_reset`` instead which runs the action
+immediately if it cannot be deferred.
+
+If you need more control over when the cleanup function is called, you
+can trigger it early using ``kunit_release_action``, or cancel it entirely
+with ``kunit_remove_action``.
+
Testing Static Functions
------------------------
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt
index 61d77acbeb5e..f925290d4641 100644
--- a/Documentation/devicetree/bindings/arm/xen.txt
+++ b/Documentation/devicetree/bindings/arm/xen.txt
@@ -56,7 +56,7 @@ hypervisor {
};
The format and meaning of the "xen,uefi-*" parameters are similar to those in
-Documentation/arm/uefi.rst, which are provided by the regular UEFI stub. However
+Documentation/arch/arm/uefi.rst, which are provided by the regular UEFI stub. However
they differ because they are provided by the Xen hypervisor, together with a set
of UEFI runtime services implemented via hypercalls, see
http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html.
diff --git a/Documentation/devicetree/bindings/cpu/idle-states.yaml b/Documentation/devicetree/bindings/cpu/idle-states.yaml
index b8cc826c9501..b3a5356f9916 100644
--- a/Documentation/devicetree/bindings/cpu/idle-states.yaml
+++ b/Documentation/devicetree/bindings/cpu/idle-states.yaml
@@ -259,7 +259,7 @@ description: |+
http://infocenter.arm.com/help/index.jsp
[5] ARM Linux Kernel documentation - Booting AArch64 Linux
- Documentation/arm64/booting.rst
+ Documentation/arch/arm64/booting.rst
[6] RISC-V Linux Kernel documentation - CPUs bindings
Documentation/devicetree/bindings/riscv/cpus.yaml
diff --git a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
index 367d04ad1923..83381f3a1341 100644
--- a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
+++ b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml
@@ -71,6 +71,8 @@ properties:
minItems: 1
maxItems: 3
+ dma-coherent: true
+
interconnects:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
index 85d9efb743ee..d9ef86729011 100644
--- a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
+++ b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml
@@ -60,6 +60,7 @@ properties:
default: 0
regstep:
+ $ref: /schemas/types.yaml#/definitions/uint32
description: |
deprecated, use reg-shift above
deprecated: true
diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongson,eiointc.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongson,eiointc.yaml
new file mode 100644
index 000000000000..393c128a41d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/loongson,eiointc.yaml
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/loongson,eiointc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson Extended I/O Interrupt Controller
+
+maintainers:
+ - Binbin Zhou <zhoubinbin@loongson.cn>
+
+description: |
+ This interrupt controller is found on the Loongson-3 family chips and
+ Loongson-2K series chips and is used to distribute interrupts directly to
+ individual cores without forwarding them through the HT's interrupt line.
+
+allOf:
+ - $ref: /schemas/interrupt-controller.yaml#
+
+properties:
+ compatible:
+ enum:
+ - loongson,ls2k0500-eiointc
+ - loongson,ls2k2000-eiointc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ interrupt-controller: true
+
+ '#interrupt-cells':
+ const: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - interrupt-controller
+ - '#interrupt-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ eiointc: interrupt-controller@1fe11600 {
+ compatible = "loongson,ls2k0500-eiointc";
+ reg = <0x1fe10000 0x10000>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <3>;
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml b/Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml
new file mode 100644
index 000000000000..ac1a5a17749d
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/memory-controllers/nuvoton,npcm-memory-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Nuvoton NPCM Memory Controller
+
+maintainers:
+ - Marvin Lin <kflin@nuvoton.com>
+ - Stanley Chu <yschu@nuvoton.com>
+
+description: |
+ The Nuvoton BMC SoC supports DDR4 memory with or without ECC (error correction
+ code).
+
+ The memory controller supports single bit error correction, double bit error
+ detection (in-line ECC in which a section (1/8th) of the memory device used to
+ store data is used for ECC storage).
+
+ Note, the bootloader must configure ECC mode for the memory controller.
+
+properties:
+ compatible:
+ enum:
+ - nuvoton,npcm750-memory-controller
+ - nuvoton,npcm845-memory-controller
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ mc: memory-controller@f0824000 {
+ compatible = "nuvoton,npcm750-memory-controller";
+ reg = <0xf0824000 0x1000>;
+ interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml
new file mode 100644
index 000000000000..cf2500f2e9a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/rockchip,rk806.yaml
@@ -0,0 +1,406 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/rockchip,rk806.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RK806 Power Management Integrated Circuit
+
+maintainers:
+ - Sebastian Reichel <sebastian.reichel@collabora.com>
+
+description:
+ Rockchip RK806 series PMIC. This device consists of an SPI- or
+ I2C-controlled MFD that includes multiple switchable regulators.
+
+properties:
+ compatible:
+ enum:
+ - rockchip,rk806
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ gpio-controller: true
+
+ '#gpio-cells':
+ const: 2
+
+ vcc1-supply:
+ description:
+ The input supply for dcdc-reg1.
+
+ vcc2-supply:
+ description:
+ The input supply for dcdc-reg2.
+
+ vcc3-supply:
+ description:
+ The input supply for dcdc-reg3.
+
+ vcc4-supply:
+ description:
+ The input supply for dcdc-reg4.
+
+ vcc5-supply:
+ description:
+ The input supply for dcdc-reg5.
+
+ vcc6-supply:
+ description:
+ The input supply for dcdc-reg6.
+
+ vcc7-supply:
+ description:
+ The input supply for dcdc-reg7.
+
+ vcc8-supply:
+ description:
+ The input supply for dcdc-reg8.
+
+ vcc9-supply:
+ description:
+ The input supply for dcdc-reg9.
+
+ vcc10-supply:
+ description:
+ The input supply for dcdc-reg10.
+
+ vcc11-supply:
+ description:
+ The input supply for pldo-reg1, pldo-reg2 and pldo-reg3.
+
+ vcc12-supply:
+ description:
+ The input supply for pldo-reg4 and pldo-reg5.
+
+ vcc13-supply:
+ description:
+ The input supply for nldo-reg1, nldo-reg2 and nldo-reg3.
+
+ vcc14-supply:
+ description:
+ The input supply for nldo-reg4 and nldo-reg5.
+
+ vcca-supply:
+ description:
+ The input supply for pldo-reg6.
+
+ regulators:
+ type: object
+ additionalProperties: false
+ patternProperties:
+ "^(dcdc-reg([1-9]|10)|pldo-reg[1-6]|nldo-reg[1-5])$":
+ type: object
+ $ref: /schemas/regulator/regulator.yaml#
+ unevaluatedProperties: false
+
+patternProperties:
+ '-pins$':
+ type: object
+ additionalProperties: false
+ $ref: /schemas/pinctrl/pinmux-node.yaml
+
+ properties:
+ function:
+ enum: [pin_fun0, pin_fun1, pin_fun2, pin_fun3, pin_fun4, pin_fun5]
+
+ pins:
+ $ref: /schemas/types.yaml#/definitions/string
+ enum: [gpio_pwrctrl1, gpio_pwrctrl2, gpio_pwrctrl3]
+
+allOf:
+ - $ref: /schemas/spi/spi-peripheral-props.yaml
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/pinctrl/rockchip.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic@0 {
+ compatible = "rockchip,rk806";
+ reg = <0x0>;
+
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+
+ vcc1-supply = <&vcc5v0_sys>;
+ vcc2-supply = <&vcc5v0_sys>;
+ vcc3-supply = <&vcc5v0_sys>;
+ vcc4-supply = <&vcc5v0_sys>;
+ vcc5-supply = <&vcc5v0_sys>;
+ vcc6-supply = <&vcc5v0_sys>;
+ vcc7-supply = <&vcc5v0_sys>;
+ vcc8-supply = <&vcc5v0_sys>;
+ vcc9-supply = <&vcc5v0_sys>;
+ vcc10-supply = <&vcc5v0_sys>;
+ vcc11-supply = <&vcc_2v0_pldo_s3>;
+ vcc12-supply = <&vcc5v0_sys>;
+ vcc13-supply = <&vcc5v0_sys>;
+ vcc14-supply = <&vcc_1v1_nldo_s3>;
+ vcca-supply = <&vcc5v0_sys>;
+
+ regulators {
+ vdd_gpu_s0: dcdc-reg1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_gpu_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_npu_s0: dcdc-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_npu_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_log_s0: dcdc-reg3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_log_s0";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd_vdenc_s0: dcdc-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <550000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_vdenc_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_gpu_mem_s0: dcdc-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_gpu_mem_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_npu_mem_s0: dcdc-reg6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_npu_mem_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vcc_2v0_pldo_s3: dcdc-reg7 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <2000000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_2v0_pldo_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <2000000>;
+ };
+ };
+
+ vdd_vdenc_mem_s0: dcdc-reg8 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <675000>;
+ regulator-max-microvolt = <950000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_vdenc_mem_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd2_ddr_s3: dcdc-reg9 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vdd2_ddr_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+ vcc_1v1_nldo_s3: dcdc-reg10 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vcc_1v1_nldo_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1100000>;
+ };
+ };
+
+ avcc_1v8_s0: pldo-reg1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "avcc_1v8_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd1_1v8_ddr_s3: pldo-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd1_1v8_ddr_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vcc_1v8_s3: pldo-reg3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vcc_1v8_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vcc_3v3_s0: pldo-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vcc_3v3_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vccio_sd_s0: pldo-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vccio_sd_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ master_pldo6_s3: pldo-reg6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "master_pldo6_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <1800000>;
+ };
+ };
+
+ vdd_0v75_s3: nldo-reg1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-ramp-delay = <12500>;
+ regulator-name = "vdd_0v75_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <750000>;
+ };
+ };
+
+ vdd2l_0v9_ddr_s3: nldo-reg2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-name = "vdd2l_0v9_ddr_s3";
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-suspend-microvolt = <900000>;
+ };
+ };
+
+ master_nldo3: nldo-reg3 {
+ regulator-name = "master_nldo3";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ avdd_0v75_s0: nldo-reg4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <750000>;
+ regulator-name = "avdd_0v75_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+
+ vdd_0v85_s0: nldo-reg5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <850000>;
+ regulator-name = "vdd_0v85_s0";
+ regulator-state-mem {
+ regulator-off-in-suspend;
+ };
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
index 1c96da04f0e5..2459a55ed540 100644
--- a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
+++ b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml
@@ -53,10 +53,11 @@ properties:
items:
- const: arm,pl18x
- const: arm,primecell
- - description: Entry for STMicroelectronics variant of PL18x.
- This dedicated compatible is used by bootloaders.
+ - description: Entries for STMicroelectronics variant of PL18x.
items:
- - const: st,stm32-sdmmc2
+ - enum:
+ - st,stm32-sdmmc2
+ - st,stm32mp25-sdmmc2
- const: arm,pl18x
- const: arm,primecell
diff --git a/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt
deleted file mode 100644
index d876580ae3b8..000000000000
--- a/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Broadcom BCM2835 SDHOST controller
-
-This file documents differences between the core properties described
-by mmc.txt and the properties that represent the BCM2835 controller.
-
-Required properties:
-- compatible: Should be "brcm,bcm2835-sdhost".
-- clocks: The clock feeding the SDHOST controller.
-
-Optional properties:
-- dmas: DMA channel for read and write.
- See Documentation/devicetree/bindings/dma/dma.txt for details
-
-Example:
-
-sdhost: mmc@7e202000 {
- compatible = "brcm,bcm2835-sdhost";
- reg = <0x7e202000 0x100>;
- interrupts = <2 24>;
- clocks = <&clocks BCM2835_CLOCK_VPU>;
- dmas = <&dma 13>;
- dma-names = "rx-tx";
-};
diff --git a/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.yaml b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.yaml
new file mode 100644
index 000000000000..3a5a44800675
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhost.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/brcm,bcm2835-sdhost.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2835 SDHOST controller
+
+maintainers:
+ - Stefan Wahren <stefan.wahren@i2se.com>
+
+allOf:
+ - $ref: mmc-controller.yaml
+
+properties:
+ compatible:
+ const: brcm,bcm2835-sdhost
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ const: rx-tx
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+
+ sdhost: mmc@7e202000 {
+ compatible = "brcm,bcm2835-sdhost";
+ reg = <0x7e202000 0x100>;
+ interrupts = <2 24>;
+ clocks = <&clocks BCM2835_CLOCK_VPU>;
+ dmas = <&dma 13>;
+ dma-names = "rx-tx";
+ bus-width = <4>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
deleted file mode 100644
index 7f5dd83f5bd9..000000000000
--- a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Broadcom BCM281xx SDHCI
-
-This file documents differences between the core properties in mmc.txt
-and the properties present in the bcm281xx SDHCI
-
-Required properties:
-- compatible : Should be "brcm,kona-sdhci"
-- DEPRECATED: compatible : Should be "bcm,kona-sdhci"
-- clocks: phandle + clock specifier pair of the external clock
-
-Refer to clocks/clock-bindings.txt for generic clock consumer properties.
-
-Example:
-
-sdio2: sdio@3f1a0000 {
- compatible = "brcm,kona-sdhci";
- reg = <0x3f1a0000 0x10000>;
- clocks = <&sdio3_clk>;
- interrupts = <0x0 74 0x4>;
-};
-
diff --git a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.yaml b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.yaml
new file mode 100644
index 000000000000..12eb3988f824
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/brcm,kona-sdhci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family SDHCI controller
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+allOf:
+ - $ref: sdhci-common.yaml#
+
+properties:
+ compatible:
+ const: brcm,kona-sdhci
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/bcm281xx.h>
+
+ mmc@3f1a0000 {
+ compatible = "brcm,kona-sdhci";
+ reg = <0x3f1a0000 0x10000>;
+ clocks = <&master_ccu BCM281XX_MASTER_CCU_SDIO3>;
+ interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
index fbfd822b9270..82eb7a24c857 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml
@@ -42,6 +42,7 @@ properties:
- enum:
- fsl,imx6sll-usdhc
- fsl,imx6ull-usdhc
+ - fsl,imx6ul-usdhc
- const: fsl,imx6sx-usdhc
- items:
- const: fsl,imx7d-usdhc
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
index 4f2d9e8127dd..6da28e630577 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml
@@ -36,11 +36,14 @@ properties:
- enum:
- qcom,ipq5018-sdhci
- qcom,ipq5332-sdhci
+ - qcom,ipq6018-sdhci
- qcom,ipq9574-sdhci
- qcom,qcm2290-sdhci
- qcom,qcs404-sdhci
+ - qcom,qdu1000-sdhci
- qcom,sc7180-sdhci
- qcom,sc7280-sdhci
+ - qcom,sc8280xp-sdhci
- qcom,sdm630-sdhci
- qcom,sdm670-sdhci
- qcom,sdm845-sdhci
diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
index 9a88870cd865..054b6b8bf9b9 100644
--- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -49,13 +49,12 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
reg:
minimum: 0
maximum: 7
- nand-ecc-mode: true
-
nand-ecc-algo:
const: bch
@@ -75,7 +74,7 @@ patternProperties:
minimum: 0
maximum: 1
- additionalProperties: false
+ unevaluatedProperties: false
required:
- compatible
diff --git a/Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml b/Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml
index 28fb9a7dd70f..787ef488dd5b 100644
--- a/Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/amlogic,meson-nand.yaml
@@ -40,6 +40,7 @@ properties:
patternProperties:
"^nand@[0-7]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
reg:
minimum: 0
@@ -58,6 +59,14 @@ patternProperties:
meson-gxl-nfc 8, 16, 24, 30, 40, 50, 60
meson-axg-nfc 8
+ nand-rb:
+ maxItems: 1
+ items:
+ maximum: 0
+
+ unevaluatedProperties: false
+
+
required:
- compatible
- reg
@@ -87,6 +96,7 @@ examples:
nand@0 {
reg = <0>;
+ nand-rb = <0>;
};
};
diff --git a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml
index 1571024aa119..f57e96374e67 100644
--- a/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml
+++ b/Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml
@@ -114,6 +114,7 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
compatible:
const: brcm,nandcs
@@ -136,6 +137,8 @@ patternProperties:
layout.
$ref: /schemas/types.yaml#/definitions/uint32
+ unevaluatedProperties: false
+
allOf:
- $ref: nand-controller.yaml#
- if:
diff --git a/Documentation/devicetree/bindings/mtd/denali,nand.yaml b/Documentation/devicetree/bindings/mtd/denali,nand.yaml
index 0be83ad42970..81f95538d415 100644
--- a/Documentation/devicetree/bindings/mtd/denali,nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/denali,nand.yaml
@@ -63,6 +63,12 @@ properties:
minItems: 1
maxItems: 2
+patternProperties:
+ "^nand@[a-f0-9]$":
+ type: object
+ $ref: raw-nand-chip.yaml
+ unevaluatedProperties: false
+
allOf:
- $ref: nand-controller.yaml
@@ -74,7 +80,6 @@ allOf:
then:
patternProperties:
"^nand@[a-f0-9]$":
- type: object
properties:
nand-ecc-strength:
enum:
@@ -92,7 +97,6 @@ allOf:
then:
patternProperties:
"^nand@[a-f0-9]$":
- type: object
properties:
nand-ecc-strength:
enum:
@@ -111,7 +115,6 @@ allOf:
then:
patternProperties:
"^nand@[a-f0-9]$":
- type: object
properties:
nand-ecc-strength:
enum:
diff --git a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
index a7bdb5d3675c..b9312ebefeb9 100644
--- a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
@@ -39,7 +39,9 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
+
rb-gpios:
description: GPIO specifier for the busy pin.
maxItems: 1
@@ -48,6 +50,8 @@ patternProperties:
description: GPIO specifier for the write-protect pin.
maxItems: 1
+ unevaluatedProperties: false
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml b/Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml
index cc3def758e00..07bc7e3efd3a 100644
--- a/Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml
+++ b/Documentation/devicetree/bindings/mtd/intel,lgm-ebunand.yaml
@@ -42,17 +42,16 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
reg:
minimum: 0
maximum: 1
- nand-ecc-mode: true
-
nand-ecc-algo:
const: hw
- additionalProperties: false
+ unevaluatedProperties: false
required:
- compatible
diff --git a/Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml b/Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml
new file mode 100644
index 000000000000..a10729bb1840
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/marvell,nand-controller.yaml
@@ -0,0 +1,226 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/marvell,nand-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell NAND Flash Controller (NFC)
+
+maintainers:
+ - Miquel Raynal <miquel.raynal@bootlin.com>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - const: marvell,armada-8k-nand-controller
+ - const: marvell,armada370-nand-controller
+ - enum:
+ - marvell,armada370-nand-controller
+ - marvell,pxa3xx-nand-controller
+ - description: legacy bindings
+ deprecated: true
+ enum:
+ - marvell,armada-8k-nand
+ - marvell,armada370-nand
+ - marvell,pxa3xx-nand
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description:
+ Shall reference the NAND controller clocks, the second one
+ is only needed for the Armada 7K/8K SoCs
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: core
+ - const: reg
+
+ dmas:
+ maxItems: 1
+
+ dma-names:
+ items:
+ - const: data
+
+ marvell,system-controller:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: Syscon node that handles NAND controller related registers
+
+patternProperties:
+ "^nand@[a-f0-9]$":
+ type: object
+ $ref: raw-nand-chip.yaml
+
+ properties:
+ reg:
+ minimum: 0
+ maximum: 3
+
+ nand-rb:
+ items:
+ - minimum: 0
+ maximum: 1
+
+ nand-ecc-step-size:
+ const: 512
+
+ nand-ecc-strength:
+ enum: [1, 4, 8, 12, 16]
+
+ nand-ecc-mode:
+ const: hw
+
+ marvell,nand-keep-config:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Orders the driver not to take the timings from the core and
+ leaving them completely untouched. Bootloader timings will then
+ be used.
+
+ marvell,nand-enable-arbiter:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ To enable the arbiter, all boards blindly used it,
+ this bit was set by the bootloader for many boards and even if
+ it is marked reserved in several datasheets, it might be needed to set
+ it (otherwise it is harmless).
+ deprecated: true
+
+ required:
+ - reg
+ - nand-rb
+
+ unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+
+allOf:
+ - $ref: nand-controller.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: marvell,pxa3xx-nand-controller
+ then:
+ required:
+ - dmas
+ - dma-names
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: marvell,armada-8k-nand-controller
+ then:
+ properties:
+ clocks:
+ minItems: 2
+
+ clock-names:
+ minItems: 2
+
+ required:
+ - marvell,system-controller
+
+ else:
+ properties:
+ clocks:
+ minItems: 1
+
+ clock-names:
+ minItems: 1
+
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ nand_controller: nand-controller@d0000 {
+ compatible = "marvell,armada370-nand-controller";
+ reg = <0xd0000 0x54>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&coredivclk 0>;
+
+ nand@0 {
+ reg = <0>;
+ label = "main-storage";
+ nand-rb = <0>;
+ nand-ecc-mode = "hw";
+ marvell,nand-keep-config;
+ nand-on-flash-bbt;
+ nand-ecc-strength = <4>;
+ nand-ecc-step-size = <512>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "Rootfs";
+ reg = <0x00000000 0x40000000>;
+ };
+ };
+ };
+ };
+
+ - |
+ cp0_nand_controller: nand-controller@720000 {
+ compatible = "marvell,armada-8k-nand-controller",
+ "marvell,armada370-nand-controller";
+ reg = <0x720000 0x54>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <115 IRQ_TYPE_LEVEL_HIGH>;
+ clock-names = "core", "reg";
+ clocks = <&cp0_clk 1 2>,
+ <&cp0_clk 1 17>;
+ marvell,system-controller = <&cp0_syscon0>;
+
+ nand@0 {
+ reg = <0>;
+ label = "main-storage";
+ nand-rb = <0>;
+ nand-ecc-mode = "hw";
+ nand-ecc-strength = <8>;
+ nand-ecc-step-size = <512>;
+ };
+ };
+
+ - |
+ nand-controller@43100000 {
+ compatible = "marvell,pxa3xx-nand-controller";
+ reg = <0x43100000 90>;
+ interrupts = <45>;
+ clocks = <&clks 1>;
+ clock-names = "core";
+ dmas = <&pdma 97 3>;
+ dma-names = "data";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ nand@0 {
+ reg = <0>;
+ nand-rb = <0>;
+ nand-ecc-mode = "hw";
+ marvell,nand-keep-config;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/mtd/marvell-nand.txt b/Documentation/devicetree/bindings/mtd/marvell-nand.txt
deleted file mode 100644
index a2d9a0f2b683..000000000000
--- a/Documentation/devicetree/bindings/mtd/marvell-nand.txt
+++ /dev/null
@@ -1,126 +0,0 @@
-Marvell NAND Flash Controller (NFC)
-
-Required properties:
-- compatible: can be one of the following:
- * "marvell,armada-8k-nand-controller"
- * "marvell,armada370-nand-controller"
- * "marvell,pxa3xx-nand-controller"
- * "marvell,armada-8k-nand" (deprecated)
- * "marvell,armada370-nand" (deprecated)
- * "marvell,pxa3xx-nand" (deprecated)
- Compatibles marked deprecated support only the old bindings described
- at the bottom.
-- reg: NAND flash controller memory area.
-- #address-cells: shall be set to 1. Encode the NAND CS.
-- #size-cells: shall be set to 0.
-- interrupts: shall define the NAND controller interrupt.
-- clocks: shall reference the NAND controller clocks, the second one is
- is only needed for the Armada 7K/8K SoCs
-- clock-names: mandatory if there is a second clock, in this case there
- should be one clock named "core" and another one named "reg"
-- marvell,system-controller: Set to retrieve the syscon node that handles
- NAND controller related registers (only required with the
- "marvell,armada-8k-nand[-controller]" compatibles).
-
-Optional properties:
-- label: see partition.txt. New platforms shall omit this property.
-- dmas: shall reference DMA channel associated to the NAND controller.
- This property is only used with "marvell,pxa3xx-nand[-controller]"
- compatible strings.
-- dma-names: shall be "rxtx".
- This property is only used with "marvell,pxa3xx-nand[-controller]"
- compatible strings.
-
-Optional children nodes:
-Children nodes represent the available NAND chips.
-
-Required properties:
-- reg: shall contain the native Chip Select ids (0-3).
-- nand-rb: see nand-controller.yaml (0-1).
-
-Optional properties:
-- marvell,nand-keep-config: orders the driver not to take the timings
- from the core and leaving them completely untouched. Bootloader
- timings will then be used.
-- label: MTD name.
-- nand-on-flash-bbt: see nand-controller.yaml.
-- nand-ecc-mode: see nand-controller.yaml. Will use hardware ECC if not specified.
-- nand-ecc-algo: see nand-controller.yaml. This property is essentially useful when
- not using hardware ECC. Howerver, it may be added when using hardware
- ECC for clarification but will be ignored by the driver because ECC
- mode is chosen depending on the page size and the strength required by
- the NAND chip. This value may be overwritten with nand-ecc-strength
- property.
-- nand-ecc-strength: see nand-controller.yaml.
-- nand-ecc-step-size: see nand-controller.yaml. Marvell's NAND flash controller does
- use fixed strength (1-bit for Hamming, 16-bit for BCH), so the actual
- step size will shrink or grow in order to fit the required strength.
- Step sizes are not completely random for all and follow certain
- patterns described in AN-379, "Marvell SoC NFC ECC".
-
-See Documentation/devicetree/bindings/mtd/nand-controller.yaml for more details on
-generic bindings.
-
-
-Example:
-nand_controller: nand-controller@d0000 {
- compatible = "marvell,armada370-nand-controller";
- reg = <0xd0000 0x54>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&coredivclk 0>;
-
- nand@0 {
- reg = <0>;
- label = "main-storage";
- nand-rb = <0>;
- nand-ecc-mode = "hw";
- marvell,nand-keep-config;
- nand-on-flash-bbt;
- nand-ecc-strength = <4>;
- nand-ecc-step-size = <512>;
-
- partitions {
- compatible = "fixed-partitions";
- #address-cells = <1>;
- #size-cells = <1>;
-
- partition@0 {
- label = "Rootfs";
- reg = <0x00000000 0x40000000>;
- };
- };
- };
-};
-
-
-Note on legacy bindings: One can find, in not-updated device trees,
-bindings slightly different than described above with other properties
-described below as well as the partitions node at the root of a so
-called "nand" node (without clear controller/chip separation).
-
-Legacy properties:
-- marvell,nand-enable-arbiter: To enable the arbiter, all boards blindly
- used it, this bit was set by the bootloader for many boards and even if
- it is marked reserved in several datasheets, it might be needed to set
- it (otherwise it is harmless) so whether or not this property is set,
- the bit is selected by the driver.
-- num-cs: Number of chip-select lines to use, all boards blindly set 1
- to this and for a reason, other values would have failed. The value of
- this property is ignored.
-
-Example:
-
- nand0: nand@43100000 {
- compatible = "marvell,pxa3xx-nand";
- reg = <0x43100000 90>;
- interrupts = <45>;
- dmas = <&pdma 97 0>;
- dma-names = "rxtx";
- #address-cells = <1>;
- marvell,nand-keep-config;
- marvell,nand-enable-arbiter;
- num-cs = <1>;
- /* Partitions (optional) */
- };
diff --git a/Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml b/Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml
index a6e7f123eda7..ab503a33a269 100644
--- a/Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml
+++ b/Documentation/devicetree/bindings/mtd/mediatek,mtk-nfc.yaml
@@ -40,12 +40,11 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
- $ref: nand-chip.yaml#
+ $ref: raw-nand-chip.yaml#
unevaluatedProperties: false
properties:
reg:
maximum: 1
- nand-on-flash-bbt: true
nand-ecc-mode:
const: hw
diff --git a/Documentation/devicetree/bindings/mtd/mtd.yaml b/Documentation/devicetree/bindings/mtd/mtd.yaml
index da3d488c335f..b82ca03e969c 100644
--- a/Documentation/devicetree/bindings/mtd/mtd.yaml
+++ b/Documentation/devicetree/bindings/mtd/mtd.yaml
@@ -12,7 +12,7 @@ maintainers:
properties:
$nodename:
- pattern: "^(flash|.*sram)(@.*)?$"
+ pattern: "^(flash|.*sram|nand)(@.*)?$"
label:
description:
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
index f70a32d2d9d4..83a4fe4cc29d 100644
--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -16,16 +16,6 @@ description: |
children nodes of the NAND controller. This representation should be
enforced even for simple controllers supporting only one chip.
- The ECC strength and ECC step size properties define the user
- desires in terms of correction capability of a controller. Together,
- they request the ECC engine to correct {strength} bit errors per
- {size} bytes.
-
- The interpretation of these parameters is implementation-defined, so
- not all implementations must support all possible
- combinations. However, implementations are encouraged to further
- specify the value(s) they support.
-
properties:
$nodename:
pattern: "^nand-controller(@.*)?"
@@ -51,79 +41,8 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
- $ref: nand-chip.yaml#
-
- properties:
- reg:
- description:
- Contains the chip-select IDs.
-
- nand-ecc-placement:
- description:
- Location of the ECC bytes. This location is unknown by default
- but can be explicitly set to "oob", if all ECC bytes are
- known to be stored in the OOB area, or "interleaved" if ECC
- bytes will be interleaved with regular data in the main area.
- $ref: /schemas/types.yaml#/definitions/string
- enum: [ oob, interleaved ]
-
- nand-bus-width:
- description:
- Bus width to the NAND chip
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [8, 16]
- default: 8
-
- nand-on-flash-bbt:
- description:
- With this property, the OS will search the device for a Bad
- Block Table (BBT). If not found, it will create one, reserve
- a few blocks at the end of the device to store it and update
- it as the device ages. Otherwise, the out-of-band area of a
- few pages of all the blocks will be scanned at boot time to
- find Bad Block Markers (BBM). These markers will help to
- build a volatile BBT in RAM.
- $ref: /schemas/types.yaml#/definitions/flag
-
- nand-ecc-maximize:
- description:
- Whether or not the ECC strength should be maximized. The
- maximum ECC strength is both controller and chip
- dependent. The ECC engine has to select the ECC config
- providing the best strength and taking the OOB area size
- constraint into account. This is particularly useful when
- only the in-band area is used by the upper layers, and you
- want to make your NAND as reliable as possible.
- $ref: /schemas/types.yaml#/definitions/flag
-
- nand-is-boot-medium:
- description:
- Whether or not the NAND chip is a boot medium. Drivers might
- use this information to select ECC algorithms supported by
- the boot ROM or similar restrictions.
- $ref: /schemas/types.yaml#/definitions/flag
-
- nand-rb:
- description:
- Contains the native Ready/Busy IDs.
- $ref: /schemas/types.yaml#/definitions/uint32-array
-
- rb-gpios:
- description:
- Contains one or more GPIO descriptor (the numper of descriptor
- depends on the number of R/B pins exposed by the flash) for the
- Ready/Busy pins. Active state refers to the NAND ready state and
- should be set to GPIOD_ACTIVE_HIGH unless the signal is inverted.
-
- wp-gpios:
- description:
- Contains one GPIO descriptor for the Write Protect pin.
- Active state refers to the NAND Write Protect state and should be
- set to GPIOD_ACTIVE_LOW unless the signal is inverted.
- maxItems: 1
-
- required:
- - reg
+ type: object
+ $ref: raw-nand-chip.yaml#
required:
- "#address-cells"
diff --git a/Documentation/devicetree/bindings/mtd/partitions/partition.yaml b/Documentation/devicetree/bindings/mtd/partitions/partition.yaml
index cdffbb9cedc2..1ebe9e2347ea 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/partition.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/partition.yaml
@@ -55,6 +55,7 @@ properties:
linux,rootfs:
description: Marks partition that contains root filesystem to mount and boot
user space from
+ type: boolean
if:
not:
diff --git a/Documentation/devicetree/bindings/mtd/partitions/partitions.yaml b/Documentation/devicetree/bindings/mtd/partitions/partitions.yaml
index 2edc65e0e361..1dda2c80747b 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/partitions.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/partitions.yaml
@@ -21,6 +21,7 @@ oneOf:
- $ref: linksys,ns-partitions.yaml
- $ref: qcom,smem-part.yaml
- $ref: redboot-fis.yaml
+ - $ref: tplink,safeloader-partitions.yaml
properties:
compatible: true
diff --git a/Documentation/devicetree/bindings/mtd/qcom,nandc.yaml b/Documentation/devicetree/bindings/mtd/qcom,nandc.yaml
index 00c991ffa6c4..4ada60fbf81d 100644
--- a/Documentation/devicetree/bindings/mtd/qcom,nandc.yaml
+++ b/Documentation/devicetree/bindings/mtd/qcom,nandc.yaml
@@ -34,7 +34,9 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
+
nand-bus-width:
const: 8
@@ -45,6 +47,24 @@ patternProperties:
enum:
- 512
+ qcom,boot-partitions:
+ $ref: /schemas/types.yaml#/definitions/uint32-matrix
+ items:
+ items:
+ - description: offset
+ - description: size
+ description:
+ Boot partitions use a different layout where the 4 bytes of spare
+ data are not protected by ECC. Use this to declare these special
+ partitions by defining first the offset and then the size.
+
+ It's in the form of <offset1 size1 offset2 size2 offset3 ...>
+ and should be declared in ascending order.
+
+ Refer to the ipq8064 example on how to use this special binding.
+
+ unevaluatedProperties: false
+
allOf:
- $ref: nand-controller.yaml#
@@ -107,22 +127,15 @@ allOf:
- qcom,ipq806x-nand
then:
- properties:
- qcom,boot-partitions:
- $ref: /schemas/types.yaml#/definitions/uint32-matrix
- items:
- items:
- - description: offset
- - description: size
- description:
- Boot partition use a different layout where the 4 bytes of spare
- data are not protected by ECC. Use this to declare these special
- partitions by defining first the offset and then the size.
-
- It's in the form of <offset1 size1 offset2 size2 offset3 ...>
- and should be declared in ascending order.
-
- Refer to the ipq8064 example on how to use this special binding.
+ patternProperties:
+ "^nand@[a-f0-9]$":
+ properties:
+ qcom,boot-partitions: true
+ else:
+ patternProperties:
+ "^nand@[a-f0-9]$":
+ properties:
+ qcom,boot-partitions: false
required:
- compatible
diff --git a/Documentation/devicetree/bindings/mtd/raw-nand-chip.yaml b/Documentation/devicetree/bindings/mtd/raw-nand-chip.yaml
new file mode 100644
index 000000000000..092448d7bfc5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/raw-nand-chip.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mtd/raw-nand-chip.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Raw NAND Chip Common Properties
+
+maintainers:
+ - Miquel Raynal <miquel.raynal@bootlin.com>
+
+allOf:
+ - $ref: nand-chip.yaml#
+
+description: |
+ The ECC strength and ECC step size properties define the user
+ desires in terms of correction capability of a controller. Together,
+ they request the ECC engine to correct {strength} bit errors per
+ {size} bytes for a particular raw NAND chip.
+
+ The interpretation of these parameters is implementation-defined, so
+ not all implementations must support all possible
+ combinations. However, implementations are encouraged to further
+ specify the value(s) they support.
+
+properties:
+ $nodename:
+ pattern: "^nand@[a-f0-9]$"
+
+ reg:
+ description:
+ Contains the chip-select IDs.
+
+ nand-ecc-placement:
+ description:
+ Location of the ECC bytes. This location is unknown by default
+ but can be explicitly set to "oob", if all ECC bytes are
+ known to be stored in the OOB area, or "interleaved" if ECC
+ bytes will be interleaved with regular data in the main area.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum: [ oob, interleaved ]
+ deprecated: true
+
+ nand-ecc-mode:
+ description:
+ Legacy ECC configuration mixing the ECC engine choice and
+ configuration.
+ $ref: /schemas/types.yaml#/definitions/string
+ enum: [none, soft, soft_bch, hw, hw_syndrome, on-die]
+ deprecated: true
+
+ nand-bus-width:
+ description:
+ Bus width to the NAND chip
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [8, 16]
+ default: 8
+
+ nand-on-flash-bbt:
+ description:
+ With this property, the OS will search the device for a Bad
+ Block Table (BBT). If not found, it will create one, reserve
+ a few blocks at the end of the device to store it and update
+ it as the device ages. Otherwise, the out-of-band area of a
+ few pages of all the blocks will be scanned at boot time to
+ find Bad Block Markers (BBM). These markers will help to
+ build a volatile BBT in RAM.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ nand-ecc-maximize:
+ description:
+ Whether or not the ECC strength should be maximized. The
+ maximum ECC strength is both controller and chip
+ dependent. The ECC engine has to select the ECC config
+ providing the best strength and taking the OOB area size
+ constraint into account. This is particularly useful when
+ only the in-band area is used by the upper layers, and you
+ want to make your NAND as reliable as possible.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ nand-is-boot-medium:
+ description:
+ Whether or not the NAND chip is a boot medium. Drivers might
+ use this information to select ECC algorithms supported by
+ the boot ROM or similar restrictions.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ nand-rb:
+ description:
+ Contains the native Ready/Busy IDs.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+
+ rb-gpios:
+ description:
+ Contains one or more GPIO descriptors (the number of descriptors
+ depends on the number of R/B pins exposed by the flash) for the
+ Ready/Busy pins. Active state refers to the NAND ready state and
+ should be set to GPIOD_ACTIVE_HIGH unless the signal is inverted.
+
+ wp-gpios:
+ description:
+ Contains one GPIO descriptor for the Write Protect pin.
+ Active state refers to the NAND Write Protect state and should be
+ set to GPIOD_ACTIVE_LOW unless the signal is inverted.
+ maxItems: 1
+
+required:
+ - reg
+
+# This is a generic file that other bindings inherit from and extend
+additionalProperties: true
diff --git a/Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml b/Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml
index 7eb1d0a38565..ee53715ffdca 100644
--- a/Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/rockchip,nand-controller.yaml
@@ -57,6 +57,7 @@ properties:
patternProperties:
"^nand@[0-7]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
reg:
minimum: 0
@@ -116,6 +117,8 @@ patternProperties:
Only used in combination with 'nand-is-boot-medium'.
+ unevaluatedProperties: false
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
index 986e85ccebc7..e72cb5bacaf0 100644
--- a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
@@ -37,6 +37,7 @@ properties:
patternProperties:
"^nand@[a-f0-9]$":
type: object
+ $ref: raw-nand-chip.yaml
properties:
nand-ecc-step-size:
const: 512
@@ -44,6 +45,8 @@ patternProperties:
nand-ecc-strength:
enum: [1, 4, 8]
+ unevaluatedProperties: false
+
allOf:
- $ref: nand-controller.yaml#
diff --git a/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml b/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml
index 4774c92e7fc4..df4fdc02456d 100644
--- a/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml
+++ b/Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml
@@ -30,6 +30,8 @@ properties:
patternProperties:
"^flash@[0-1],[0-9a-f]+$":
type: object
+ $ref: mtd-physmap.yaml
+ unevaluatedProperties: false
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
index 3bd912ed7c7e..23e92be33ac8 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A20 GMAC
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
maintainers:
- Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 47bc2057e629..4bfac9186886 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -63,7 +63,7 @@ required:
- syscon
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/altr,tse.yaml b/Documentation/devicetree/bindings/net/altr,tse.yaml
index 9d02af468906..f5d3b70af07a 100644
--- a/Documentation/devicetree/bindings/net/altr,tse.yaml
+++ b/Documentation/devicetree/bindings/net/altr,tse.yaml
@@ -72,8 +72,8 @@ allOf:
compatible:
contains:
enum:
- - const: altr,tse-1.0
- - const: ALTR,tse-1.0
+ - altr,tse-1.0
+ - ALTR,tse-1.0
then:
properties:
reg:
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index a2c51a84efa5..ee7a65b528cd 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -27,7 +27,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
- if:
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
index 68f78b90d23a..604985c8068e 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
@@ -50,6 +50,9 @@ properties:
vddch0-supply:
description: VDD_CH0 supply regulator handle
+ vddch1-supply:
+ description: VDD_CH1 supply regulator handle
+
vddaon-supply:
description: VDD_AON supply regulator handle
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
index 0e5e5db32faf..7c90a4390531 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
@@ -55,7 +55,7 @@ properties:
patternProperties:
"^mdio@[0-9a-f]+$":
type: object
- $ref: "brcm,unimac-mdio.yaml"
+ $ref: brcm,unimac-mdio.yaml
description:
GENET internal UniMAC MDIO bus
diff --git a/Documentation/devicetree/bindings/net/cdns,macb.yaml b/Documentation/devicetree/bindings/net/cdns,macb.yaml
index bef5e0f895be..bf8894a0257e 100644
--- a/Documentation/devicetree/bindings/net/cdns,macb.yaml
+++ b/Documentation/devicetree/bindings/net/cdns,macb.yaml
@@ -109,6 +109,16 @@ properties:
power-domains:
maxItems: 1
+ cdns,rx-watermark:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ When the receive partial store and forward mode is activated,
+ the receiver will only begin to forward the packet to the external
+ AHB or AXI slave when enough packet data is stored in the SRAM packet buffer.
+ rx-watermark corresponds to the number of SRAM buffer locations,
+ that need to be filled, before the forwarding process is activated.
+ Width of the SRAM is platform dependent, and can be 4, 8 or 16 bytes.
+
'#address-cells':
const: 1
@@ -166,6 +176,7 @@ examples:
compatible = "cdns,macb";
reg = <0xfffc4000 0x4000>;
interrupts = <21>;
+ cdns,rx-watermark = <0x44>;
phy-mode = "rmii";
local-mac-address = [3a 0e 03 04 05 06];
clock-names = "pclk", "hclk", "tx_clk";
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
index 2363b412410c..33726134f5c9 100644
--- a/Documentation/devicetree/bindings/net/dsa/marvell.txt
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -20,7 +20,7 @@ which is at a different MDIO base address in different switch families.
6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321,
6341, 6350, 6351, 6352
- "marvell,mv88e6190" : Switch has base address 0x00. Use with models:
- 6190, 6190X, 6191, 6290, 6390, 6390X
+ 6163, 6190, 6190X, 6191, 6290, 6390, 6390X
- "marvell,mv88e6250" : Switch has base address 0x08 or 0x18. Use with model:
6220, 6250
diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
index 9a64ed658745..4d5f5cc6d031 100644
--- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
@@ -12,10 +12,6 @@ description:
cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
depends on the SPI bus master driver.
-allOf:
- - $ref: dsa.yaml#/$defs/ethernet-ports
- - $ref: /schemas/spi/spi-peripheral-props.yaml#
-
maintainers:
- Vladimir Oltean <vladimir.oltean@nxp.com>
@@ -36,6 +32,9 @@ properties:
reg:
maxItems: 1
+ spi-cpha: true
+ spi-cpol: true
+
# Optional container node for the 2 internal MDIO buses of the SJA1110
# (one for the internal 100base-T1 PHYs and the other for the single
# 100base-TX PHY). The "reg" property does not have physical significance.
@@ -109,6 +108,30 @@ $defs:
1860, 1880, 1900, 1920, 1940, 1960, 1980, 2000, 2020, 2040, 2060, 2080,
2100, 2120, 2140, 2160, 2180, 2200, 2220, 2240, 2260]
+allOf:
+ - $ref: dsa.yaml#/$defs/ethernet-ports
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+ - if:
+ properties:
+ compatible:
+ enum:
+ - nxp,sja1105e
+ - nxp,sja1105p
+ - nxp,sja1105q
+ - nxp,sja1105r
+ - nxp,sja1105s
+ - nxp,sja1105t
+ then:
+ properties:
+ spi-cpol: false
+ required:
+ - spi-cpha
+ else:
+ properties:
+ spi-cpha: false
+ required:
+ - spi-cpol
+
unevaluatedProperties: false
examples:
@@ -120,6 +143,7 @@ examples:
ethernet-switch@1 {
reg = <0x1>;
compatible = "nxp,sja1105t";
+ spi-cpha;
ethernet-ports {
#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 4f574532ee13..c1241c8a3b77 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -93,6 +93,12 @@ properties:
the turn around line low at end of the control phase of the
MDIO transaction.
+ clocks:
+ maxItems: 1
+ description:
+ External clock connected to the PHY. If not specified it is assumed
+ that the PHY uses a fixed crystal or an internal oscillator.
+
enet-phy-lane-swap:
$ref: /schemas/types.yaml#/definitions/flag
description:
diff --git a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
index d23fa3771210..42a0bc94312c 100644
--- a/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
+++ b/Documentation/devicetree/bindings/net/intel,dwmac-plat.yaml
@@ -19,7 +19,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml b/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml
index d71fa9de2b64..8a3713abd1ca 100644
--- a/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml
+++ b/Documentation/devicetree/bindings/net/maxlinear,gpy2xx.yaml
@@ -17,11 +17,12 @@ properties:
maxlinear,use-broken-interrupts:
description: |
Interrupts are broken on some GPY2xx PHYs in that they keep the
- interrupt line asserted even after the interrupt status register is
- cleared. Thus it is blocking the interrupt line which is usually bad
- for shared lines. By default interrupts are disabled for this PHY and
- polling mode is used. If one can live with the consequences, this
- property can be used to enable interrupt handling.
+ interrupt line asserted for a random amount of time even after the
+ interrupt status register is cleared. Thus it is blocking the
+ interrupt line which is usually bad for shared lines. By default,
+ interrupts are disabled for this PHY and polling mode is used. If one
+ can live with the consequences, this property can be used to enable
+ interrupt handling.
Affected PHYs (as far as known) are GPY215B and GPY215C.
type: boolean
diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml
index 0fa2132fa4f4..08d74ca0769c 100644
--- a/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.yaml
@@ -25,7 +25,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/micrel,ks8851.yaml b/Documentation/devicetree/bindings/net/micrel,ks8851.yaml
index b44d83554ef5..b726c6e14633 100644
--- a/Documentation/devicetree/bindings/net/micrel,ks8851.yaml
+++ b/Documentation/devicetree/bindings/net/micrel,ks8851.yaml
@@ -44,13 +44,13 @@ required:
allOf:
- $ref: ethernet-controller.yaml#
- - $ref: /schemas/memory-controllers/mc-peripheral-props.yaml#
- if:
properties:
compatible:
contains:
const: micrel,ks8851
then:
+ $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
reg:
maxItems: 1
@@ -60,6 +60,7 @@ allOf:
contains:
const: micrel,ks8851-mll
then:
+ $ref: /schemas/memory-controllers/mc-peripheral-props.yaml#
properties:
reg:
minItems: 2
diff --git a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
index 63409cbff5ad..4c01cae7c93a 100644
--- a/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,dwmac-imx.yaml
@@ -24,7 +24,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
index b110abb42597..2d382faca0e6 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
@@ -16,7 +16,7 @@ maintainers:
properties:
$nodename:
- pattern: "^ethernet-pse(@.*)?$"
+ pattern: "^ethernet-pse(@.*|-([0-9]|[1-9][0-9]+))?$"
"#pse-cells":
description:
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
index 60a38044fb19..7bdb412a0185 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -20,6 +20,7 @@ properties:
compatible:
enum:
- qcom,qcs404-ethqos
+ - qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- qcom,sm8150-ethqos
@@ -32,11 +33,13 @@ properties:
- const: rgmii
interrupts:
+ minItems: 1
items:
- description: Combined signal for various interrupt events
- description: The interrupt that occurs when Rx exits the LPI state
interrupt-names:
+ minItems: 1
items:
- const: macirq
- const: eth_lpi
@@ -49,11 +52,18 @@ properties:
- const: stmmaceth
- const: pclk
- const: ptp_ref
- - const: rgmii
+ - enum:
+ - rgmii
+ - phyaux
iommus:
maxItems: 1
+ phys: true
+
+ phy-names:
+ const: serdes
+
required:
- compatible
- clocks
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 2a21bbe02892..176ea5f90251 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -32,7 +32,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 363b3e3ea3a6..ddf9522a5dc2 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -67,6 +67,7 @@ properties:
- loongson,ls2k-dwmac
- loongson,ls7a-dwmac
- qcom,qcs404-ethqos
+ - qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- qcom,sm8150-ethqos
- renesas,r9a06g032-gmac
@@ -582,6 +583,7 @@ allOf:
- ingenic,x1600-mac
- ingenic,x1830-mac
- ingenic,x2000-mac
+ - qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- snps,dwmac-3.50a
- snps,dwmac-4.10a
@@ -638,6 +640,7 @@ allOf:
- ingenic,x1830-mac
- ingenic,x2000-mac
- qcom,qcs404-ethqos
+ - qcom,sa8775p-ethqos
- qcom,sc8280xp-ethqos
- qcom,sm8150-ethqos
- snps,dwmac-4.00
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 395a4650e285..c9c25132d154 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -168,14 +168,14 @@ properties:
patternProperties:
"^mdio@[0-9a-f]+$":
type: object
- $ref: "ti,davinci-mdio.yaml#"
+ $ref: ti,davinci-mdio.yaml#
description:
CPSW MDIO bus.
"^cpts@[0-9a-f]+":
type: object
- $ref: "ti,k3-am654-cpts.yaml#"
+ $ref: ti,k3-am654-cpts.yaml#
description:
CPSW Common Platform Time Sync (CPTS) module.
diff --git a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
index 474fa8bcf302..052f636158b3 100644
--- a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
@@ -19,7 +19,7 @@ select:
- compatible
allOf:
- - $ref: "snps,dwmac.yaml#"
+ - $ref: snps,dwmac.yaml#
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
index c85ed330426d..7758a55dd328 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
@@ -84,6 +84,8 @@ properties:
required:
- iommus
+ ieee80211-freq-limit: true
+
qcom,ath10k-calibration-data:
$ref: /schemas/types.yaml#/definitions/uint8-array
description:
@@ -164,6 +166,7 @@ required:
additionalProperties: false
allOf:
+ - $ref: ieee80211.yaml#
- if:
properties:
compatible:
@@ -355,4 +358,5 @@ examples:
"msi14",
"msi15",
"legacy";
+ ieee80211-freq-limit = <5470000 5875000>;
};
diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
deleted file mode 100644
index 80e505a2fda1..000000000000
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-XILINX AXI ETHERNET Device Tree Bindings
---------------------------------------------------------
-
-Also called AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core
-provides connectivity to an external ethernet PHY supporting different
-interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two
-segments of memory for buffering TX and RX, as well as the capability of
-offloading TX/RX checksum calculation off the processor.
-
-Management configuration is done through the AXI interface, while payload is
-sent and received through means of an AXI DMA controller. This driver
-includes the DMA driver code, so this driver is incompatible with AXI DMA
-driver.
-
-For more details about mdio please refer phy.txt file in the same directory.
-
-Required properties:
-- compatible : Must be one of "xlnx,axi-ethernet-1.00.a",
- "xlnx,axi-ethernet-1.01.a", "xlnx,axi-ethernet-2.01.a"
-- reg : Address and length of the IO space, as well as the address
- and length of the AXI DMA controller IO space, unless
- axistream-connected is specified, in which case the reg
- attribute of the node referenced by it is used.
-- interrupts : Should be a list of 2 or 3 interrupts: TX DMA, RX DMA,
- and optionally Ethernet core. If axistream-connected is
- specified, the TX/RX DMA interrupts should be on that node
- instead, and only the Ethernet core interrupt is optionally
- specified here.
-- phy-handle : Should point to the external phy device if exists. Pointing
- this to the PCS/PMA PHY is deprecated and should be avoided.
- See ethernet.txt file in the same directory.
-- xlnx,rxmem : Set to allocated memory buffer for Rx/Tx in the hardware
-
-Optional properties:
-- phy-mode : See ethernet.txt
-- xlnx,phy-type : Deprecated, do not use, but still accepted in preference
- to phy-mode.
-- xlnx,txcsum : 0 or empty for disabling TX checksum offload,
- 1 to enable partial TX checksum offload,
- 2 to enable full TX checksum offload
-- xlnx,rxcsum : Same values as xlnx,txcsum but for RX checksum offload
-- xlnx,switch-x-sgmii : Boolean to indicate the Ethernet core is configured to
- support both 1000BaseX and SGMII modes. If set, the phy-mode
- should be set to match the mode selected on core reset (i.e.
- by the basex_or_sgmii core input line).
-- clock-names: Tuple listing input clock names. Possible clocks:
- s_axi_lite_clk: Clock for AXI register slave interface
- axis_clk: AXI4-Stream clock for TXD RXD TXC and RXS interfaces
- ref_clk: Ethernet reference clock, used by signal delay
- primitives and transceivers
- mgt_clk: MGT reference clock (used by optional internal
- PCS/PMA PHY)
-
- Note that if s_axi_lite_clk is not specified by name, the
- first clock of any name is used for this. If that is also not
- specified, the clock rate is auto-detected from the CPU clock
- (but only on platforms where this is possible). New device
- trees should specify all applicable clocks by name - the
- fallbacks to an unnamed clock or to CPU clock are only for
- backward compatibility.
-- clocks: Phandles to input clocks matching clock-names. Refer to common
- clock bindings.
-- axistream-connected: Reference to another node which contains the resources
- for the AXI DMA controller used by this device.
- If this is specified, the DMA-related resources from that
- device (DMA registers and DMA TX/RX interrupts) rather
- than this one will be used.
- - mdio : Child node for MDIO bus. Must be defined if PHY access is
- required through the core's MDIO interface (i.e. always,
- unless the PHY is accessed through a different bus).
- Non-standard MDIO bus frequency is supported via
- "clock-frequency", see mdio.yaml.
-
- - pcs-handle: Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X
- modes, where "pcs-handle" should be used to point
- to the PCS/PMA PHY, and "phy-handle" should point to an
- external PHY if exists.
-
-Example:
- axi_ethernet_eth: ethernet@40c00000 {
- compatible = "xlnx,axi-ethernet-1.00.a";
- device_type = "network";
- interrupt-parent = <&microblaze_0_axi_intc>;
- interrupts = <2 0 1>;
- clock-names = "s_axi_lite_clk", "axis_clk", "ref_clk", "mgt_clk";
- clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>;
- phy-mode = "mii";
- reg = <0x40c00000 0x40000 0x50c00000 0x40000>;
- xlnx,rxcsum = <0x2>;
- xlnx,rxmem = <0x800>;
- xlnx,txcsum = <0x2>;
- phy-handle = <&phy0>;
- axi_ethernetlite_0_mdio: mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- phy0: phy@0 {
- device_type = "ethernet-phy";
- reg = <1>;
- };
- };
- };
diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
new file mode 100644
index 000000000000..1d33d80af11c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
@@ -0,0 +1,183 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/xlnx,axi-ethernet.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: AXI 1G/2.5G Ethernet Subsystem
+
+description: |
+ Also called AXI 1G/2.5G Ethernet Subsystem, the xilinx axi ethernet IP core
+ provides connectivity to an external ethernet PHY supporting different
+ interfaces: MII, GMII, RGMII, SGMII, 1000BaseX. It also includes two
+ segments of memory for buffering TX and RX, as well as the capability of
+ offloading TX/RX checksum calculation off the processor.
+
+ Management configuration is done through the AXI interface, while payload is
+ sent and received through means of an AXI DMA controller. This driver
+ includes the DMA driver code, so this driver is incompatible with the AXI
+ DMA driver.
+
+maintainers:
+ - Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
+
+properties:
+ compatible:
+ enum:
+ - xlnx,axi-ethernet-1.00.a
+ - xlnx,axi-ethernet-1.01.a
+ - xlnx,axi-ethernet-2.01.a
+
+ reg:
+ description:
+ Address and length of the IO space, as well as the address
+ and length of the AXI DMA controller IO space, unless
+ axistream-connected is specified, in which case the reg
+ attribute of the node referenced by it is used.
+ maxItems: 2
+
+ interrupts:
+ items:
+ - description: Ethernet core interrupt
+ - description: Tx DMA interrupt
+ - description: Rx DMA interrupt
+ description:
+ Ethernet core interrupt is optional. If the axistream-connected property
+ is present, the DMA node should contain the TX/RX DMA interrupts; otherwise
+ the DMA interrupt resources are specified on the ethernet node.
+ minItems: 1
+
+ phy-handle: true
+
+ xlnx,rxmem:
+ description:
+ Set to allocated memory buffer for Rx/Tx in the hardware.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ phy-mode:
+ enum:
+ - mii
+ - gmii
+ - rgmii
+ - sgmii
+ - 1000BaseX
+
+ xlnx,phy-type:
+ description:
+ Do not use, but still accepted in preference to phy-mode.
+ deprecated: true
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ xlnx,txcsum:
+ description:
+ TX checksum offload. 0 or empty for disabling TX checksum offload,
+ 1 to enable partial TX checksum offload and 2 to enable full TX
+ checksum offload.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+
+ xlnx,rxcsum:
+ description:
+ RX checksum offload. 0 or empty for disabling RX checksum offload,
+ 1 to enable partial RX checksum offload and 2 to enable full RX
+ checksum offload.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+
+ xlnx,switch-x-sgmii:
+ type: boolean
+ description:
+ Indicate the Ethernet core is configured to support both 1000BaseX and
+ SGMII modes. If set, the phy-mode should be set to match the mode
+ selected on core reset (i.e. by the basex_or_sgmii core input line).
+
+ clocks:
+ items:
+ - description: Clock for AXI register slave interface.
+ - description: AXI4-Stream clock for TXD RXD TXC and RXS interfaces.
+ - description: Ethernet reference clock, used by signal delay primitives
+ and transceivers.
+ - description: MGT reference clock (used by optional internal PCS/PMA PHY)
+
+ clock-names:
+ items:
+ - const: s_axi_lite_clk
+ - const: axis_clk
+ - const: ref_clk
+ - const: mgt_clk
+
+ axistream-connected:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description: Phandle of AXI DMA controller which contains the resources
+ used by this device. If this is specified, the DMA-related resources
+ from that device (DMA registers and DMA TX/RX interrupts) rather than
+ this one will be used.
+
+ mdio:
+ type: object
+
+ pcs-handle:
+ description: Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X
+ modes, where "pcs-handle" should be used to point to the PCS/PMA PHY,
+ and "phy-handle" should point to an external PHY if one exists.
+ maxItems: 1
+
+required:
+ - compatible
+ - interrupts
+ - reg
+ - xlnx,rxmem
+ - phy-handle
+
+allOf:
+ - $ref: /schemas/net/ethernet-controller.yaml#
+
+additionalProperties: false
+
+examples:
+ - |
+ axi_ethernet_eth: ethernet@40c00000 {
+ compatible = "xlnx,axi-ethernet-1.00.a";
+ interrupts = <2 0 1>;
+ clock-names = "s_axi_lite_clk", "axis_clk", "ref_clk", "mgt_clk";
+ clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>;
+ phy-mode = "mii";
+ reg = <0x40c00000 0x40000>,<0x50c00000 0x40000>;
+ xlnx,rxcsum = <0x2>;
+ xlnx,rxmem = <0x800>;
+ xlnx,txcsum = <0x2>;
+ phy-handle = <&phy0>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@1 {
+ device_type = "ethernet-phy";
+ reg = <1>;
+ };
+ };
+ };
+
+ - |
+ axi_ethernet_eth1: ethernet@40000000 {
+ compatible = "xlnx,axi-ethernet-1.00.a";
+ interrupts = <0>;
+ clock-names = "s_axi_lite_clk", "axis_clk", "ref_clk", "mgt_clk";
+ clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>;
+ phy-mode = "mii";
+ reg = <0x00 0x40000000 0x00 0x40000>;
+ xlnx,rxcsum = <0x2>;
+ xlnx,rxmem = <0x800>;
+ xlnx,txcsum = <0x2>;
+ phy-handle = <&phy1>;
+ axistream-connected = <&dma>;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ device_type = "ethernet-phy";
+ reg = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index 80a92385367e..e9fad4b3de68 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -4,7 +4,7 @@
$id: http://devicetree.org/schemas/perf/fsl-imx-ddr.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Freescale(NXP) IMX8 DDR performance monitor
+title: Freescale(NXP) IMX8/9 DDR performance monitor
maintainers:
- Frank Li <frank.li@nxp.com>
@@ -19,6 +19,7 @@ properties:
- fsl,imx8mm-ddr-pmu
- fsl,imx8mn-ddr-pmu
- fsl,imx8mp-ddr-pmu
+ - fsl,imx93-ddr-pmu
- items:
- enum:
- fsl,imx8mm-ddr-pmu
diff --git a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
index 7034cdca54e0..b6384306db5c 100644
--- a/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/mt6358-regulator.txt
@@ -8,15 +8,14 @@ Documentation/devicetree/bindings/regulator/regulator.txt.
The valid names for regulators are::
BUCK:
- buck_vdram1, buck_vcore, buck_vcore_sshub, buck_vpa, buck_vproc11,
- buck_vproc12, buck_vgpu, buck_vs2, buck_vmodem, buck_vs1
+ buck_vdram1, buck_vcore, buck_vpa, buck_vproc11, buck_vproc12, buck_vgpu,
+ buck_vs2, buck_vmodem, buck_vs1
LDO:
ldo_vdram2, ldo_vsim1, ldo_vibr, ldo_vrf12, ldo_vio18, ldo_vusb, ldo_vcamio,
ldo_vcamd, ldo_vcn18, ldo_vfe28, ldo_vsram_proc11, ldo_vcn28, ldo_vsram_others,
- ldo_vsram_others_sshub, ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18,
- ldo_vmch, ldo_vbif28, ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12,
- ldo_vrf18, ldo_vcn33_bt, ldo_vcn33_wifi, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28,
- ldo_vsim2
+ ldo_vsram_gpu, ldo_vxo22, ldo_vefuse, ldo_vaux18, ldo_vmch, ldo_vbif28,
+ ldo_vsram_proc12, ldo_vcama1, ldo_vemc, ldo_vio28, ldo_va12, ldo_vrf18,
+ ldo_vcn33, ldo_vcama2, ldo_vmc, ldo_vldo28, ldo_vaud28, ldo_vsim2
Example:
@@ -305,15 +304,8 @@ Example:
regulator-enable-ramp-delay = <120>;
};
- mt6358_vcn33_bt_reg: ldo_vcn33_bt {
- regulator-name = "vcn33_bt";
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3500000>;
- regulator-enable-ramp-delay = <270>;
- };
-
- mt6358_vcn33_wifi_reg: ldo_vcn33_wifi {
- regulator-name = "vcn33_wifi";
+ mt6358_vcn33_reg: ldo_vcn33 {
+ regulator-name = "vcn33";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3500000>;
regulator-enable-ramp-delay = <270>;
@@ -354,17 +346,5 @@ Example:
regulator-max-microvolt = <3100000>;
regulator-enable-ramp-delay = <540>;
};
-
- mt6358_vcore_sshub_reg: buck_vcore_sshub {
- regulator-name = "vcore_sshub";
- regulator-min-microvolt = <500000>;
- regulator-max-microvolt = <1293750>;
- };
-
- mt6358_vsram_others_sshub_reg: ldo_vsram_others_sshub {
- regulator-name = "vsram_others_sshub";
- regulator-min-microvolt = <500000>;
- regulator-max-microvolt = <1293750>;
- };
};
};
diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.yaml b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
index 67a30b23b92c..e384e4953f0a 100644
--- a/Documentation/devicetree/bindings/regulator/pfuze100.yaml
+++ b/Documentation/devicetree/bindings/regulator/pfuze100.yaml
@@ -36,6 +36,9 @@ properties:
reg:
maxItems: 1
+ interrupts:
+ maxItems: 1
+
fsl,pfuze-support-disable-sw:
$ref: /schemas/types.yaml#/definitions/flag
description: |
diff --git a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
index 7e58471097f8..80ecf938b749 100644
--- a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
@@ -64,6 +64,7 @@ properties:
defined, <100> is assumed, meaning that
pwm-dutycycle-range contains values expressed in
percent.
+ $ref: /schemas/types.yaml#/definitions/uint32
default: 100
pwm-dutycycle-range:
diff --git a/Documentation/devicetree/bindings/regulator/renesas,raa215300.yaml b/Documentation/devicetree/bindings/regulator/renesas,raa215300.yaml
new file mode 100644
index 000000000000..97cff71d2967
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/renesas,raa215300.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/renesas,raa215300.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RAA215300 Power Management Integrated Circuit (PMIC)
+
+maintainers:
+ - Biju Das <biju.das.jz@bp.renesas.com>
+
+description: |
+ The RAA215300 is a high-performance, low-cost 9-channel PMIC designed for
+ 32-bit and 64-bit MCU and MPU applications. It supports DDR3, DDR3L, DDR4,
+ and LPDDR4 memory power requirements. The internally compensated regulators,
+ built-in Real-Time Clock (RTC), 32kHz crystal oscillator, and coin cell
+ battery charger provide a highly integrated, small footprint power solution
+ ideal for System-On-Module (SOM) applications. A spread spectrum feature
+ provides an ease-of-use solution for noise-sensitive audio or RF applications.
+
+ This device exposes two devices via I2C. One for the integrated RTC IP, and
+ one for everything else.
+
+ Link to datasheet:
+ https://www.renesas.com/in/en/products/power-power-management/multi-channel-power-management-ics-pmics/ssdsoc-power-management-ics-pmic-and-pmus/raa215300-high-performance-9-channel-pmic-supporting-ddr-memory-built-charger-and-rtc
+
+properties:
+ compatible:
+ enum:
+ - renesas,raa215300
+
+ reg:
+ maxItems: 2
+
+ reg-names:
+ items:
+ - const: main
+ - const: rtc
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ description: |
+ The clocks are optional. The RTC is disabled if no clock is
+ provided (either xin or clkin).
+ maxItems: 1
+
+ clock-names:
+ description: |
+ Use xin, if connected to an external crystal.
+ Use clkin, if connected to an external clock signal.
+ enum:
+ - xin
+ - clkin
+
+required:
+ - compatible
+ - reg
+ - reg-names
+
+additionalProperties: false
+
+examples:
+ - |
+ /* 32.768kHz crystal */
+ x2: x2-clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ raa215300: pmic@12 {
+ compatible = "renesas,raa215300";
+ reg = <0x12>, <0x6f>;
+ reg-names = "main", "rtc";
+
+ clocks = <&x2>;
+ clock-names = "xin";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/ti,tps62870.yaml b/Documentation/devicetree/bindings/regulator/ti,tps62870.yaml
new file mode 100644
index 000000000000..386989544dac
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti,tps62870.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/ti,tps62870.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI TPS62870/TPS62871/TPS62872/TPS62873 voltage regulator
+
+maintainers:
+ - Mårten Lindahl <marten.lindahl@axis.com>
+
+allOf:
+ - $ref: regulator.yaml#
+
+properties:
+ compatible:
+ enum:
+ - ti,tps62870
+ - ti,tps62871
+ - ti,tps62872
+ - ti,tps62873
+
+ reg:
+ maxItems: 1
+
+ regulator-initial-mode:
+ enum: [ 1, 2 ]
+ description: 1 - Forced PWM mode, 2 - Low power mode
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ regulator@41 {
+ compatible = "ti,tps62873";
+ reg = <0x41>;
+ regulator-name = "+0.75V";
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1675000>;
+ regulator-initial-mode = <1>;
+ };
+ };
+
+...
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
index 2155478bfc4d..a6f34bdd1d3c 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
@@ -14,9 +14,6 @@ maintainers:
- Maxime Ripard <mripard@kernel.org>
properties:
- "#address-cells": true
- "#size-cells": true
-
compatible:
const: allwinner,sun4i-a10-spi
@@ -46,12 +43,9 @@ properties:
- const: rx
- const: tx
- num-cs: true
-
patternProperties:
"^.*@[0-9a-f]+":
type: object
- additionalProperties: true
properties:
reg:
items:
@@ -71,7 +65,7 @@ required:
- clocks
- clock-names
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
index de36c6a34a0f..28b8ace63044 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -14,11 +14,9 @@ maintainers:
- Maxime Ripard <mripard@kernel.org>
properties:
- "#address-cells": true
- "#size-cells": true
-
compatible:
oneOf:
+ - const: allwinner,sun50i-r329-spi
- const: allwinner,sun6i-a31-spi
- const: allwinner,sun8i-h3-spi
- items:
@@ -28,6 +26,15 @@ properties:
- allwinner,sun50i-h616-spi
- allwinner,suniv-f1c100s-spi
- const: allwinner,sun8i-h3-spi
+ - items:
+ - enum:
+ - allwinner,sun20i-d1-spi
+ - allwinner,sun50i-r329-spi-dbi
+ - const: allwinner,sun50i-r329-spi
+ - items:
+ - const: allwinner,sun20i-d1-spi-dbi
+ - const: allwinner,sun50i-r329-spi-dbi
+ - const: allwinner,sun50i-r329-spi
reg:
maxItems: 1
@@ -58,12 +65,9 @@ properties:
- const: rx
- const: tx
- num-cs: true
-
patternProperties:
"^.*@[0-9a-f]+":
type: object
- additionalProperties: true
properties:
reg:
items:
@@ -83,7 +87,7 @@ required:
- clocks
- clock-names
-additionalProperties: false
+unevaluatedProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
index 6c57dd6c3a36..58367587bfbc 100644
--- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml
@@ -20,6 +20,10 @@ properties:
- items:
- const: microchip,sam9x60-spi
- const: atmel,at91rm9200-spi
+ - items:
+ - const: microchip,sam9x7-spi
+ - const: microchip,sam9x60-spi
+ - const: atmel,at91rm9200-spi
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
index b310069762dd..4f15f9a0cc34 100644
--- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
+++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml
@@ -46,12 +46,28 @@ allOf:
maxItems: 2
items:
enum: [ qspi, qspi-ocp ]
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: amd,pensando-elba-qspi
+ then:
+ properties:
+ cdns,fifo-depth:
+ enum: [ 128, 256, 1024 ]
+ default: 1024
+ else:
+ properties:
+ cdns,fifo-depth:
+ enum: [ 128, 256 ]
+ default: 128
properties:
compatible:
oneOf:
- items:
- enum:
+ - amd,pensando-elba-qspi
- ti,k2g-qspi
- ti,am654-ospi
- intel,lgm-qspi
@@ -76,8 +92,6 @@ properties:
description:
Size of the data FIFO in words.
$ref: /schemas/types.yaml#/definitions/uint32
- enum: [ 128, 256 ]
- default: 128
cdns,fifo-width:
$ref: /schemas/types.yaml#/definitions/uint32
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
index ee8f7ea907b0..1696ac46a660 100644
--- a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml
@@ -29,6 +29,9 @@ properties:
reg:
maxItems: 1
+ iommus:
+ maxItems: 1
+
interrupts:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml b/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml
new file mode 100644
index 000000000000..e59183e53690
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/renesas,rzv2m-csi.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/renesas,rzv2m-csi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/V2M Clocked Serial Interface (CSI)
+
+maintainers:
+ - Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+ - Geert Uytterhoeven <geert+renesas@glider.be>
+
+allOf:
+ - $ref: spi-controller.yaml#
+
+properties:
+ compatible:
+ const: renesas,rzv2m-csi
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ items:
+ - description: The clock used to generate the output clock (CSICLK)
+ - description: Internal clock to access the registers (PCLK)
+
+ clock-names:
+ items:
+ - const: csiclk
+ - const: pclk
+
+ resets:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - resets
+ - power-domains
+ - '#address-cells'
+ - '#size-cells'
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/clock/r9a09g011-cpg.h>
+ csi4: spi@a4020200 {
+ compatible = "renesas,rzv2m-csi";
+ reg = <0xa4020200 0x80>;
+ interrupts = <GIC_SPI 230 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cpg CPG_MOD R9A09G011_CSI4_CLK>,
+ <&cpg CPG_MOD R9A09G011_CPERI_GRPH_PCLK>;
+ clock-names = "csiclk", "pclk";
+ resets = <&cpg R9A09G011_CSI_GPH_PRESETN>;
+ power-domains = <&cpg>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/samsung,spi.yaml b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
index e0a465d70b0a..79da99ca0e53 100644
--- a/Documentation/devicetree/bindings/spi/samsung,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/samsung,spi.yaml
@@ -35,8 +35,6 @@ properties:
minItems: 2
maxItems: 3
- cs-gpios: true
-
dmas:
minItems: 2
maxItems: 2
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
index 12ca108864c6..a47cb144b09f 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -74,6 +74,8 @@ properties:
const: intel,keembay-ssi
- description: Intel Thunder Bay SPI Controller
const: intel,thunderbay-ssi
+ - description: Intel Mount Evans Integrated Management Complex SPI Controller
+ const: intel,mountevans-imc-ssi
- description: AMD Pensando Elba SoC SPI Controller
const: amd,pensando-elba-spi
- description: Baikal-T1 SPI Controller
diff --git a/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml b/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml
index 597fc4e6b01c..c96131ebbea1 100644
--- a/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/socionext,uniphier-spi.yaml
@@ -17,9 +17,6 @@ allOf:
- $ref: spi-controller.yaml#
properties:
- "#address-cells": true
- "#size-cells": true
-
compatible:
const: socionext,uniphier-scssi
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 90945f59b7e8..524f6fe8c27b 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -17,7 +17,7 @@ description: |
properties:
$nodename:
- pattern: "^spi(@.*|-[0-9a-f])*$"
+ pattern: "^spi(@.*|-([0-9]|[1-9][0-9]+))?$"
"#address-cells":
enum: [0, 1]
diff --git a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
index 20f77246d365..226d8b493b57 100644
--- a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
@@ -32,6 +32,12 @@ properties:
clocks:
maxItems: 2
+ iommus:
+ maxItems: 1
+
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
index b0bee7e42038..ab8b8fccc7af 100644
--- a/Documentation/devicetree/bindings/thermal/armada-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
@@ -8,6 +8,7 @@ Required properties:
* marvell,armada380-thermal
* marvell,armadaxp-thermal
* marvell,armada-ap806-thermal
+ * marvell,armada-ap807-thermal
* marvell,armada-cp110-thermal
Note: these bindings are deprecated for AP806/CP110 and should instead
diff --git a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
deleted file mode 100644
index a3e9ec5dc7ac..000000000000
--- a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-Binding for Thermal Sensor driver for BCM2835 SoCs.
-
-Required parameters:
--------------------
-
-compatible: should be one of: "brcm,bcm2835-thermal",
- "brcm,bcm2836-thermal" or "brcm,bcm2837-thermal"
-reg: Address range of the thermal registers.
-clocks: Phandle of the clock used by the thermal sensor.
-#thermal-sensor-cells: should be 0 (see Documentation/devicetree/bindings/thermal/thermal-sensor.yaml)
-
-Example:
-
-thermal-zones {
- cpu_thermal: cpu-thermal {
- polling-delay-passive = <0>;
- polling-delay = <1000>;
-
- thermal-sensors = <&thermal>;
-
- trips {
- cpu-crit {
- temperature = <80000>;
- hysteresis = <0>;
- type = "critical";
- };
- };
-
- coefficients = <(-538) 407000>;
-
- cooling-maps {
- };
- };
-};
-
-thermal: thermal@7e212000 {
- compatible = "brcm,bcm2835-thermal";
- reg = <0x7e212000 0x8>;
- clocks = <&clocks BCM2835_CLOCK_TSENS>;
- #thermal-sensor-cells = <0>;
-};
diff --git a/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml
new file mode 100644
index 000000000000..2b6026d9fbcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/brcm,bcm2835-thermal.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/brcm,bcm2835-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM2835 thermal sensor
+
+maintainers:
+ - Stefan Wahren <stefan.wahren@i2se.com>
+
+allOf:
+ - $ref: thermal-sensor.yaml#
+
+properties:
+ compatible:
+ enum:
+ - brcm,bcm2835-thermal
+ - brcm,bcm2836-thermal
+ - brcm,bcm2837-thermal
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ "#thermal-sensor-cells":
+ const: 0
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - '#thermal-sensor-cells'
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm2835.h>
+
+ thermal@7e212000 {
+ compatible = "brcm,bcm2835-thermal";
+ reg = <0x7e212000 0x8>;
+ clocks = <&clocks BCM2835_CLOCK_TSENS>;
+ #thermal-sensor-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index d1ec963a6834..27e9e16e6455 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -29,6 +29,8 @@ properties:
items:
- enum:
- qcom,mdm9607-tsens
+ - qcom,msm8226-tsens
+ - qcom,msm8909-tsens
- qcom,msm8916-tsens
- qcom,msm8939-tsens
- qcom,msm8974-tsens
@@ -48,6 +50,7 @@ properties:
- qcom,msm8953-tsens
- qcom,msm8996-tsens
- qcom,msm8998-tsens
+ - qcom,qcm2290-tsens
- qcom,sc7180-tsens
- qcom,sc7280-tsens
- qcom,sc8180x-tsens
@@ -56,6 +59,7 @@ properties:
- qcom,sdm845-tsens
- qcom,sm6115-tsens
- qcom,sm6350-tsens
+ - qcom,sm6375-tsens
- qcom,sm8150-tsens
- qcom,sm8250-tsens
- qcom,sm8350-tsens
@@ -67,6 +71,12 @@ properties:
enum:
- qcom,ipq8074-tsens
+ - description: v2 of TSENS with combined interrupt
+ items:
+ - enum:
+ - qcom,ipq9574-tsens
+ - const: qcom,ipq8074-tsens
+
reg:
items:
- description: TM registers
@@ -223,12 +233,7 @@ allOf:
contains:
enum:
- qcom,ipq8064-tsens
- - qcom,mdm9607-tsens
- - qcom,msm8916-tsens
- qcom,msm8960-tsens
- - qcom,msm8974-tsens
- - qcom,msm8976-tsens
- - qcom,qcs404-tsens
- qcom,tsens-v0_1
- qcom,tsens-v1
then:
@@ -244,22 +249,7 @@ allOf:
properties:
compatible:
contains:
- enum:
- - qcom,msm8953-tsens
- - qcom,msm8996-tsens
- - qcom,msm8998-tsens
- - qcom,sc7180-tsens
- - qcom,sc7280-tsens
- - qcom,sc8180x-tsens
- - qcom,sc8280xp-tsens
- - qcom,sdm630-tsens
- - qcom,sdm845-tsens
- - qcom,sm6350-tsens
- - qcom,sm8150-tsens
- - qcom,sm8250-tsens
- - qcom,sm8350-tsens
- - qcom,sm8450-tsens
- - qcom,tsens-v2
+ const: qcom,tsens-v2
then:
properties:
interrupts:
diff --git a/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt b/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt
deleted file mode 100644
index 39adf54b4388..000000000000
--- a/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Broadcom Kona Family timer
------------------------------------------------------
-This timer is used in the following Broadcom SoCs:
- BCM11130, BCM11140, BCM11351, BCM28145, BCM28155
-
-Required properties:
-- compatible : "brcm,kona-timer"
-- DEPRECATED: compatible : "bcm,kona-timer"
-- reg : Register range for the timer
-- interrupts : interrupt for the timer
-- clocks: phandle + clock specifier pair of the external clock
-- clock-frequency: frequency that the clock operates
-
-Only one of clocks or clock-frequency should be specified.
-
-Refer to clocks/clock-bindings.txt for generic clock consumer properties.
-
-Example:
- timer@35006000 {
- compatible = "brcm,kona-timer";
- reg = <0x35006000 0x1000>;
- interrupts = <0x0 7 0x4>;
- clocks = <&hub_timer_clk>;
- };
-
diff --git a/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml b/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml
new file mode 100644
index 000000000000..d6af8383d6fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/brcm,kona-timer.yaml
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/brcm,kona-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family timer
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+properties:
+ compatible:
+ const: brcm,kona-timer
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-frequency: true
+
+oneOf:
+ - required:
+ - clocks
+ - required:
+ - clock-frequency
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm281xx.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ timer@35006000 {
+ compatible = "brcm,kona-timer";
+ reg = <0x35006000 0x1000>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&aon_ccu BCM281XX_AON_CCU_HUB_TIMER>;
+ };
+...
diff --git a/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml b/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml
new file mode 100644
index 000000000000..ad61ae55850b
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/loongson,ls1x-pwmtimer.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/loongson,ls1x-pwmtimer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1 PWM timer
+
+maintainers:
+ - Keguang Zhang <keguang.zhang@gmail.com>
+
+description:
+ Loongson-1 PWM timer can be used for system clock source
+ and clock event timers.
+
+properties:
+ compatible:
+ const: loongson,ls1b-pwmtimer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/loongson,ls1x-clk.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ clocksource: timer@1fe5c030 {
+ compatible = "loongson,ls1b-pwmtimer";
+ reg = <0x1fe5c030 0x10>;
+
+ clocks = <&clkc LS1X_CLKID_APB>;
+ interrupt-parent = <&intc0>;
+ interrupts = <20 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml b/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml
new file mode 100644
index 000000000000..daa7832babe3
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ralink,rt2880-timer.yaml
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/ralink,rt2880-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Timer present in Ralink family SoCs
+
+maintainers:
+ - Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+properties:
+ compatible:
+ const: ralink,rt2880-timer
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ timer@100 {
+ compatible = "ralink,rt2880-timer";
+ reg = <0x100 0x20>;
+
+ clocks = <&sysc 3>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <1>;
+ };
+...
diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst
index 23edb427e76f..cd8ad7904491 100644
--- a/Documentation/doc-guide/sphinx.rst
+++ b/Documentation/doc-guide/sphinx.rst
@@ -313,9 +313,18 @@ the documentation build system will automatically turn a reference to
function name exists. If you see ``c:func:`` use in a kernel document,
please feel free to remove it.
+Tables
+------
+
+ReStructuredText provides several options for table syntax. Kernel style for
+tables is to prefer *simple table* syntax or *grid table* syntax. See the
+`reStructuredText user reference for table syntax`_ for more details.
+
+.. _reStructuredText user reference for table syntax:
+ https://docutils.sourceforge.io/docs/user/rst/quickref.html#tables
list tables
------------
+~~~~~~~~~~~
The list-table formats can be useful for tables that are not easily laid
out in the usual Sphinx ASCII-art formats. These formats are nearly
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
index 4b4d8e28d3be..7671b531ba1a 100644
--- a/Documentation/driver-api/basics.rst
+++ b/Documentation/driver-api/basics.rst
@@ -84,7 +84,13 @@ Reference counting
Atomics
-------
-.. kernel-doc:: arch/x86/include/asm/atomic.h
+.. kernel-doc:: include/linux/atomic/atomic-instrumented.h
+ :internal:
+
+.. kernel-doc:: include/linux/atomic/atomic-arch-fallback.h
+ :internal:
+
+.. kernel-doc:: include/linux/atomic/atomic-long.h
:internal:
Kernel objects manipulation
diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst
index b8c742aa0a71..f4f044b95c4f 100644
--- a/Documentation/driver-api/edac.rst
+++ b/Documentation/driver-api/edac.rst
@@ -106,6 +106,16 @@ will occupy those chip-select rows.
This term is avoided because it is unclear when needing to distinguish
between chip-select rows and socket sets.
+* High Bandwidth Memory (HBM)
+
+HBM is a new memory type with low power consumption and ultra-wide
+communication lanes. It uses vertically stacked memory chips (DRAM dies)
+interconnected by microscopic wires called "through-silicon vias," or
+TSVs.
+
+Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast
+interconnect called the "interposer". Therefore, HBM's characteristics
+are nearly indistinguishable from on-chip integrated RAM.
Memory Controllers
------------------
@@ -176,3 +186,113 @@ nodes::
the L1 and L2 directories would be "edac_device_block's"
.. kernel-doc:: drivers/edac/edac_device.h
+
+
+Heterogeneous system support
+----------------------------
+
+An AMD heterogeneous system is built by connecting the data fabrics of
+both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the
+GPU nodes can be accessed the same way as the data fabric on CPU nodes.
+
+The MI200 accelerators are data center GPUs. They have 2 data fabrics,
+and each GPU data fabric contains four Unified Memory Controllers (UMC).
+Each UMC contains eight channels. Each UMC channel controls one 128-bit
+HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total
+of 4096-bits of DRAM data bus.
+
+While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC
+channel is interfacing 2GB of DRAM (represented as rank).
+
+Memory controllers on AMD GPU nodes can be represented in EDAC thusly:
+
+ GPU DF / GPU Node -> EDAC MC
+ GPU UMC -> EDAC CSROW
+ GPU UMC channel -> EDAC CHANNEL
+
+For example: a heterogeneous system with 1 AMD CPU is connected to
+4 MI200 (Aldebaran) GPUs using xGMI.
+
+Some more heterogeneous hardware details:
+
+- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC.
+ They have chip selects (csrows) and channels. However, the layouts are different
+ for performance, physical layout, or other reasons.
+- CPU UMCs use 1 channel. In this case UMC = EDAC channel. This follows the
+ marketing speak. CPU has X memory channels, etc.
+- CPU UMCs use up to 4 chip selects, so UMC chip select = EDAC CSROW.
+- GPU UMCs use 1 chip select, so UMC = EDAC CSROW.
+- GPU UMCs use 8 channels, so UMC channel = EDAC channel.
+
+The EDAC subsystem provides a mechanism to handle AMD heterogeneous
+systems by calling system specific ops for both CPUs and GPUs.
+
+AMD GPU nodes are enumerated in sequential order based on the PCI
+hierarchy, and the first GPU node is assumed to have a Node ID value
+following those of the CPU nodes after the latter are fully populated::
+
+ $ ls /sys/devices/system/edac/mc/
+ mc0 - CPU MC node 0
+ mc1 |
+ mc2 |- GPU card[0] => node 0(mc1), node 1(mc2)
+ mc3 |
+ mc4 |- GPU card[1] => node 0(mc3), node 1(mc4)
+ mc5 |
+ mc6 |- GPU card[2] => node 0(mc5), node 1(mc6)
+ mc7 |
+ mc8 |- GPU card[3] => node 0(mc7), node 1(mc8)
+
+For example, a heterogeneous system with one AMD CPU is connected to
+four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented
+via the following sysfs entries::
+
+ /sys/devices/system/edac/mc/..
+
+ CPU # CPU node
+ ├── mc 0
+
+ GPU Nodes are enumerated sequentially after CPU nodes have been populated
+ GPU card 1 # Each MI200 GPU has 2 nodes/mcs
+ ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs
+ │   ├── csrow 0 # UMC 0
+ │   │   ├── channel 0 # Each UMC has 8 channels
+ │   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB
+ │   │   ├── channel 2
+ │   │   ├── channel 3
+ │   │   ├── channel 4
+ │   │   ├── channel 5
+ │   │   ├── channel 6
+ │   │   ├── channel 7
+ │   ├── csrow 1 # UMC 1
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── .. ..
+ │   ├── csrow 3 # UMC 3
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── rank 0
+ │   ├── .. ..
+ │   ├── rank 31 # total 32 ranks/dimms from 4 UMCs
+ ├
+ ├── mc 2 # GPU node 1 == mc2
+ │   ├── .. # each GPU has total 64 GB
+
+ GPU card 2
+ ├── mc 3
+ │   ├── ..
+ ├── mc 4
+ │   ├── ..
+
+ GPU card 3
+ ├── mc 5
+ │   ├── ..
+ ├── mc 6
+ │   ├── ..
+
+ GPU card 4
+ ├── mc 7
+ │   ├── ..
+ ├── mc 8
+ │   ├── ..
diff --git a/Documentation/driver-api/ptp.rst b/Documentation/driver-api/ptp.rst
index 664838ae7776..5e033c3b11b3 100644
--- a/Documentation/driver-api/ptp.rst
+++ b/Documentation/driver-api/ptp.rst
@@ -73,6 +73,22 @@ Writing clock drivers
class driver, since the lock may also be needed by the clock
driver's interrupt service routine.
+PTP hardware clock requirements for '.adjphase'
+-----------------------------------------------
+
+ The 'struct ptp_clock_info' interface has a '.adjphase' function.
+ This function has a set of requirements from the PHC in order to be
+ implemented.
+
+ * The PHC implements a servo algorithm internally that is used to
+ correct the offset passed in the '.adjphase' call.
+ * When other PTP adjustment functions are called, the PHC servo
+ algorithm is disabled.
+
+ **NOTE:** '.adjphase' is not a simple time adjustment functionality
+ that 'jumps' the PHC clock time based on the provided offset. It
+ should correct the offset provided using an internal algorithm.
+
Supported hardware
==================
@@ -106,3 +122,16 @@ Supported hardware
- LPF settings (bandwidth, phase limiting, automatic holdover, physical layer assist (per ITU-T G.8273.2))
- Programmable output PTP clocks, any frequency up to 1GHz (to other PHY/MAC time stampers, refclk to ASSPs/SoCs/FPGAs)
- Lock to GNSS input, automatic switching between GNSS and user-space PHC control (optional)
+
+ * NVIDIA Mellanox
+
+ - GPIO
+ - Certain variants of ConnectX-6 Dx and later products support one
+ GPIO which can time stamp external triggers and one GPIO to produce
+ periodic signals.
+ - Certain variants of ConnectX-5 and older products support one GPIO,
+ configured to either time stamp external triggers or produce
+ periodic signals.
+ - PHC instances
+ - All ConnectX devices have a free-running counter
+ - ConnectX-6 Dx and later devices have a UTC format counter
diff --git a/Documentation/filesystems/autofs-mount-control.rst b/Documentation/filesystems/autofs-mount-control.rst
index bf4b511cdbe8..b5a379d25c40 100644
--- a/Documentation/filesystems/autofs-mount-control.rst
+++ b/Documentation/filesystems/autofs-mount-control.rst
@@ -196,7 +196,7 @@ information and return operation results::
struct args_ismountpoint ismountpoint;
};
- char path[0];
+ char path[];
};
The ioctlfd field is a mount point file descriptor of an autofs mount
diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst
index 4f490278d22f..3b6e38e646cd 100644
--- a/Documentation/filesystems/autofs.rst
+++ b/Documentation/filesystems/autofs.rst
@@ -467,7 +467,7 @@ Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure::
struct args_ismountpoint ismountpoint;
};
- char path[0];
+ char path[];
};
For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst
index 504ba940c36c..dccd61c7c5c3 100644
--- a/Documentation/filesystems/directory-locking.rst
+++ b/Documentation/filesystems/directory-locking.rst
@@ -22,12 +22,11 @@ exclusive.
3) object removal. Locking rules: caller locks parent, finds victim,
locks victim and calls the method. Locks are exclusive.
-4) rename() that is _not_ cross-directory. Locking rules: caller locks
-the parent and finds source and target. In case of exchange (with
-RENAME_EXCHANGE in flags argument) lock both. In any case,
-if the target already exists, lock it. If the source is a non-directory,
-lock it. If we need to lock both, lock them in inode pointer order.
-Then call the method. All locks are exclusive.
+4) rename() that is _not_ cross-directory. Locking rules: caller locks the
+parent and finds source and target. We lock both (provided they exist). If we
+need to lock two inodes of different type (dir vs non-dir), we lock directory
+first. If we need to lock two inodes of the same type, lock them in inode
+pointer order. Then call the method. All locks are exclusive.
NB: we might get away with locking the source (and target in exchange
case) shared.
@@ -44,15 +43,17 @@ All locks are exclusive.
rules:
* lock the filesystem
- * lock parents in "ancestors first" order.
+ * lock parents in "ancestors first" order. If one is not ancestor of
+ the other, lock them in inode pointer order.
* find source and target.
* if old parent is equal to or is a descendent of target
fail with -ENOTEMPTY
* if new parent is equal to or is a descendent of source
fail with -ELOOP
- * If it's an exchange, lock both the source and the target.
- * If the target exists, lock it. If the source is a non-directory,
- lock it. If we need to lock both, do so in inode pointer order.
+ * Lock both the source and the target provided they exist. If we
+ need to lock two inodes of different type (dir vs non-dir), we lock
+ the directory first. If we need to lock two inodes of the same type,
+ lock them in inode pointer order.
* call the method.
All ->i_rwsem are taken exclusive. Again, we might get away with locking
@@ -66,8 +67,9 @@ If no directory is its own ancestor, the scheme above is deadlock-free.
Proof:
- First of all, at any moment we have a partial ordering of the
- objects - A < B iff A is an ancestor of B.
+ First of all, at any moment we have a linear ordering of the
+ objects - A < B iff (A is an ancestor of B) or (B is not an ancestor
+ of A and ptr(A) < ptr(B)).
That ordering can change. However, the following is true:
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index ede672dedf11..cb845e8e5435 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -38,20 +38,14 @@ fail at runtime.
Use cases
=========
-By itself, the base fs-verity feature only provides integrity
-protection, i.e. detection of accidental (non-malicious) corruption.
+By itself, fs-verity only provides integrity protection, i.e.
+detection of accidental (non-malicious) corruption.
However, because fs-verity makes retrieving the file hash extremely
efficient, it's primarily meant to be used as a tool to support
authentication (detection of malicious modifications) or auditing
(logging file hashes before use).
-Trusted userspace code (e.g. operating system code running on a
-read-only partition that is itself authenticated by dm-verity) can
-authenticate the contents of an fs-verity file by using the
-`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
-digital signature of it.
-
A standard file hash could be used instead of fs-verity. However,
this is inefficient if the file is large and only a small portion may
be accessed. This is often the case for Android application package
@@ -69,24 +63,31 @@ still be used on read-only filesystems. fs-verity is for files that
must live on a read-write filesystem because they are independently
updated and potentially user-installed, so dm-verity cannot be used.
-The base fs-verity feature is a hashing mechanism only; actually
-authenticating the files may be done by:
-
-* Userspace-only
-
-* Builtin signature verification + userspace policy
-
- fs-verity optionally supports a simple signature verification
- mechanism where users can configure the kernel to require that
- all fs-verity files be signed by a key loaded into a keyring;
- see `Built-in signature verification`_.
-
-* Integrity Measurement Architecture (IMA)
-
- IMA supports including fs-verity file digests and signatures in the
- IMA measurement list and verifying fs-verity based file signatures
- stored as security.ima xattrs, based on policy.
-
+fs-verity does not mandate a particular scheme for authenticating its
+file hashes. (Similarly, dm-verity does not mandate a particular
+scheme for authenticating its block device root hashes.) Options for
+authenticating fs-verity file hashes include:
+
+- Trusted userspace code. Often, the userspace code that accesses
+ files can be trusted to authenticate them. Consider e.g. an
+ application that wants to authenticate data files before using them,
+ or an application loader that is part of the operating system (which
+ is already authenticated in a different way, such as by being loaded
+ from a read-only partition that uses dm-verity) and that wants to
+ authenticate applications before loading them. In these cases, this
+ trusted userspace code can authenticate a file's contents by
+ retrieving its fs-verity digest using `FS_IOC_MEASURE_VERITY`_, then
+ verifying a signature of it using any userspace cryptographic
+ library that supports digital signatures.
+
+- Integrity Measurement Architecture (IMA). IMA supports fs-verity
+ file digests as an alternative to its traditional full file digests.
+ "IMA appraisal" enforces that files contain a valid, matching
+ signature in their "security.ima" extended attribute, as controlled
+ by the IMA policy. For more information, see the IMA documentation.
+
+- Trusted userspace code in combination with `Built-in signature
+ verification`_. This approach should be used only with great care.
User API
========
@@ -111,8 +112,7 @@ follows::
};
This structure contains the parameters of the Merkle tree to build for
-the file, and optionally contains a signature. It must be initialized
-as follows:
+the file. It must be initialized as follows:
- ``version`` must be 1.
- ``hash_algorithm`` must be the identifier for the hash algorithm to
@@ -129,12 +129,14 @@ as follows:
file or device. Currently the maximum salt size is 32 bytes.
- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
provided.
-- ``sig_size`` is the size of the signature in bytes, or 0 if no
- signature is provided. Currently the signature is (somewhat
- arbitrarily) limited to 16128 bytes. See `Built-in signature
- verification`_ for more information.
-- ``sig_ptr`` is the pointer to the signature, or NULL if no
- signature is provided.
+- ``sig_size`` is the size of the builtin signature in bytes, or 0 if no
+ builtin signature is provided. Currently the builtin signature is
+ (somewhat arbitrarily) limited to 16128 bytes.
+- ``sig_ptr`` is the pointer to the builtin signature, or NULL if no
+ builtin signature is provided. A builtin signature is only needed
+ if the `Built-in signature verification`_ feature is being used. It
+ is not needed for IMA appraisal, and it is not needed if the file
+ signature is being handled entirely in userspace.
- All reserved fields must be zeroed.
FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
@@ -158,7 +160,7 @@ fatal signal), no changes are made to the file.
FS_IOC_ENABLE_VERITY can fail with the following errors:
- ``EACCES``: the process does not have write access to the file
-- ``EBADMSG``: the signature is malformed
+- ``EBADMSG``: the builtin signature is malformed
- ``EBUSY``: this ioctl is already running on the file
- ``EEXIST``: the file already has verity enabled
- ``EFAULT``: the caller provided inaccessible memory
@@ -168,10 +170,10 @@ FS_IOC_ENABLE_VERITY can fail with the following errors:
reserved bits are set; or the file descriptor refers to neither a
regular file nor a directory.
- ``EISDIR``: the file descriptor refers to a directory
-- ``EKEYREJECTED``: the signature doesn't match the file
-- ``EMSGSIZE``: the salt or signature is too long
-- ``ENOKEY``: the fs-verity keyring doesn't contain the certificate
- needed to verify the signature
+- ``EKEYREJECTED``: the builtin signature doesn't match the file
+- ``EMSGSIZE``: the salt or builtin signature is too long
+- ``ENOKEY``: the ".fs-verity" keyring doesn't contain the certificate
+ needed to verify the builtin signature
- ``ENOPKG``: fs-verity recognizes the hash algorithm, but it's not
available in the kernel's crypto API as currently configured (e.g.
for SHA-512, missing CONFIG_CRYPTO_SHA512).
@@ -180,8 +182,8 @@ FS_IOC_ENABLE_VERITY can fail with the following errors:
support; or the filesystem superblock has not had the 'verity'
feature enabled on it; or the filesystem does not support fs-verity
on this file. (See `Filesystem support`_.)
-- ``EPERM``: the file is append-only; or, a signature is required and
- one was not provided.
+- ``EPERM``: the file is append-only; or, a builtin signature is
+ required and one was not provided.
- ``EROFS``: the filesystem is read-only
- ``ETXTBSY``: someone has the file open for writing. This can be the
caller's file descriptor, another open file descriptor, or the file
@@ -270,9 +272,9 @@ This ioctl takes in a pointer to the following structure::
- ``FS_VERITY_METADATA_TYPE_DESCRIPTOR`` reads the fs-verity
descriptor. See `fs-verity descriptor`_.
-- ``FS_VERITY_METADATA_TYPE_SIGNATURE`` reads the signature which was
- passed to FS_IOC_ENABLE_VERITY, if any. See `Built-in signature
- verification`_.
+- ``FS_VERITY_METADATA_TYPE_SIGNATURE`` reads the builtin signature
+ which was passed to FS_IOC_ENABLE_VERITY, if any. See `Built-in
+ signature verification`_.
The semantics are similar to those of ``pread()``. ``offset``
specifies the offset in bytes into the metadata item to read from, and
@@ -299,7 +301,7 @@ FS_IOC_READ_VERITY_METADATA can fail with the following errors:
overflowed
- ``ENODATA``: the file is not a verity file, or
FS_VERITY_METADATA_TYPE_SIGNATURE was requested but the file doesn't
- have a built-in signature
+ have a builtin signature
- ``ENOTTY``: this type of filesystem does not implement fs-verity, or
this ioctl is not yet implemented on it
- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
@@ -347,8 +349,8 @@ non-verity one, with the following exceptions:
with EIO (for read()) or SIGBUS (for mmap() reads).
- If the sysctl "fs.verity.require_signatures" is set to 1 and the
- file is not signed by a key in the fs-verity keyring, then opening
- the file will fail. See `Built-in signature verification`_.
+ file is not signed by a key in the ".fs-verity" keyring, then
+ opening the file will fail. See `Built-in signature verification`_.
Direct access to the Merkle tree is not supported. Therefore, if a
verity file is copied, or is backed up and restored, then it will lose
@@ -433,20 +435,25 @@ root hash as well as other fields such as the file size::
Built-in signature verification
===============================
-With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
-a portion of an authentication policy (see `Use cases`_) in the
-kernel. Specifically, it adds support for:
+CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y adds support for in-kernel
+verification of fs-verity builtin signatures.
+
+**IMPORTANT**! Please take great care before using this feature.
+It is not the only way to do signatures with fs-verity, and the
+alternatives (such as userspace signature verification, and IMA
+appraisal) can be much better. It's also easy to fall into a trap
+of thinking this feature solves more problems than it actually does.
+
+Enabling this option adds the following:
-1. At fs-verity module initialization time, a keyring ".fs-verity" is
- created. The root user can add trusted X.509 certificates to this
- keyring using the add_key() system call, then (when done)
- optionally use keyctl_restrict_keyring() to prevent additional
- certificates from being added.
+1. At boot time, the kernel creates a keyring named ".fs-verity". The
+ root user can add trusted X.509 certificates to this keyring using
+ the add_key() system call.
2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
detached signature in DER format of the file's fs-verity digest.
- On success, this signature is persisted alongside the Merkle tree.
- Then, any time the file is opened, the kernel will verify the
+ On success, the ioctl persists the signature alongside the Merkle
+ tree. Then, any time the file is opened, the kernel verifies the
file's actual digest against this signature, using the certificates
in the ".fs-verity" keyring.
@@ -454,8 +461,8 @@ kernel. Specifically, it adds support for:
When set to 1, the kernel requires that all verity files have a
correctly signed digest as described in (2).
-fs-verity file digests must be signed in the following format, which
-is similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
+The signature described in (2) must be a signature of the fs-verity
+file digest in the following format::
struct fsverity_formatted_digest {
char magic[8]; /* must be "FSVerity" */
@@ -464,13 +471,66 @@ is similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
__u8 digest[];
};
-fs-verity's built-in signature verification support is meant as a
-relatively simple mechanism that can be used to provide some level of
-authenticity protection for verity files, as an alternative to doing
-the signature verification in userspace or using IMA-appraisal.
-However, with this mechanism, userspace programs still need to check
-that the verity bit is set, and there is no protection against verity
-files being swapped around.
+That's it. It should be emphasized again that fs-verity builtin
+signatures are not the only way to do signatures with fs-verity. See
+`Use cases`_ for an overview of ways in which fs-verity can be used.
+fs-verity builtin signatures have some major limitations that should
+be carefully considered before using them:
+
+- Builtin signature verification does *not* make the kernel enforce
+ that any files actually have fs-verity enabled. Thus, it is not a
+ complete authentication policy. Currently, if it is used, the only
+ way to complete the authentication policy is for trusted userspace
+ code to explicitly check whether files have fs-verity enabled with a
+ signature before they are accessed. (With
+ fs.verity.require_signatures=1, just checking whether fs-verity is
+ enabled suffices.) But, in this case the trusted userspace code
+ could just store the signature alongside the file and verify it
+ itself using a cryptographic library, instead of using this feature.
+
+- A file's builtin signature can only be set at the same time that
+ fs-verity is being enabled on the file. Changing or deleting the
+ builtin signature later requires re-creating the file.
+
+- Builtin signature verification uses the same set of public keys for
+ all fs-verity enabled files on the system. Different keys cannot be
+ trusted for different files; each key is all or nothing.
+
+- The sysctl fs.verity.require_signatures applies system-wide.
+ Setting it to 1 only works when all users of fs-verity on the system
+ agree that it should be set to 1. This limitation can prevent
+ fs-verity from being used in cases where it would be helpful.
+
+- Builtin signature verification can only use signature algorithms
+ that are supported by the kernel. For example, the kernel does not
+ yet support Ed25519, even though this is often the signature
+ algorithm that is recommended for new cryptographic designs.
+
+- fs-verity builtin signatures are in PKCS#7 format, and the public
+ keys are in X.509 format. These formats are commonly used,
+ including by some other kernel features (which is why the fs-verity
+ builtin signatures use them), and are very feature rich.
+ Unfortunately, history has shown that code that parses and handles
+ these formats (which are from the 1990s and are based on ASN.1)
+ often has vulnerabilities as a result of their complexity. This
+ complexity is not inherent to the cryptography itself.
+
+ fs-verity users who do not need advanced features of X.509 and
+ PKCS#7 should strongly consider using simpler formats, such as plain
+ Ed25519 keys and signatures, and verifying signatures in userspace.
+
+ fs-verity users who choose to use X.509 and PKCS#7 anyway should
+ still consider that verifying those signatures in userspace is more
+ flexible (for other reasons mentioned earlier in this document) and
+ eliminates the need to enable CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+ and its associated increase in kernel attack surface. In some cases
+ it can even be necessary, since advanced X.509 and PKCS#7 features
+ do not always work as intended with the kernel. For example, the
+ kernel does not check X.509 certificate validity times.
+
+ Note: IMA appraisal, which supports fs-verity, does not use PKCS#7
+ for its signatures, so it partially avoids the issues discussed
+ here. IMA appraisal does use X.509.
Filesystem support
==================
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index aa1a233b0fa8..ed148919e11a 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -521,8 +521,6 @@ prototypes::
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
- loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 769be5230210..cb2a97e49872 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1086,7 +1086,6 @@ This describes how the VFS can manipulate an open file. As of kernel
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
diff --git a/Documentation/leds/leds-class.rst b/Documentation/leds/leds-class.rst
index cd155ead8703..5db620ed27aa 100644
--- a/Documentation/leds/leds-class.rst
+++ b/Documentation/leds/leds-class.rst
@@ -169,6 +169,87 @@ Setting the brightness to zero with brightness_set() callback function
should completely turn off the LED and cancel the previously programmed
hardware blinking function, if any.
+Hardware driven LEDs
+====================
+
+Some LEDs can be programmed to be driven by hardware. This is not
+limited to blink but also to turn off or on autonomously.
+To support this feature, a LED needs to implement various additional
+ops and needs to declare specific support for the supported triggers.
+
+With hw control we refer to the LED driven by hardware.
+
+LED driver must define the following value to support hw control:
+
+ - hw_control_trigger:
+ unique trigger name supported by the LED in hw control
+ mode.
+
+LED driver must implement the following API to support hw control:
+ - hw_control_is_supported:
+ check if the flags passed by the supported trigger can
+ be parsed and activate hw control on the LED.
+
+ Return 0 if the passed flags mask is supported and
+ can be set with hw_control_set().
+
+ If the passed flags mask is not supported -EOPNOTSUPP
+ must be returned, the LED trigger will use software
+ fallback in this case.
+
+ Return a negative error in case of any other error like
+ device not ready or timeouts.
+
+ - hw_control_set:
+ activate hw control. LED driver will use the provided
+ flags passed from the supported trigger, parse them to
+ a set of modes and set up the LED to be driven by hardware
+ following the requested modes.
+
+ Set LED_OFF via the brightness_set to deactivate hw control.
+
+ Return 0 on success, a negative error number on failing to
+ apply flags.
+
+ - hw_control_get:
+ get active modes from a LED already in hw control, parse
+ them and set in flags the current active flags for the
+ supported trigger.
+
+ Return 0 on success, a negative error number on failing
+ to parse the initial mode.
+ Error from this function is NOT FATAL as the device may
+ be in a not supported initial state by the attached LED
+ trigger.
+
+ - hw_control_get_device:
+ return the device associated with the LED driver in
+ hw control. A trigger might use this to match the
+ returned device from this function with a configured
+ device for the trigger as the source for blinking
+ events and correctly enable hw control.
+ (for example, a netdev trigger configured to blink for a
+ particular dev matches the returned dev from get_device
+ to set hw control)
+
+ Returns a pointer to a struct device or NULL if nothing
+ is currently attached.
+
+LED driver can activate additional modes by default to workaround the
+impossibility of supporting each different mode on the supported trigger.
+Examples are hardcoding the blink speed to a set interval or enabling special
+features like bypassing blink if some requirements are not met.
+
+A trigger should first check if the hw control API is supported by the LED
+driver and check if the trigger is supported to verify if hw control is possible,
+use hw_control_is_supported to check if the flags are supported and only at
+the end use hw_control_set to activate hw control.
+
+A trigger can use hw_control_get to check if a LED is already in hw control
+and init their flags.
+
+When the LED is in hw control, no software blink is possible and doing so
+will effectively disable hw control.
Known Issues
============
diff --git a/Documentation/maintainer/configure-git.rst b/Documentation/maintainer/configure-git.rst
index 80ae5030a590..ec0ddfb9cdd3 100644
--- a/Documentation/maintainer/configure-git.rst
+++ b/Documentation/maintainer/configure-git.rst
@@ -56,7 +56,7 @@ by adding the following hook into your git:
$ cat >.git/hooks/applypatch-msg <<'EOF'
#!/bin/sh
. git-sh-setup
- perl -pi -e 's|^Message-Id:\s*<?([^>]+)>?$|Link: https://lore.kernel.org/r/$1|g;' "$1"
+ perl -pi -e 's|^Message-I[dD]:\s*<?([^>]+)>?$|Link: https://lore.kernel.org/r/$1|g;' "$1"
test -x "$GIT_DIR/hooks/commit-msg" &&
exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
:
diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index 0cff6fac6b7e..4bfdf1d30c4a 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -4,31 +4,55 @@
Design
======
-Configurable Layers
-===================
-
-DAMON provides data access monitoring functionality while making the accuracy
-and the overhead controllable. The fundamental access monitorings require
-primitives that dependent on and optimized for the target address space. On
-the other hand, the accuracy and overhead tradeoff mechanism, which is the core
-of DAMON, is in the pure logic space. DAMON separates the two parts in
-different layers and defines its interface to allow various low level
-primitives implementations configurable with the core logic. We call the low
-level primitives implementations monitoring operations.
-
-Due to this separated design and the configurable interface, users can extend
-DAMON for any address space by configuring the core logics with appropriate
-monitoring operations. If appropriate one is not provided, users can implement
-the operations on their own.
+
+Overall Architecture
+====================
+
+DAMON subsystem is configured with three layers including
+
+- Operations Set: Implements fundamental operations for DAMON that depend on
+ the given monitoring target address-space and available set of
+ software/hardware primitives,
+- Core: Implements core logics including monitoring overhead/accuracy control
+ and access-aware system operations on top of the operations set layer, and
+- Modules: Implements kernel modules for various purposes that provide
+ interfaces for the user space, on top of the core layer.
+
+
+Configurable Operations Set
+---------------------------
+
+For data access monitoring and additional low level work, DAMON needs a set of
+implementations for specific operations that are dependent on and optimized for
+the given target address space. On the other hand, the accuracy and overhead
+tradeoff mechanism, which is the core logic of DAMON, is in the pure logic
+space. DAMON separates the two parts in different layers, namely DAMON
+Operations Set and DAMON Core Logics Layers, respectively. It further defines
+the interface between the layers to allow various operations sets to be
+configured with the core logic.
+
+Due to this design, users can extend DAMON for any address space by configuring
+the core logic to use the appropriate operations set. If any appropriate set
+is unavailable, users can implement one on their own.
For example, physical memory, virtual memory, swap space, those for specific
processes, NUMA nodes, files, and backing memory devices would be supportable.
-Also, if some architectures or devices support special optimized access check
-primitives, those will be easily configurable.
+Also, if some architectures or devices support special optimized access
+check primitives, those will be easily configurable.
-Reference Implementations of Address Space Specific Monitoring Operations
-=========================================================================
+Programmable Modules
+--------------------
+
+Core layer of DAMON is implemented as a framework, and exposes its application
+programming interface to all kernel space components such as subsystems and
+modules. For common use cases of DAMON, DAMON subsystem provides kernel
+modules that are built on top of the core layer using the API, which can be easily
+used by the user space end users.
+
+
+Operations Set Layer
+====================
The monitoring operations are defined in two parts:
@@ -90,8 +114,12 @@ conflict with the reclaim logic using ``PG_idle`` and ``PG_young`` page flags,
as Idle page tracking does.
-Address Space Independent Core Mechanisms
-=========================================
+Core Logics
+===========
+
+
+Monitoring
+----------
Below four sections describe each of the DAMON core mechanisms and the five
monitoring attributes, ``sampling interval``, ``aggregation interval``,
@@ -100,7 +128,7 @@ regions``.
Access Frequency Monitoring
----------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
The output of DAMON says what pages are how frequently accessed for a given
duration. The resolution of the access frequency is controlled by setting
@@ -127,7 +155,7 @@ size of the target workload grows.
Region Based Sampling
----------------------
+~~~~~~~~~~~~~~~~~~~~~
To avoid the unbounded increase of the overhead, DAMON groups adjacent pages
that assumed to have the same access frequencies into a region. As long as the
@@ -144,7 +172,7 @@ assumption is not guaranteed.
Adaptive Regions Adjustment
----------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
Even somehow the initial monitoring target regions are well constructed to
fulfill the assumption (pages in same region have similar access frequencies),
@@ -162,8 +190,22 @@ In this way, DAMON provides its best-effort quality and minimal overhead while
keeping the bounds users set for their trade-off.
+Age Tracking
+~~~~~~~~~~~~
+
+By analyzing the monitoring results, users can also find how long the current
+access pattern of a region has been maintained. That could be used for good
+understanding of the access pattern. For example, page placement algorithm
+utilizing both the frequency and the recency could be implemented using that.
+To make such access pattern maintained period analysis easier, DAMON maintains
+yet another counter called ``age`` in each region. For each ``aggregation
+interval``, DAMON checks if the region's size and access frequency
+(``nr_accesses``) have significantly changed. If so, the counter is reset to
+zero. Otherwise, the counter is increased.
+
+
Dynamic Target Space Updates Handling
--------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The monitoring target address range could dynamically changed. For example,
virtual memory could be dynamically mapped and unmapped. Physical memory could
@@ -174,3 +216,246 @@ monitoring operations to check dynamic changes including memory mapping changes
and applies it to monitoring operations-related data structures such as the
abstracted monitoring target memory area only for each of a user-specified time
interval (``update interval``).
+
+
+.. _damon_design_damos:
+
+Operation Schemes
+-----------------
+
+One common purpose of data access monitoring is access-aware system efficiency
+optimizations. For example,
+
+ paging out memory regions that are not accessed for more than two minutes
+
+or
+
+ using THP for memory regions that are larger than 2 MiB and showing a high
+ access frequency for more than one minute.
+
+One straightforward approach for such schemes would be profile-guided
+optimizations. That is, getting data access monitoring results of the
+workloads or the system using DAMON, finding memory regions of special
+characteristics by profiling the monitoring results, and making system
+operation changes for the regions. The changes could be made by modifying or
+providing advice to the software (the application and/or the kernel), or
+reconfiguring the hardware. Both offline and online approaches could be
+available.
+
+Among those, providing advice to the kernel at runtime would be flexible and
+effective, and therefore be widely used. However, implementing such schemes
+could impose unnecessary redundancy and inefficiency. The profiling could be
+redundant if the type of interest is common. Exchanging the information
+including monitoring results and operation advice between kernel and user
+spaces could be inefficient.
+
+To allow users to reduce such redundancy and inefficiencies by offloading the
+works, DAMON provides a feature called Data Access Monitoring-based Operation
+Schemes (DAMOS). It lets users specify their desired schemes at a high
+level. For such specifications, DAMON starts monitoring, finds regions having
+the access pattern of interest, and applies the user-desired operation actions
+to the regions as soon as found.
+
+
+.. _damon_design_damos_action:
+
+Operation Action
+~~~~~~~~~~~~~~~~
+
+The management action that the users desire to apply to the regions of their
+interest. For example, paging out, prioritizing for next reclamation victim
+selection, advising ``khugepaged`` to collapse or split, or doing nothing but
+collecting statistics of the regions.
+
+The list of supported actions is defined in DAMOS, but the implementation of
+each action is in the DAMON operations set layer because the implementation
+normally depends on the monitoring target address space. For example, the code
+for paging specific virtual address ranges out would be different from that for
+physical address ranges. And the monitoring operations implementation sets are
+not mandated to support all actions of the list. Hence, the availability of
+specific DAMOS action depends on what operations set is selected to be used
+together.
+
+Applying an action to a region is considered as changing the region's
+characteristics. Hence, DAMOS resets the age of regions when an action is
+applied to those.
+
+
+.. _damon_design_damos_access_pattern:
+
+Target Access Pattern
+~~~~~~~~~~~~~~~~~~~~~
+
+The access pattern of the schemes' interest. The patterns are constructed with
+the properties that DAMON's monitoring results provide, specifically the size,
+the access frequency, and the age. Users can describe their access pattern of
+interest by setting minimum and maximum values of the three properties. If a
+region's three properties are in the ranges, DAMOS classifies it as one of the
+regions that the scheme is having an interest in.
+
+
+.. _damon_design_damos_quotas:
+
+Quotas
+~~~~~~
+
+DAMOS upper-bound overhead control feature. DAMOS could incur high overhead if
+the target access pattern is not properly tuned. For example, if a huge memory
+region having the access pattern of interest is found, applying the scheme's
+action to all pages of the huge region could consume unacceptably large system
+resources. Preventing such issues by tuning the access pattern could be
+challenging, especially if the access patterns of the workloads are highly
+dynamic.
+
+To mitigate that situation, DAMOS provides an upper-bound overhead control
+feature called quotas. It lets users specify an upper limit of time that DAMOS
+can use for applying the action, and/or a maximum number of bytes of memory
+regions that the action can be applied to within a user-specified time duration.
+
+
+.. _damon_design_damos_quotas_prioritization:
+
+Prioritization
+^^^^^^^^^^^^^^
+
+A mechanism for making a good decision under the quotas. When the action
+cannot be applied to all regions of interest due to the quotas, DAMOS
+prioritizes regions and applies the action to only regions having high enough
+priorities so that it will not exceed the quotas.
+
+The prioritization mechanism should be different for each action. For example,
+rarely accessed (colder) memory regions would be prioritized for page-out
+scheme action. In contrast, the colder regions would be deprioritized for huge
+page collapse scheme action. Hence, the prioritization mechanisms for each
+action are implemented in each DAMON operations set, together with the actions.
+
+Though the implementation is up to the DAMON operations set, it would be common
+to calculate the priority using the access pattern properties of the regions.
+Some users would want the mechanisms to be personalized for their specific
+case. For example, some users would want the mechanism to weigh the recency
+(``age``) more than the access frequency (``nr_accesses``). DAMOS allows users
+to specify the weight of each access pattern property and passes the
+information to the underlying mechanism. Nevertheless, how and even whether
+the weight will be respected are up to the underlying prioritization mechanism
+implementation.
+
+
+.. _damon_design_damos_watermarks:
+
+Watermarks
+~~~~~~~~~~
+
+Conditional DAMOS (de)activation automation. Users might want DAMOS to run
+only under certain situations. For example, when a sufficient amount of free
+memory is guaranteed, running a scheme for proactive reclamation would only
+consume unnecessary system resources. To avoid such consumption, the user would
+need to manually monitor some metrics such as free memory ratio, and turn
+DAMON/DAMOS on or off.
+
+DAMOS allows users to offload such works using three watermarks. It allows the
+users to configure the metric of their interest, and three watermark values,
+namely high, middle, and low. If the value of the metric becomes above the
+high watermark or below the low watermark, the scheme is deactivated. If the
+metric becomes below the mid watermark but above the low watermark, the scheme
+is activated. If all schemes are deactivated by the watermarks, the monitoring
+is also deactivated. In this case, the DAMON worker thread only periodically
+checks the watermarks and therefore incurs nearly zero overhead.
+
+
+.. _damon_design_damos_filters:
+
+Filters
+~~~~~~~
+
+Non-access pattern-based target memory regions filtering. If users run
+self-written programs or have good profiling tools, they could know something
+more than the kernel, such as future access patterns or some special
+requirements for specific types of memory. For example, some users may know
+only anonymous pages can impact their program's performance. They can also
+have a list of latency-critical processes.
+
+To let users optimize DAMOS schemes with such special knowledge, DAMOS provides
+a feature called DAMOS filters. The feature allows users to set an arbitrary
+number of filters for each scheme. Each filter specifies the type of target
+memory, and whether it should exclude the memory of the type (filter-out), or
+all except the memory of the type (filter-in).
+
+As of this writing, anonymous page type and memory cgroup type are supported by
+the feature. Some filter target types can require additional arguments. For
+example, the memory cgroup filter type asks users to specify the file path of
+the memory cgroup for the filter. Hence, users can apply specific schemes to
+only anonymous pages, non-anonymous pages, pages of specific cgroups, all pages
+excluding those of specific cgroups, and any combination of those.
+
+
+Application Programming Interface
+---------------------------------
+
+The programming interface for kernel space data access-aware applications.
+DAMON is a framework, so it does nothing by itself. Instead, it only helps
+other kernel components such as subsystems and modules building their data
+access-aware applications using DAMON's core features. For this, DAMON exposes
+all its features to other kernel components via its application programming
+interface, namely ``include/linux/damon.h``. Please refer to the API
+:doc:`document </mm/damon/api>` for details of the interface.
+
+
+Modules
+=======
+
+Because the core of DAMON is a framework for kernel components, it doesn't
+provide any direct interface for the user space. Such interfaces should be
+implemented by each DAMON API user kernel component, instead. DAMON subsystem
+itself implements such DAMON API user modules, which are supposed to be used
+for general purpose DAMON control and special purpose data access-aware system
+operations, and provides stable application binary interfaces (ABI) for the
+user space. The user space can build their efficient data access-aware
+applications using the interfaces.
+
+
+General Purpose User Interface Modules
+--------------------------------------
+
+DAMON modules that provide user space ABIs for general purpose DAMON usage in
+runtime.
+
+DAMON user interface modules, namely 'DAMON sysfs interface' and 'DAMON debugfs
+interface' are DAMON API user kernel modules that provide ABIs to the
+user-space. Please note that DAMON debugfs interface is currently deprecated.
+
+Like many other ABIs, the modules create files on sysfs and debugfs, allow
+users to specify their requests to and get the answers from DAMON by writing to
+and reading from the files. As a response to such I/O, DAMON user interface
+modules control DAMON and retrieve the results as user requested via the DAMON
+API, and return the results to the user-space.
+
+The ABIs are designed to be used for user space applications development,
+rather than human beings' fingers. Human users are recommended to use such
+user space tools. One such Python-written user space tool is available at
+Github (https://github.com/awslabs/damo), Pypi
+(https://pypistats.org/packages/damo), and Fedora
+(https://packages.fedoraproject.org/pkgs/python-damo/damo/).
+
+Please refer to the ABI :doc:`document </admin-guide/mm/damon/usage>` for
+details of the interfaces.
+
+
+Special-Purpose Access-aware Kernel Modules
+-------------------------------------------
+
+DAMON modules that provide user space ABI for specific purpose DAMON usage.
+
+DAMON sysfs/debugfs user interfaces are for full control of all DAMON features
+in runtime. For each special-purpose system-wide data access-aware system
+operations such as proactive reclamation or LRU lists balancing, the interfaces
+could be simplified by removing unnecessary knobs for the specific purpose, and
+extended for boot-time and even compile time control. Default values of DAMON
+control parameters for the usage would also need to be optimized for the
+purpose.
+
+To support such cases, yet more DAMON API user kernel modules that provide more
+simple and optimized user space interfaces are available. Currently, two
+modules for proactive reclamation and LRU lists manipulation are provided. For
+more detail, please read the usage documents for those
+(:doc:`/admin-guide/mm/damon/reclaim` and
+:doc:`/admin-guide/mm/damon/lru_sort`).
diff --git a/Documentation/mm/damon/faq.rst b/Documentation/mm/damon/faq.rst
index dde7e2414ee6..3279dc7a8211 100644
--- a/Documentation/mm/damon/faq.rst
+++ b/Documentation/mm/damon/faq.rst
@@ -4,29 +4,6 @@
Frequently Asked Questions
==========================
-Why a new subsystem, instead of extending perf or other user space tools?
-=========================================================================
-
-First, because it needs to be lightweight as much as possible so that it can be
-used online, any unnecessary overhead such as kernel - user space context
-switching cost should be avoided. Second, DAMON aims to be used by other
-programs including the kernel. Therefore, having a dependency on specific
-tools like perf is not desirable. These are the two biggest reasons why DAMON
-is implemented in the kernel space.
-
-
-Can 'idle pages tracking' or 'perf mem' substitute DAMON?
-=========================================================
-
-Idle page tracking is a low level primitive for access check of the physical
-address space. 'perf mem' is similar, though it can use sampling to minimize
-the overhead. On the other hand, DAMON is a higher-level framework for the
-monitoring of various address spaces. It is focused on memory management
-optimization and provides sophisticated accuracy/overhead handling mechanisms.
-Therefore, 'idle pages tracking' and 'perf mem' could provide a subset of
-DAMON's output, but cannot substitute DAMON.
-
-
Does DAMON support virtual memory only?
=======================================
diff --git a/Documentation/mm/damon/maintainer-profile.rst b/Documentation/mm/damon/maintainer-profile.rst
index 24a202f03de8..a84c14e59053 100644
--- a/Documentation/mm/damon/maintainer-profile.rst
+++ b/Documentation/mm/damon/maintainer-profile.rst
@@ -3,7 +3,7 @@
DAMON Maintainer Entry Profile
==============================
-The DAMON subsystem covers the files that listed in 'DATA ACCESS MONITOR'
+The DAMON subsystem covers the files that are listed in 'DATA ACCESS MONITOR'
section of 'MAINTAINERS' file.
The mailing lists for the subsystem are damon@lists.linux.dev and
@@ -15,7 +15,7 @@ SCM Trees
There are multiple Linux trees for DAMON development. Patches under
development or testing are queued in damon/next [2]_ by the DAMON maintainer.
-Suffieicntly reviewed patches will be queued in mm-unstable [1]_ by the memory
+Sufficiently reviewed patches will be queued in mm-unstable [1]_ by the memory
management subsystem maintainer. After more sufficient tests, the patches will
be queued in mm-stable [3]_ , and finally pull-requested to the mainline by the
memory management subsystem maintainer.
diff --git a/Documentation/mm/page_migration.rst b/Documentation/mm/page_migration.rst
index 313dce18893e..e35af7805be5 100644
--- a/Documentation/mm/page_migration.rst
+++ b/Documentation/mm/page_migration.rst
@@ -73,14 +73,13 @@ In kernel use of migrate_pages()
It also prevents the swapper or other scans from encountering
the page.
-2. We need to have a function of type new_page_t that can be
+2. We need to have a function of type new_folio_t that can be
passed to migrate_pages(). This function should figure out
- how to allocate the correct new page given the old page.
+ how to allocate the correct new folio given the old folio.
3. The migrate_pages() function is called which attempts
to do the migration. It will call the function to allocate
- the new page for each page that is considered for
- moving.
+ the new folio for each folio that is considered for moving.
How migrate_pages() works
=========================
diff --git a/Documentation/mm/page_tables.rst b/Documentation/mm/page_tables.rst
index 96939571d7bc..7840c1891751 100644
--- a/Documentation/mm/page_tables.rst
+++ b/Documentation/mm/page_tables.rst
@@ -3,3 +3,152 @@
===========
Page Tables
===========
+
+Paged virtual memory was invented along with virtual memory as a concept in
+1962 on the Ferranti Atlas Computer which was the first computer with paged
+virtual memory. The feature migrated to newer computers and became a de facto
+feature of all Unix-like systems as time went by. In 1985 the feature was
+included in the Intel 80386, which was the CPU Linux 1.0 was developed on.
+
+Page tables map virtual addresses as seen by the CPU into physical addresses
+as seen on the external memory bus.
+
+Linux defines page tables as a hierarchy which is currently five levels in
+height. The architecture code for each supported architecture will then
+map this to the restrictions of the hardware.
+
+The physical address corresponding to the virtual address is often referenced
+by the underlying physical page frame. The **page frame number** or **pfn**
+is the physical address of the page (as seen on the external memory bus)
+divided by `PAGE_SIZE`.
+
+Physical memory address 0 will be *pfn 0* and the highest pfn will be
+the last page of physical memory the external address bus of the CPU can
+address.
+
+With a page granularity of 4KB and an address range of 32 bits, pfn 0 is at
+address 0x00000000, pfn 1 is at address 0x00001000, pfn 2 is at 0x00002000
+and so on until we reach pfn 0xfffff at 0xfffff000. With 16KB pages pfns are
+at 0x00004000, 0x00008000 ... 0xffffc000 and pfn goes from 0 to 0x3ffff.
+
+As you can see, with 4KB pages the page base address uses bits 12-31 of the
+address, and this is why `PAGE_SHIFT` in this case is defined as 12 and
+`PAGE_SIZE` is usually defined in terms of the page shift as `(1 << PAGE_SHIFT)`.
+
+Over time a deeper hierarchy has been developed in response to increasing memory
+sizes. When Linux was created, 4KB pages and a single page table called
+`swapper_pg_dir` with 1024 entries were used, covering 4MB which coincided with
+the fact that Torvalds's first computer had 4MB of physical memory. Entries in
+this single table were referred to as *PTE*:s - page table entries.
+
+The software page table hierarchy reflects the fact that page table hardware has
+become hierarchical and that in turn is done to save page table memory and
+speed up mapping.
+
+One could of course imagine a single, linear page table with enormous amounts
+of entries, breaking down the whole memory into single pages. Such a page table
+would be very sparse, because large portions of the virtual memory usually
+remain unused. By using hierarchical page tables large holes in the virtual
+address space do not waste valuable page table memory, because it will suffice
+to mark large areas as unmapped at a higher level in the page table hierarchy.
+
+Additionally, on modern CPUs, a higher level page table entry can point directly
+to a physical memory range, which allows mapping a contiguous range of several
+megabytes or even gigabytes in a single high-level page table entry, taking
+shortcuts in mapping virtual memory to physical memory: there is no need to
+traverse deeper in the hierarchy when you find a large mapped range like this.
+
+The page table hierarchy has now developed into this::
+
+ +-----+
+ | PGD |
+ +-----+
+ |
+ | +-----+
+ +-->| P4D |
+ +-----+
+ |
+ | +-----+
+ +-->| PUD |
+ +-----+
+ |
+ | +-----+
+ +-->| PMD |
+ +-----+
+ |
+ | +-----+
+ +-->| PTE |
+ +-----+
+
+
+Symbols on the different levels of the page table hierarchy have the following
+meaning beginning from the bottom:
+
+- **pte**, `pte_t`, `pteval_t` = **Page Table Entry** - mentioned earlier.
+ The *pte* is an array of `PTRS_PER_PTE` elements of the `pteval_t` type, each
+ mapping a single page of virtual memory to a single page of physical memory.
+ The architecture defines the size and contents of `pteval_t`.
+
+ A typical example is that the `pteval_t` is a 32- or 64-bit value with the
+ upper bits being a **pfn** (page frame number), and the lower bits being some
+ architecture-specific bits such as memory protection.
+
+ The **entry** part of the name is a bit confusing because while in Linux 1.0
+ this did refer to a single page table entry in the single top level page
+ table, it was retrofitted to be an array of mapping elements when two-level
+ page tables were first introduced, so the *pte* is the lowermost page
+ *table*, not a page table *entry*.
+
+- **pmd**, `pmd_t`, `pmdval_t` = **Page Middle Directory**, the hierarchy right
+ above the *pte*, with `PTRS_PER_PMD` references to the *pte*:s.
+
+- **pud**, `pud_t`, `pudval_t` = **Page Upper Directory** was introduced after
+ the other levels to handle 4-level page tables. It is potentially unused,
+ or *folded* as we will discuss later.
+
+- **p4d**, `p4d_t`, `p4dval_t` = **Page Level 4 Directory** was introduced to
+ handle 5-level page tables after the *pud* was introduced. Now it was clear
+ that we needed to replace *pgd*, *pmd*, *pud* etc with a figure indicating the
+ directory level and that we cannot go on with ad hoc names any more. This
+ is only used on systems which actually have 5 levels of page tables, otherwise
+ it is folded.
+
+- **pgd**, `pgd_t`, `pgdval_t` = **Page Global Directory** - the Linux kernel
+ main page table handling the PGD for the kernel memory is still found in
+ `swapper_pg_dir`, but each userspace process in the system also has its own
+ memory context and thus its own *pgd*, found in `struct mm_struct` which
+ in turn is referenced in each `struct task_struct`. So tasks have memory
+ context in the form of a `struct mm_struct` and this in turn has a
+ `pgd_t *pgd` pointer to the corresponding page global directory.
+
+To repeat: each level in the page table hierarchy is an *array of pointers*, so
+the **pgd** contains `PTRS_PER_PGD` pointers to the next level below, **p4d**
+contains `PTRS_PER_P4D` pointers to **pud** items and so on. The number of
+pointers on each level is architecture-defined.::
+
+ PMD
+ --> +-----+ PTE
+ | ptr |-------> +-----+
+ | ptr |- | ptr |-------> PAGE
+ | ptr | \ | ptr |
+ | ptr | \ ...
+ | ... | \
+ | ptr | \ PTE
+ +-----+ +----> +-----+
+ | ptr |-------> PAGE
+ | ptr |
+ ...
+
+
+Page Table Folding
+==================
+
+If the architecture does not use all the page table levels, they can be *folded*
+which means skipped, and all operations performed on page tables will be
+compile-time augmented to just skip a level when accessing the next lower
+level.
+
+Page table handling code that wishes to be architecture-neutral, such as the
+virtual memory manager, will need to be written so that it traverses all of the
+currently five levels. This style should also be preferred for
+architecture-specific code, so as to be robust to future changes.
diff --git a/Documentation/mm/split_page_table_lock.rst b/Documentation/mm/split_page_table_lock.rst
index 50ee0dfc95be..a834fad9de12 100644
--- a/Documentation/mm/split_page_table_lock.rst
+++ b/Documentation/mm/split_page_table_lock.rst
@@ -14,15 +14,20 @@ tables. Access to higher level tables protected by mm->page_table_lock.
There are helpers to lock/unlock a table and other accessor functions:
- pte_offset_map_lock()
- maps pte and takes PTE table lock, returns pointer to the taken
- lock;
+ maps PTE and takes PTE table lock, returns pointer to PTE with
+ pointer to its PTE table lock, or returns NULL if no PTE table;
+ - pte_offset_map_nolock()
+ maps PTE, returns pointer to PTE with pointer to its PTE table
+ lock (not taken), or returns NULL if no PTE table;
+ - pte_offset_map()
+ maps PTE, returns pointer to PTE, or returns NULL if no PTE table;
+ - pte_unmap()
+ unmaps PTE table;
- pte_unmap_unlock()
unlocks and unmaps PTE table;
- pte_alloc_map_lock()
- allocates PTE table if needed and take the lock, returns pointer
- to taken lock or NULL if allocation failed;
- - pte_lockptr()
- returns pointer to PTE table lock;
+ allocates PTE table if needed and takes its lock, returns pointer to
+ PTE with pointer to its lock, or returns NULL if allocation failed;
- pmd_lock()
takes PMD table lock, returns pointer to taken lock;
- pmd_lockptr()
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 8e8c17b0a6c6..57d1c1c4918f 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -195,6 +195,16 @@ properties:
description: Max length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
sub-type: *attr-type
+ display-hint: &display-hint
+ description: |
+ Optional format indicator that is intended only for choosing
+ the right formatting mechanism when displaying values of this
+ type.
+ enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
+ # Start genetlink-c
+ name-prefix:
+ type: string
+ # End genetlink-c
# Make sure name-prefix does not appear in subsets (subsets inherit naming)
dependencies:
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index b33541a51d6b..43b769c98fb2 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -119,9 +119,24 @@ properties:
name:
type: string
type:
- enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string ]
+ description: The netlink attribute type
+ enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ]
len:
$ref: '#/$defs/len-or-define'
+ byte-order:
+ enum: [ little-endian, big-endian ]
+ doc:
+ description: Documentation for the struct member attribute.
+ type: string
+ enum:
+ description: Name of the enum type used for the attribute.
+ type: string
+ display-hint: &display-hint
+ description: |
+ Optional format indicator that is intended only for choosing
+ the right formatting mechanism when displaying values of this
+ type.
+ enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
# End genetlink-legacy
attribute-sets:
@@ -171,6 +186,7 @@ properties:
name:
type: string
type: &attr-type
+ description: The netlink attribute type
enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64,
string, nest, array-nest, nest-type-value ]
doc:
@@ -218,6 +234,11 @@ properties:
description: Max length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
sub-type: *attr-type
+ display-hint: *display-hint
+ # Start genetlink-c
+ name-prefix:
+ type: string
+ # End genetlink-c
# Start genetlink-legacy
struct:
description: Name of the struct type used for the attribute.
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index d8b2cdeba058..1cbb448d2f1c 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -168,6 +168,12 @@ properties:
description: Max length for a string or a binary attribute.
$ref: '#/$defs/len-or-define'
sub-type: *attr-type
+ display-hint: &display-hint
+ description: |
+ Optional format indicator that is intended only for choosing
+ the right formatting mechanism when displaying values of this
+ type.
+ enum: [ hex, mac, fddi, ipv4, ipv6, uuid ]
# Make sure name-prefix does not appear in subsets (subsets inherit naming)
dependencies:
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index 90641668232e..5d46ca966979 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -9,6 +9,7 @@ doc: Partial family for Devlink.
attribute-sets:
-
name: devlink
+ name-prefix: devlink-attr-
attributes:
-
name: bus-name
@@ -95,10 +96,12 @@ attribute-sets:
-
name: reload-action-info
type: nest
+ multi-attr: true
nested-attributes: dl-reload-act-info
-
name: reload-action-stats
type: nest
+ multi-attr: true
nested-attributes: dl-reload-act-stats
-
name: dl-dev-stats
@@ -196,3 +199,8 @@ operations:
attributes:
- bus-name
- dev-name
+ - info-driver-name
+ - info-serial-number
+ - info-version-fixed
+ - info-version-running
+ - info-version-stored
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 4846345bade4..837b565577ca 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -9,8 +9,13 @@ doc: Partial family for Ethtool Netlink.
definitions:
-
name: udp-tunnel-type
+ enum-name:
type: enum
entries: [ vxlan, geneve, vxlan-gpe ]
+ -
+ name: stringset
+ type: enum
+ entries: []
attribute-sets:
-
@@ -497,7 +502,7 @@ attribute-sets:
attributes:
-
name: pad
- type: u32
+ type: pad
-
name: tx-frames
type: u64
@@ -577,7 +582,7 @@ attribute-sets:
name: phc-index
type: u32
-
- name: cable-test-ntf-nest-result
+ name: cable-result
attributes:
-
name: pair
@@ -586,7 +591,7 @@ attribute-sets:
name: code
type: u8
-
- name: cable-test-ntf-nest-fault-length
+ name: cable-fault-length
attributes:
-
name: pair
@@ -595,16 +600,16 @@ attribute-sets:
name: cm
type: u32
-
- name: cable-test-ntf-nest
+ name: cable-nest
attributes:
-
name: result
type: nest
- nested-attributes: cable-test-ntf-nest-result
+ nested-attributes: cable-result
-
name: fault-length
type: nest
- nested-attributes: cable-test-ntf-nest-fault-length
+ nested-attributes: cable-fault-length
-
name: cable-test
attributes:
@@ -612,13 +617,20 @@ attribute-sets:
name: header
type: nest
nested-attributes: header
+ -
+ name: cable-test-ntf
+ attributes:
+ -
+ name: header
+ type: nest
+ nested-attributes: header
-
name: status
type: u8
-
name: nest
type: nest
- nested-attributes: cable-test-ntf-nest
+ nested-attributes: cable-nest
-
name: cable-test-tdr-cfg
attributes:
@@ -632,9 +644,23 @@ attribute-sets:
name: step
type: u32
-
- name: pari
+ name: pair
type: u8
-
+ name: cable-test-tdr-ntf
+ attributes:
+ -
+ name: header
+ type: nest
+ nested-attributes: header
+ -
+ name: status
+ type: u8
+ -
+ name: nest
+ type: nest
+ nested-attributes: cable-nest
+ -
name: cable-test-tdr
attributes:
-
@@ -646,7 +672,7 @@ attribute-sets:
type: nest
nested-attributes: cable-test-tdr-cfg
-
- name: tunnel-info-udp-entry
+ name: tunnel-udp-entry
attributes:
-
name: port
@@ -657,7 +683,7 @@ attribute-sets:
type: u32
enum: udp-tunnel-type
-
- name: tunnel-info-udp-table
+ name: tunnel-udp-table
attributes:
-
name: size
@@ -667,9 +693,17 @@ attribute-sets:
type: nest
nested-attributes: bitset
-
- name: udp-ports
+ name: entry
type: nest
- nested-attributes: tunnel-info-udp-entry
+ multi-attr: true
+ nested-attributes: tunnel-udp-entry
+ -
+ name: tunnel-udp
+ attributes:
+ -
+ name: table
+ type: nest
+ nested-attributes: tunnel-udp-table
-
name: tunnel-info
attributes:
@@ -680,13 +714,13 @@ attribute-sets:
-
name: udp-ports
type: nest
- nested-attributes: tunnel-info-udp-table
+ nested-attributes: tunnel-udp
-
name: fec-stat
attributes:
-
name: pad
- type: u8
+ type: pad
-
name: corrected
type: binary
@@ -750,7 +784,7 @@ attribute-sets:
attributes:
-
name: pad
- type: u32
+ type: pad
-
name: id
type: u32
@@ -759,16 +793,29 @@ attribute-sets:
type: u32
-
name: stat
- type: nest
- nested-attributes: u64
+ type: u64
+ type-value: [ id ]
-
name: hist-rx
type: nest
- nested-attributes: u64
+ nested-attributes: stats-grp-hist
-
name: hist-tx
type: nest
- nested-attributes: u64
+ nested-attributes: stats-grp-hist
+ -
+ name: hist-bkt-low
+ type: u32
+ -
+ name: hist-bkt-hi
+ type: u32
+ -
+ name: hist-val
+ type: u64
+ -
+ name: stats-grp-hist
+ subset-of: stats-grp
+ attributes:
-
name: hist-bkt-low
type: u32
@@ -783,7 +830,7 @@ attribute-sets:
attributes:
-
name: pad
- type: u32
+ type: pad
-
name: header
type: nest
@@ -836,12 +883,15 @@ attribute-sets:
-
name: admin-state
type: u32
+ name-prefix: ethtool-a-podl-pse-
-
name: admin-control
type: u32
+ name-prefix: ethtool-a-podl-pse-
-
name: pw-d-status
type: u32
+ name-prefix: ethtool-a-podl-pse-
-
name: rss
attributes:
@@ -895,6 +945,7 @@ attribute-sets:
operations:
enum-model: directional
+ name-prefix: ethtool-msg-
list:
-
name: strset-get
@@ -1348,10 +1399,16 @@ operations:
request:
attributes:
- header
- reply:
- attributes:
- - header
- - cable-test-ntf-nest
+ -
+ name: cable-test-ntf
+ doc: Cable test notification.
+
+ attribute-set: cable-test-ntf
+
+ event:
+ attributes:
+ - header
+ - status
-
name: cable-test-tdr-act
doc: Cable test TDR.
@@ -1362,10 +1419,17 @@ operations:
request:
attributes:
- header
- reply:
- attributes:
- - header
- - cable-test-tdr-cfg
+ -
+ name: cable-test-tdr-ntf
+ doc: Cable test TDR notification.
+
+ attribute-set: cable-test-tdr-ntf
+
+ event:
+ attributes:
+ - header
+ - status
+ - nest
-
name: tunnel-info-get
doc: Get tsinfo params.
diff --git a/Documentation/netlink/specs/ovs_datapath.yaml b/Documentation/netlink/specs/ovs_datapath.yaml
index 6d71db8c4416..f709c26c3e92 100644
--- a/Documentation/netlink/specs/ovs_datapath.yaml
+++ b/Documentation/netlink/specs/ovs_datapath.yaml
@@ -3,6 +3,7 @@
name: ovs_datapath
version: 2
protocol: genetlink-legacy
+uapi-header: linux/openvswitch.h
doc:
OVS datapath configuration over generic netlink.
@@ -18,6 +19,7 @@ definitions:
-
name: user-features
type: flags
+ name-prefix: ovs-dp-f-
entries:
-
name: unaligned
@@ -33,35 +35,37 @@ definitions:
doc: Allow per-cpu dispatch of upcalls
-
name: datapath-stats
+ enum-name: ovs-dp-stats
type: struct
members:
-
- name: hit
+ name: n-hit
type: u64
-
- name: missed
+ name: n-missed
type: u64
-
- name: lost
+ name: n-lost
type: u64
-
- name: flows
+ name: n-flows
type: u64
-
name: megaflow-stats
+ enum-name: ovs-dp-megaflow-stats
type: struct
members:
-
- name: mask-hit
+ name: n-mask-hit
type: u64
-
- name: masks
+ name: n-masks
type: u32
-
name: padding
type: u32
-
- name: cache-hits
+ name: n-cache-hit
type: u64
-
name: pad1
@@ -70,6 +74,8 @@ definitions:
attribute-sets:
-
name: datapath
+ name-prefix: ovs-dp-attr-
+ enum-name: ovs-datapath-attrs
attributes:
-
name: name
@@ -101,12 +107,16 @@ attribute-sets:
name: per-cpu-pids
type: binary
sub-type: u32
+ -
+ name: ifindex
+ type: u32
operations:
fixed-header: ovs-header
+ name-prefix: ovs-dp-cmd-
list:
-
- name: dp-get
+ name: get
doc: Get / dump OVS data path configuration and state
value: 3
attribute-set: datapath
@@ -125,7 +135,7 @@ operations:
- per-cpu-pids
dump: *dp-get-op
-
- name: dp-new
+ name: new
doc: Create new OVS data path
value: 1
attribute-set: datapath
@@ -137,7 +147,7 @@ operations:
- upcall-pid
- user-features
-
- name: dp-del
+ name: del
doc: Delete existing OVS data path
value: 2
attribute-set: datapath
diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml
new file mode 100644
index 000000000000..109ca1f57b6c
--- /dev/null
+++ b/Documentation/netlink/specs/ovs_flow.yaml
@@ -0,0 +1,980 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: ovs_flow
+version: 1
+protocol: genetlink-legacy
+uapi-header: linux/openvswitch.h
+
+doc:
+ OVS flow configuration over generic netlink.
+
+definitions:
+ -
+ name: ovs-header
+ type: struct
+ doc: |
+ Header for OVS Generic Netlink messages.
+ members:
+ -
+ name: dp-ifindex
+ type: u32
+ doc: |
+ ifindex of local port for datapath (0 to make a request not specific
+ to a datapath).
+ -
+ name: ovs-flow-stats
+ type: struct
+ members:
+ -
+ name: n-packets
+ type: u64
+ doc: Number of matched packets.
+ -
+ name: n-bytes
+ type: u64
+ doc: Number of matched bytes.
+ -
+ name: ovs-key-ethernet
+ type: struct
+ members:
+ -
+ name: eth-src
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: eth-dst
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: ovs-key-mpls
+ type: struct
+ members:
+ -
+ name: mpls-lse
+ type: u32
+ byte-order: big-endian
+ -
+ name: ovs-key-ipv4
+ type: struct
+ members:
+ -
+ name: ipv4-src
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: ipv4-dst
+ type: u32
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: ipv4-proto
+ type: u8
+ -
+ name: ipv4-tos
+ type: u8
+ -
+ name: ipv4-ttl
+ type: u8
+ -
+ name: ipv4-frag
+ type: u8
+ enum: ovs-frag-type
+ -
+ name: ovs-key-ipv6
+ type: struct
+ members:
+ -
+ name: ipv6-src
+ type: binary
+ len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ -
+ name: ipv6-dst
+ type: binary
+ len: 16
+ byte-order: big-endian
+ display-hint: ipv6
+ -
+ name: ipv6-label
+ type: u32
+ byte-order: big-endian
+ -
+ name: ipv6-proto
+ type: u8
+ -
+ name: ipv6-tclass
+ type: u8
+ -
+ name: ipv6-hlimit
+ type: u8
+ -
+ name: ipv6-frag
+ type: u8
+ -
+ name: ovs-key-ipv6-exthdrs
+ type: struct
+ members:
+ -
+ name: hdrs
+ type: u16
+ -
+ name: ovs-frag-type
+ name-prefix: ovs-frag-type-
+ type: enum
+ entries:
+ -
+ name: none
+ doc: Packet is not a fragment.
+ -
+ name: first
+ doc: Packet is a fragment with offset 0.
+ -
+ name: later
+ doc: Packet is a fragment with nonzero offset.
+ -
+ name: any
+ value: 255
+ -
+ name: ovs-key-tcp
+ type: struct
+ members:
+ -
+ name: tcp-src
+ type: u16
+ byte-order: big-endian
+ -
+ name: tcp-dst
+ type: u16
+ byte-order: big-endian
+ -
+ name: ovs-key-udp
+ type: struct
+ members:
+ -
+ name: udp-src
+ type: u16
+ byte-order: big-endian
+ -
+ name: udp-dst
+ type: u16
+ byte-order: big-endian
+ -
+ name: ovs-key-sctp
+ type: struct
+ members:
+ -
+ name: sctp-src
+ type: u16
+ byte-order: big-endian
+ -
+ name: sctp-dst
+ type: u16
+ byte-order: big-endian
+ -
+ name: ovs-key-icmp
+ type: struct
+ members:
+ -
+ name: icmp-type
+ type: u8
+ -
+ name: icmp-code
+ type: u8
+ -
+ name: ovs-key-arp
+ type: struct
+ members:
+ -
+ name: arp-sip
+ type: u32
+ byte-order: big-endian
+ -
+ name: arp-tip
+ type: u32
+ byte-order: big-endian
+ -
+ name: arp-op
+ type: u16
+ byte-order: big-endian
+ -
+ name: arp-sha
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: arp-tha
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: ovs-key-nd
+ type: struct
+ members:
+ -
+ name: nd_target
+ type: binary
+ len: 16
+ byte-order: big-endian
+ -
+ name: nd-sll
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: nd-tll
+ type: binary
+ len: 6
+ display-hint: mac
+ -
+ name: ovs-key-ct-tuple-ipv4
+ type: struct
+ members:
+ -
+ name: ipv4-src
+ type: u32
+ byte-order: big-endian
+ -
+ name: ipv4-dst
+ type: u32
+ byte-order: big-endian
+ -
+ name: src-port
+ type: u16
+ byte-order: big-endian
+ -
+ name: dst-port
+ type: u16
+ byte-order: big-endian
+ -
+ name: ipv4-proto
+ type: u8
+ -
+ name: ovs-action-push-vlan
+ type: struct
+ members:
+ -
+ name: vlan_tpid
+ type: u16
+ byte-order: big-endian
+ doc: Tag protocol identifier (TPID) to push.
+ -
+ name: vlan_tci
+ type: u16
+ byte-order: big-endian
+ doc: Tag control identifier (TCI) to push.
+ -
+ name: ovs-ufid-flags
+ name-prefix: ovs-ufid-f-
+ type: flags
+ entries:
+ - omit-key
+ - omit-mask
+ - omit-actions
+ -
+ name: ovs-action-hash
+ type: struct
+ members:
+ -
+ name: hash-alg
+ type: u32
+ doc: Algorithm used to compute hash prior to recirculation.
+ -
+ name: hash-basis
+ type: u32
+ doc: Basis used for computing hash.
+ -
+ name: ovs-hash-alg
+ type: enum
+ doc: |
+ Data path hash algorithm for computing Datapath hash. The algorithm type only specifies
+ the fields in a flow that will be used as part of the hash. Each datapath is free to use its
+ own hash algorithm. The hash value will be opaque to the user space daemon.
+ entries:
+ - ovs-hash-alg-l4
+
+ -
+ name: ovs-action-push-mpls
+ type: struct
+ members:
+ -
+ name: mpls-lse
+ type: u32
+ byte-order: big-endian
+ doc: |
+ MPLS label stack entry to push
+ -
+ name: mpls-ethertype
+ type: u32
+ byte-order: big-endian
+ doc: |
+ Ethertype to set in the encapsulating ethernet frame. The only values
+ ethertype should ever be given are ETH_P_MPLS_UC and ETH_P_MPLS_MC,
+ indicating MPLS unicast or multicast. Others are rejected.
+ -
+ name: ovs-action-add-mpls
+ type: struct
+ members:
+ -
+ name: mpls-lse
+ type: u32
+ byte-order: big-endian
+ doc: |
+ MPLS label stack entry to push
+ -
+ name: mpls-ethertype
+ type: u32
+ byte-order: big-endian
+ doc: |
+ Ethertype to set in the encapsulating ethernet frame. The only values
+ ethertype should ever be given are ETH_P_MPLS_UC and ETH_P_MPLS_MC,
+ indicating MPLS unicast or multicast. Others are rejected.
+ -
+ name: tun-flags
+ type: u16
+ doc: |
+ MPLS tunnel attributes.
+ -
+ name: ct-state-flags
+ type: flags
+ name-prefix: ovs-cs-f-
+ entries:
+ -
+ name: new
+ doc: Beginning of a new connection.
+ -
+ name: established
+ doc: Part of an existing connection.
+ -
+ name: related
+ doc: Related to an existing connection.
+ -
+ name: reply-dir
+ doc: Flow is in the reply direction.
+ -
+ name: invalid
+ doc: Could not track the connection.
+ -
+ name: tracked
+ doc: Conntrack has occurred.
+ -
+ name: src-nat
+ doc: Packet's source address/port was mangled by NAT.
+ -
+ name: dst-nat
+ doc: Packet's destination address/port was mangled by NAT.
+
+attribute-sets:
+ -
+ name: flow-attrs
+ enum-name: ovs-flow-attr
+ name-prefix: ovs-flow-attr-
+ attributes:
+ -
+ name: key
+ type: nest
+ nested-attributes: key-attrs
+ doc: |
+ Nested attributes specifying the flow key. Always present in
+ notifications. Required for all requests (except dumps).
+ -
+ name: actions
+ type: nest
+ nested-attributes: action-attrs
+ doc: |
+ Nested attributes specifying the actions to take for packets that
+ match the key. Always present in notifications. Required for
+ OVS_FLOW_CMD_NEW requests, optional for OVS_FLOW_CMD_SET requests. An
+ OVS_FLOW_CMD_SET without OVS_FLOW_ATTR_ACTIONS will not modify the
+ actions. To clear the actions, an OVS_FLOW_ATTR_ACTIONS without any
+ nested attributes must be given.
+ -
+ name: stats
+ type: binary
+ struct: ovs-flow-stats
+ doc: |
+ Statistics for this flow. Present in notifications if the stats would
+ be nonzero. Ignored in requests.
+ -
+ name: tcp-flags
+ type: u8
+ doc: |
+ An 8-bit value giving the ORed value of all of the TCP flags seen on
+ packets in this flow. Only present in notifications for TCP flows, and
+ only if it would be nonzero. Ignored in requests.
+ -
+ name: used
+ type: u64
+ doc: |
+ A 64-bit integer giving the time, in milliseconds on the system
+ monotonic clock, at which a packet was last processed for this
+ flow. Only present in notifications if a packet has been processed for
+ this flow. Ignored in requests.
+ -
+ name: clear
+ type: flag
+ doc: |
+ If present in an OVS_FLOW_CMD_SET request, clears the last-used time,
+ accumulated TCP flags, and statistics for this flow. Otherwise
+ ignored in requests. Never present in notifications.
+ -
+ name: mask
+ type: nest
+ nested-attributes: key-attrs
+ doc: |
+ Nested attributes specifying the mask bits for wildcarded flow
+ match. Mask bit value '1' specifies exact match with corresponding
+ flow key bit, while mask bit value '0' specifies a wildcarded
+ match. Omitting attribute is treated as wildcarding all corresponding
+ fields. Optional for all requests. If not present, all flow key bits
+ are exact match bits.
+ -
+ name: probe
+ type: binary
+ doc: |
+ Flow operation is a feature probe, error logging should be suppressed.
+ -
+ name: ufid
+ type: binary
+ doc: |
+ A value between 1-16 octets specifying a unique identifier for the
+ flow. Causes the flow to be indexed by this value rather than the
+ value of the OVS_FLOW_ATTR_KEY attribute. Optional for all
+ requests. Present in notifications if the flow was created with this
+ attribute.
+ display-hint: uuid
+ -
+ name: ufid-flags
+ type: u32
+ enum: ovs-ufid-flags
+ doc: |
+ A 32-bit value of ORed flags that provide alternative semantics for
+ flow installation and retrieval. Optional for all requests.
+ -
+ name: pad
+ type: binary
+
+ -
+ name: key-attrs
+ enum-name: ovs-key-attr
+ name-prefix: ovs-key-attr-
+ attributes:
+ -
+ name: encap
+ type: nest
+ nested-attributes: key-attrs
+ -
+ name: priority
+ type: u32
+ -
+ name: in-port
+ type: u32
+ -
+ name: ethernet
+ type: binary
+ struct: ovs-key-ethernet
+ doc: struct ovs_key_ethernet
+ -
+ name: vlan
+ type: u16
+ byte-order: big-endian
+ -
+ name: ethertype
+ type: u16
+ byte-order: big-endian
+ -
+ name: ipv4
+ type: binary
+ struct: ovs-key-ipv4
+ -
+ name: ipv6
+ type: binary
+ struct: ovs-key-ipv6
+ doc: struct ovs_key_ipv6
+ -
+ name: tcp
+ type: binary
+ struct: ovs-key-tcp
+ -
+ name: udp
+ type: binary
+ struct: ovs-key-udp
+ -
+ name: icmp
+ type: binary
+ struct: ovs-key-icmp
+ -
+ name: icmpv6
+ type: binary
+ struct: ovs-key-icmp
+ -
+ name: arp
+ type: binary
+ struct: ovs-key-arp
+ doc: struct ovs_key_arp
+ -
+ name: nd
+ type: binary
+ struct: ovs-key-nd
+ doc: struct ovs_key_nd
+ -
+ name: skb-mark
+ type: u32
+ -
+ name: tunnel
+ type: nest
+ nested-attributes: tunnel-key-attrs
+ -
+ name: sctp
+ type: binary
+ struct: ovs-key-sctp
+ -
+ name: tcp-flags
+ type: u16
+ byte-order: big-endian
+ -
+ name: dp-hash
+ type: u32
+ doc: Value 0 indicates the hash is not computed by the datapath.
+ -
+ name: recirc-id
+ type: u32
+ -
+ name: mpls
+ type: binary
+ struct: ovs-key-mpls
+ -
+ name: ct-state
+ type: u32
+ enum: ct-state-flags
+ enum-as-flags: true
+ -
+ name: ct-zone
+ type: u16
+ doc: connection tracking zone
+ -
+ name: ct-mark
+ type: u32
+ doc: connection tracking mark
+ -
+ name: ct-labels
+ type: binary
+ display-hint: hex
+ doc: 16-octet connection tracking label
+ -
+ name: ct-orig-tuple-ipv4
+ type: binary
+ struct: ovs-key-ct-tuple-ipv4
+ -
+ name: ct-orig-tuple-ipv6
+ type: binary
+ doc: struct ovs_key_ct_tuple_ipv6
+ -
+ name: nsh
+ type: nest
+ nested-attributes: ovs-nsh-key-attrs
+ -
+ name: packet-type
+ type: u32
+ byte-order: big-endian
+ doc: Should not be sent to the kernel
+ -
+ name: nd-extensions
+ type: binary
+ doc: Should not be sent to the kernel
+ -
+ name: tunnel-info
+ type: binary
+ doc: struct ip_tunnel_info
+ -
+ name: ipv6-exthdrs
+ type: binary
+ struct: ovs-key-ipv6-exthdrs
+ doc: struct ovs_key_ipv6_exthdr
+ -
+ name: action-attrs
+ enum-name: ovs-action-attr
+ name-prefix: ovs-action-attr-
+ attributes:
+ -
+ name: output
+ type: u32
+ doc: ovs port number in datapath
+ -
+ name: userspace
+ type: nest
+ nested-attributes: userspace-attrs
+ -
+ name: set
+ type: nest
+ nested-attributes: key-attrs
+ doc: Replaces the contents of an existing header. The single nested attribute specifies a header to modify and its value.
+ -
+ name: push-vlan
+ type: binary
+ struct: ovs-action-push-vlan
+ doc: Push a new outermost 802.1Q or 802.1ad header onto the packet.
+ -
+ name: pop-vlan
+ type: flag
+ doc: Pop the outermost 802.1Q or 802.1ad header from the packet.
+ -
+ name: sample
+ type: nest
+ nested-attributes: sample-attrs
+ doc: |
+ Probabilistically executes actions, as specified in the nested attributes.
+ -
+ name: recirc
+ type: u32
+ doc: recirc id
+ -
+ name: hash
+ type: binary
+ struct: ovs-action-hash
+ -
+ name: push-mpls
+ type: binary
+ struct: ovs-action-push-mpls
+ doc: |
+ Push a new MPLS label stack entry onto the top of the packet's MPLS
+ label stack. Set the ethertype of the encapsulating frame to either
+ ETH_P_MPLS_UC or ETH_P_MPLS_MC to indicate the new packet contents.
+ -
+ name: pop-mpls
+ type: u16
+ byte-order: big-endian
+ doc: ethertype
+ -
+ name: set-masked
+ type: nest
+ nested-attributes: key-attrs
+ doc: |
+ Replaces the contents of an existing header. A nested attribute
+ specifies a header to modify, its value, and a mask. For every bit set
+ in the mask, the corresponding bit value is copied from the value to
+ the packet header field, rest of the bits are left unchanged. The
+ non-masked value bits must be passed in as zeroes. Masking is not
+ supported for the OVS_KEY_ATTR_TUNNEL attribute.
+ -
+ name: ct
+ type: nest
+ nested-attributes: ct-attrs
+ doc: |
+ Track the connection. Populate the conntrack-related entries
+ in the flow key.
+ -
+ name: trunc
+ type: u32
+ doc: struct ovs_action_trunc is a u32 max length
+ -
+ name: push-eth
+ type: binary
+ doc: struct ovs_action_push_eth
+ -
+ name: pop-eth
+ type: flag
+ -
+ name: ct-clear
+ type: flag
+ -
+ name: push-nsh
+ type: nest
+ nested-attributes: ovs-nsh-key-attrs
+ doc: |
+ Push NSH header to the packet.
+ -
+ name: pop-nsh
+ type: flag
+ doc: |
+ Pop the outermost NSH header off the packet.
+ -
+ name: meter
+ type: u32
+ doc: |
+ Run packet through a meter, which may drop the packet, or modify the
+ packet (e.g., change the DSCP field)
+ -
+ name: clone
+ type: nest
+ nested-attributes: action-attrs
+ doc: |
+ Make a copy of the packet and execute a list of actions without
+ affecting the original packet and key.
+ -
+ name: check-pkt-len
+ type: nest
+ nested-attributes: check-pkt-len-attrs
+ doc: |
+ Check the packet length and execute a set of actions if greater than
+ the specified packet length, else execute another set of actions.
+ -
+ name: add-mpls
+ type: binary
+ struct: ovs-action-add-mpls
+ doc: |
+ Push a new MPLS label stack entry at the start of the packet or at the
+ start of the l3 header depending on the value of l3 tunnel flag in the
+ tun_flags field of this OVS_ACTION_ATTR_ADD_MPLS argument.
+ -
+ name: dec-ttl
+ type: nest
+ nested-attributes: dec-ttl-attrs
+ -
+ name: tunnel-key-attrs
+ enum-name: ovs-tunnel-key-attr
+ name-prefix: ovs-tunnel-key-attr-
+ attributes:
+ -
+ name: id
+ type: u64
+ byte-order: big-endian
+ value: 0
+ -
+ name: ipv4-src
+ type: u32
+ byte-order: big-endian
+ -
+ name: ipv4-dst
+ type: u32
+ byte-order: big-endian
+ -
+ name: tos
+ type: u8
+ -
+ name: ttl
+ type: u8
+ -
+ name: dont-fragment
+ type: flag
+ -
+ name: csum
+ type: flag
+ -
+ name: oam
+ type: flag
+ -
+ name: geneve-opts
+ type: binary
+ sub-type: u32
+ -
+ name: tp-src
+ type: u16
+ byte-order: big-endian
+ -
+ name: tp-dst
+ type: u16
+ byte-order: big-endian
+ -
+ name: vxlan-opts
+ type: nest
+ nested-attributes: vxlan-ext-attrs
+ -
+ name: ipv6-src
+ type: binary
+ doc: |
+ struct in6_addr source IPv6 address
+ -
+ name: ipv6-dst
+ type: binary
+ doc: |
+ struct in6_addr destination IPv6 address
+ -
+ name: pad
+ type: binary
+ -
+ name: erspan-opts
+ type: binary
+ doc: |
+ struct erspan_metadata
+ -
+ name: ipv4-info-bridge
+ type: flag
+ -
+ name: check-pkt-len-attrs
+ enum-name: ovs-check-pkt-len-attr
+ name-prefix: ovs-check-pkt-len-attr-
+ attributes:
+ -
+ name: pkt-len
+ type: u16
+ -
+ name: actions-if-greater
+ type: nest
+ nested-attributes: action-attrs
+ -
+ name: actions-if-less-equal
+ type: nest
+ nested-attributes: action-attrs
+ -
+ name: sample-attrs
+ enum-name: ovs-sample-attr
+ name-prefix: ovs-sample-attr-
+ attributes:
+ -
+ name: probability
+ type: u32
+ -
+ name: actions
+ type: nest
+ nested-attributes: action-attrs
+ -
+ name: userspace-attrs
+ enum-name: ovs-userspace-attr
+ name-prefix: ovs-userspace-attr-
+ attributes:
+ -
+ name: pid
+ type: u32
+ -
+ name: userdata
+ type: binary
+ -
+ name: egress-tun-port
+ type: u32
+ -
+ name: actions
+ type: flag
+ -
+ name: ovs-nsh-key-attrs
+ enum-name: ovs-nsh-key-attr
+ name-prefix: ovs-nsh-key-attr-
+ attributes:
+ -
+ name: base
+ type: binary
+ -
+ name: md1
+ type: binary
+ -
+ name: md2
+ type: binary
+ -
+ name: ct-attrs
+ enum-name: ovs-ct-attr
+ name-prefix: ovs-ct-attr-
+ attributes:
+ -
+ name: commit
+ type: flag
+ -
+ name: zone
+ type: u16
+ -
+ name: mark
+ type: binary
+ -
+ name: labels
+ type: binary
+ -
+ name: helper
+ type: string
+ -
+ name: nat
+ type: nest
+ nested-attributes: nat-attrs
+ -
+ name: force-commit
+ type: flag
+ -
+ name: eventmask
+ type: u32
+ -
+ name: timeout
+ type: string
+ -
+ name: nat-attrs
+ enum-name: ovs-nat-attr
+ name-prefix: ovs-nat-attr-
+ attributes:
+ -
+ name: src
+ type: flag
+ -
+ name: dst
+ type: flag
+ -
+ name: ip-min
+ type: binary
+ -
+ name: ip-max
+ type: binary
+ -
+ name: proto-min
+ type: u16
+ -
+ name: proto-max
+ type: u16
+ -
+ name: persistent
+ type: flag
+ -
+ name: proto-hash
+ type: flag
+ -
+ name: proto-random
+ type: flag
+ -
+ name: dec-ttl-attrs
+ enum-name: ovs-dec-ttl-attr
+ name-prefix: ovs-dec-ttl-attr-
+ attributes:
+ -
+ name: action
+ type: nest
+ nested-attributes: action-attrs
+ -
+ name: vxlan-ext-attrs
+ enum-name: ovs-vxlan-ext-
+ name-prefix: ovs-vxlan-ext-
+ attributes:
+ -
+ name: gbp
+ type: u32
+
+operations:
+ name-prefix: ovs-flow-cmd-
+ fixed-header: ovs-header
+ list:
+ -
+ name: get
+ doc: Get / dump OVS flow configuration and state
+ value: 3
+ attribute-set: flow-attrs
+ do: &flow-get-op
+ request:
+ attributes:
+ - dp-ifindex
+ - key
+ - ufid
+ - ufid-flags
+ reply:
+ attributes:
+ - dp-ifindex
+ - key
+ - ufid
+ - mask
+ - stats
+ - actions
+ dump: *flow-get-op
+ -
+ name: new
+ doc: Create OVS flow configuration in a data path
+ value: 1
+ attribute-set: flow-attrs
+ do:
+ request:
+ attributes:
+ - dp-ifindex
+ - key
+ - ufid
+ - mask
+ - actions
+
+mcast-groups:
+ list:
+ -
+ name: ovs_flow
diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml
index 8e55622ddf11..17336455bec1 100644
--- a/Documentation/netlink/specs/ovs_vport.yaml
+++ b/Documentation/netlink/specs/ovs_vport.yaml
@@ -3,6 +3,7 @@
name: ovs_vport
version: 2
protocol: genetlink-legacy
+uapi-header: linux/openvswitch.h
doc:
OVS vport configuration over generic netlink.
@@ -18,10 +19,13 @@ definitions:
-
name: vport-type
type: enum
+ enum-name: ovs-vport-type
+ name-prefix: ovs-vport-type-
entries: [ unspec, netdev, internal, gre, vxlan, geneve ]
-
name: vport-stats
type: struct
+ enum-name: ovs-vport-stats
members:
-
name: rx-packets
@@ -51,6 +55,8 @@ definitions:
attribute-sets:
-
name: vport-options
+ enum-name: ovs-vport-options
+ name-prefix: ovs-tunnel-attr-
attributes:
-
name: dst-port
@@ -60,6 +66,8 @@ attribute-sets:
type: u32
-
name: upcall-stats
+ enum-name: ovs-vport-upcall-attr
+ name-prefix: ovs-vport-upcall-attr-
attributes:
-
name: success
@@ -70,6 +78,8 @@ attribute-sets:
type: u64
-
name: vport
+ name-prefix: ovs-vport-attr-
+ enum-name: ovs-vport-attr
attributes:
-
name: port-no
@@ -108,9 +118,10 @@ attribute-sets:
nested-attributes: upcall-stats
operations:
+ name-prefix: ovs-vport-cmd-
list:
-
- name: vport-get
+ name: get
doc: Get / dump OVS vport configuration and state
value: 3
attribute-set: vport
diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
index 8bcb173e0353..5eaa3ab6c73e 100644
--- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
+++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst
@@ -38,6 +38,7 @@ debug logs.
Some of the ENA devices support a working mode called Low-latency
Queue (LLQ), which saves several more microseconds.
+
ENA Source Code Directory Structure
===================================
@@ -205,6 +206,8 @@ Adaptive coalescing can be switched on/off through `ethtool(8)`'s
More information about Adaptive Interrupt Moderation (DIM) can be found in
Documentation/networking/net_dim.rst
+.. _`RX copybreak`:
+
RX copybreak
============
The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
@@ -315,3 +318,34 @@ Rx
- The new SKB is updated with the necessary information (protocol,
checksum hw verify result, etc), and then passed to the network
stack, using the NAPI interface function :code:`napi_gro_receive()`.
+
+Dynamic RX Buffers (DRB)
+------------------------
+
+Each RX descriptor in the RX ring is a single memory page (which is either 4KB
+or 16KB long depending on system's configurations).
+To reduce the memory allocations required when dealing with a high rate of small
+packets, the driver tries to reuse the remaining RX descriptor's space if more
+than 2KB of this page remain unused.
+
+A simple example of this mechanism is the following sequence of events:
+
+::
+
+ 1. Driver allocates page-sized RX buffer and passes it to hardware
+ +----------------------+
+ |4KB RX Buffer |
+ +----------------------+
+
+ 2. A 300Bytes packet is received on this buffer
+
+ 3. The driver increases the ref count on this page and returns it back to
+ HW as an RX buffer of size 4KB - 300Bytes = 3796 Bytes
+ +----+--------------------+
+ |****|3796 Bytes RX Buffer|
+ +----+--------------------+
+
+This mechanism isn't used when an XDP program is loaded, or when the
+RX packet is less than rx_copybreak bytes (in which case the packet is
+copied out of the RX buffer into the linear part of a new skb allocated
+for it and the RX buffer remains the same size, see `RX copybreak`_).
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 69695e5511f4..e4d065c55ea8 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -84,24 +84,6 @@ Once the VM shuts down, or otherwise releases the VF, the command will
complete.
-Important notes for SR-IOV and Link Aggregation
------------------------------------------------
-Link Aggregation is mutually exclusive with SR-IOV.
-
-- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF.
-- If SR-IOV is active, you cannot set up Link Aggregation on the interface.
-
-Bridging and MACVLAN are also affected by this. If you wish to use bridging or
-MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling
-SR-IOV. If you are using bridging or MACVLAN in conjunction with SR-IOV, and
-you want to remove the interface from the bridge or MACVLAN, you must follow
-these steps:
-
-1. Destroy SR-IOV VFs if they exist
-2. Remove the interface from the bridge or MACVLAN
-3. Recreate SRIOV VFs as needed
-
-
Additional Features and Configurations
======================================
diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
index 5ba9015336e2..bfd233cfac35 100644
--- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
+++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
@@ -13,6 +13,7 @@ Contents
- `Drivers`_
- `Basic packet flow`_
- `Devlink health reporters`_
+- `Quality of service`_
Overview
========
@@ -287,3 +288,47 @@ For example::
NIX_AF_ERR:
NIX Error Interrupt Reg : 64
Rx on unmapped PF_FUNC
+
+
+Quality of service
+==================
+
+
+Hardware algorithms used in scheduling
+--------------------------------------
+
+octeontx2 silicon and CN10K transmit interface consists of five transmit levels
+starting from SMQ/MDQ, TL4 to TL1. Each packet will traverse MDQ, TL4 to TL1
+levels. Each level contains an array of queues to support scheduling and shaping.
+The hardware uses the below algorithms depending on the priority of scheduler queues.
+Once the user creates tc classes with different priorities, the driver configures
+schedulers allocated to the class with specified priority along with rate-limiting
+configuration.
+
+1. Strict Priority
+
+ - Once packets are submitted to MDQ, hardware picks all active MDQs having different priority
+ using strict priority.
+
+2. Round Robin
+
+ - Active MDQs having the same priority level are chosen using round robin.
+
+
+Setup HTB offload
+-----------------
+
+1. Enable HW TC offload on the interface::
+
+ # ethtool -K <interface> hw-tc-offload on
+
+2. Create htb root::
+
+ # tc qdisc add dev <interface> clsact
+ # tc qdisc replace dev <interface> root handle 1: htb offload
+
+3. Create tc classes with different priorities::
+
+ # tc class add dev <interface> parent 1: classid 1:1 htb rate 10Gbit prio 1
+
+ # tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 7
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
index 6b2d1fe74ecf..a395df9c2751 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -797,6 +797,16 @@ Counters on the NIC port that is connected to a eSwitch.
RoCE/UD/RC traffic) [#accel]_.
- Acceleration
+ * - `vport_loopback_packets`
+ - Unicast, multicast and broadcast packets that were loop-back (received
+ and transmitted), IB/Eth [#accel]_.
+ - Acceleration
+
+ * - `vport_loopback_bytes`
+ - Unicast, multicast and broadcast bytes that were loop-back (received
+ and transmitted), IB/Eth [#accel]_.
+ - Acceleration
+
* - `rx_steer_missed_packets`
- Number of packets that was received by the NIC, however was discarded
because it did not match any flow in the NIC flow table.
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index 3354ca3608ee..a4edf908b707 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -290,6 +290,13 @@ Description of the vnic counters:
- nic_receive_steering_discard
number of packets that completed RX flow
steering but were discarded due to a mismatch in flow table.
+- generated_pkt_steering_fail
+ number of packets generated by the VNIC experiencing unexpected steering
+ failure (at any point in steering flow).
+- handled_pkt_steering_fail
+ number of packets handled by the VNIC experiencing unexpected steering
+ failure (at any point in steering flow owned by the VNIC, including the FDB
+ for the eswitch owner).
User commands examples:
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
index 01deedb71597..6e3f5ee8b0d0 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst
@@ -45,6 +45,28 @@ Following bridge VLAN functions are supported by mlx5:
Subfunction
===========
+Subfunctions which are spawned over the E-switch are created only with a devlink
+device, and by default all the SF auxiliary devices are disabled.
+This allows the user to configure the SF before the SF has been fully probed,
+which saves time.
+
+Usage example:
+
+- Create SF::
+
+ $ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11
+ $ devlink port function set pci/0000:08:00.0/32768 hw_addr 00:00:00:00:00:11 state active
+
+- Enable ETH auxiliary device::
+
+ $ devlink dev param set auxiliary/mlx5_core.sf.1 name enable_eth value true cmode driverinit
+
+- Now, in order to fully probe the SF, use devlink reload::
+
+ $ devlink dev reload auxiliary/mlx5_core.sf.1
+
+mlx5 supports ETH, rdma and vdpa (vnet) auxiliary devices devlink params (see :ref:`Documentation/networking/devlink/devlink-params.rst <devlink_params_generic>`).
+
mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
A subfunction has its own function capabilities and its own resources. This
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 80b8f73a0244..4a010a7cde7f 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -881,9 +881,10 @@ tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 127. Default value
- is 6, which corresponds to 63seconds till the last retransmission
- with the current initial RTO of 1second. With this the final timeout
- for an active TCP connection attempt will happen after 127seconds.
+ is 6, which corresponds to 67 seconds (with tcp_syn_linear_timeouts = 4)
+ till the last retransmission with the current initial RTO of 1 second.
+ With this the final timeout for an active TCP connection attempt
+ will happen after 131 seconds.
tcp_timestamps - INTEGER
Enable timestamps as defined in RFC1323.
@@ -946,6 +947,16 @@ tcp_pacing_ca_ratio - INTEGER
Default: 120
+tcp_syn_linear_timeouts - INTEGER
+ The number of times for an active TCP connection to retransmit SYNs with
+ a linear backoff timeout before defaulting to an exponential backoff
+ timeout. This has no effect on SYNACK at the passive TCP side.
+
+ With an initial RTO of 1 and tcp_syn_linear_timeouts = 4 we would
+ expect SYN RTOs to be: 1, 1, 1, 1, 1, 2, 4, ... (4 linear timeouts,
+ and the first exponential backoff using 2^0 * initial_RTO).
+ Default: 4
+
tcp_tso_win_divisor - INTEGER
This allows control over what percentage of the congestion window
can be consumed by a single TSO frame.
@@ -970,6 +981,21 @@ tcp_tw_reuse - INTEGER
tcp_window_scaling - BOOLEAN
Enable window scaling as defined in RFC1323.
+tcp_shrink_window - BOOLEAN
+ This changes how the TCP receive window is calculated.
+
+ RFC 7323, section 2.4, says there are instances when a retracted
+ window can be offered, and that TCP implementations MUST ensure
+ that they handle a shrinking window, as specified in RFC 1122.
+
+ - 0 - Disabled. The window is never shrunk.
+ - 1 - Enabled. The window is shrunk when necessary to remain within
+ the memory limit set by autotuning (sk_rcvbuf).
+ This only occurs if a non-zero receive window
+ scaling factor is also in effect.
+
+ Default: 0
+
tcp_wmem - vector of 3 INTEGERs: min, default, max
min: Amount of memory reserved for send buffers for TCP sockets.
Each TCP socket has rights to use it due to fact of its birth.
diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst
index 3d435caa3ef2..92c9fb46d6a2 100644
--- a/Documentation/networking/scaling.rst
+++ b/Documentation/networking/scaling.rst
@@ -269,8 +269,8 @@ a single application thread handles flows with many different flow hashes.
rps_sock_flow_table is a global flow table that contains the *desired* CPU
for flows: the CPU that is currently processing the flow in userspace.
Each table value is a CPU index that is updated during calls to recvmsg
-and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
-and tcp_splice_read()).
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and
+tcp_splice_read()).
When the scheduler moves a thread to a new CPU while it has outstanding
receive packets on the old CPU, packets may arrive out of order. To
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index 6a919cffcbfd..613a01da4717 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -434,9 +434,10 @@ There are a few hints which can help with linux-kernel survival:
questions. Some developers can get impatient with people who clearly
have not done their homework.
-- Avoid top-posting (the practice of putting your answer above the quoted
- text you are responding to). It makes your response harder to read and
- makes a poor impression.
+- Use interleaved ("inline") replies, which makes your response easier to
+ read. (i.e. avoid top-posting -- the practice of putting your answer above
+ the quoted text you are responding to.) For more details, see
+ :ref:`Documentation/process/submitting-patches.rst <interleaved_replies>`.
- Ask on the correct mailing list. Linux-kernel may be the general meeting
point, but it is not the best place to find developers from all
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index ef540865ad22..5cf6a5f8ca57 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
====================== =============== ========================================
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
-Rust (optional) 1.62.0 rustc --version
+Rust (optional) 1.68.2 rustc --version
bindgen (optional) 0.56.0 bindgen --version
GNU make 3.82 make --version
bash 4.2 bash --version
diff --git a/Documentation/process/handling-regressions.rst b/Documentation/process/handling-regressions.rst
index abb741b1aeee..5d3c3de3f4ec 100644
--- a/Documentation/process/handling-regressions.rst
+++ b/Documentation/process/handling-regressions.rst
@@ -129,88 +129,132 @@ tools and scripts used by other kernel developers or Linux distributions; one of
these tools is regzbot, which heavily relies on the "Link:" tags to associate
reports for regression with changes resolving them.
-Prioritize work on fixing regressions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-You should fix any reported regression as quickly as possible, to provide
-affected users with a solution in a timely manner and prevent more users from
-running into the issue; nevertheless developers need to take enough time and
-care to ensure regression fixes do not cause additional damage.
-
-In the end though, developers should give their best to prevent users from
-running into situations where a regression leaves them only three options: "run
-a kernel with a regression that seriously impacts usage", "continue running an
-outdated and thus potentially insecure kernel version for more than two weeks
-after a regression's culprit was identified", and "downgrade to a still
-supported kernel series that lack required features".
-
-How to realize this depends a lot on the situation. Here are a few rules of
-thumb for you, in order or importance:
-
- * Prioritize work on handling regression reports and fixing regression over all
- other Linux kernel work, unless the latter concerns acute security issues or
- bugs causing data loss or damage.
-
- * Always consider reverting the culprit commits and reapplying them later
- together with necessary fixes, as this might be the least dangerous and
- quickest way to fix a regression.
-
- * Developers should handle regressions in all supported kernel series, but are
- free to delegate the work to the stable team, if the issue probably at no
- point in time occurred with mainline.
-
- * Try to resolve any regressions introduced in the current development before
- its end. If you fear a fix might be too risky to apply only days before a new
- mainline release, let Linus decide: submit the fix separately to him as soon
- as possible with the explanation of the situation. He then can make a call
- and postpone the release if necessary, for example if multiple such changes
- show up in his inbox.
-
- * Address regressions in stable, longterm, or proper mainline releases with
- more urgency than regressions in mainline pre-releases. That changes after
- the release of the fifth pre-release, aka "-rc5": mainline then becomes as
- important, to ensure all the improvements and fixes are ideally tested
- together for at least one week before Linus releases a new mainline version.
-
- * Fix regressions within two or three days, if they are critical for some
- reason -- for example, if the issue is likely to affect many users of the
- kernel series in question on all or certain architectures. Note, this
- includes mainline, as issues like compile errors otherwise might prevent many
- testers or continuous integration systems from testing the series.
-
- * Aim to fix regressions within one week after the culprit was identified, if
- the issue was introduced in either:
-
- * a recent stable/longterm release
-
- * the development cycle of the latest proper mainline release
-
- In the latter case (say Linux v5.14), try to address regressions even
- quicker, if the stable series for the predecessor (v5.13) will be abandoned
- soon or already was stamped "End-of-Life" (EOL) -- this usually happens about
- three to four weeks after a new mainline release.
-
- * Try to fix all other regressions within two weeks after the culprit was
- found. Two or three additional weeks are acceptable for performance
- regressions and other issues which are annoying, but don't prevent anyone
- from running Linux (unless it's an issue in the current development cycle,
- as those should ideally be addressed before the release). A few weeks in
- total are acceptable if a regression can only be fixed with a risky change
- and at the same time is affecting only a few users; as much time is
- also okay if the regression is already present in the second newest longterm
- kernel series.
-
-Note: The aforementioned time frames for resolving regressions are meant to
-include getting the fix tested, reviewed, and merged into mainline, ideally with
-the fix being in linux-next at least briefly. This leads to delays you need to
-account for.
-
-Subsystem maintainers are expected to assist in reaching those periods by doing
-timely reviews and quick handling of accepted patches. They thus might have to
-send git-pull requests earlier or more often than usual; depending on the fix,
-it might even be acceptable to skip testing in linux-next. Especially fixes for
-regressions in stable and longterm kernels need to be handled quickly, as fixes
-need to be merged in mainline before they can be backported to older series.
+Expectations and best practices for fixing regressions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As a Linux kernel developer, you are expected to give your best to prevent
+situations where a regression caused by a recent change of yours leaves users
+only these options:
+
+ * Run a kernel with a regression that impacts usage.
+
+ * Switch to an older or newer kernel series.
+
+ * Continue running an outdated and thus potentially insecure kernel for more
+ than three weeks after the regression's culprit was identified. Ideally it
+ should be less than two. And it ought to be just a few days, if the issue is
+ severe or affects many users -- either in general or in prevalent
+ environments.
+
+How to realize that in practice depends on various factors. Use the following
+rules of thumb as a guide.
+
+In general:
+
+ * Prioritize work on regressions over all other Linux kernel work, unless the
+ latter concerns a severe issue (e.g. acute security vulnerability, data loss,
+ bricked hardware, ...).
+
+ * Expedite fixing mainline regressions that recently made it into a proper
+ mainline, stable, or longterm release (either directly or via backport).
+
+ * Do not consider regressions from the current cycle as something that can wait
+ till the end of the cycle, as the issue might discourage or prevent users and
+ CI systems from testing mainline now or generally.
+
+ * Work with the required care to avoid additional or bigger damage, even if
+ resolving an issue then might take longer than outlined below.
+
+On timing once the culprit of a regression is known:
+
+ * Aim to mainline a fix within two or three days, if the issue is severe or
+ bothering many users -- either in general or in prevalent conditions like a
+ particular hardware environment, distribution, or stable/longterm series.
+
+ * Aim to mainline a fix by Sunday after the next, if the culprit made it
+ into a recent mainline, stable, or longterm release (either directly or via
+ backport); if the culprit became known early during a week and is simple to
+ resolve, try to mainline the fix within the same week.
+
+ * For other regressions, aim to mainline fixes before the hindmost Sunday
+ within the next three weeks. One or two Sundays later are acceptable, if the
+ regression is something people can live with easily for a while -- like a
+ mild performance regression.
+
+ * It's strongly discouraged to delay mainlining regression fixes till the next
+ merge window, except when the fix is extraordinarily risky or when the
+ culprit was mainlined more than a year ago.
+
+On procedure:
+
+ * Always consider reverting the culprit, as it's often the quickest and least
+ dangerous way to fix a regression. Don't worry about mainlining a fixed
+ variant later: that should be straight-forward, as most of the code went
+ through review once already.
+
+ * Try to resolve any regressions introduced in mainline during the past
+ twelve months before the current development cycle ends: Linus wants such
+ regressions to be handled like those from the current cycle, unless fixing
+ bears unusual risks.
+
+ * Consider CCing Linus on discussions or patch review, if a regression seems
+ tangly. Do the same in precarious or urgent cases -- especially if the
+ subsystem maintainer might be unavailable. Also CC the stable team, when you
+ know such a regression made it into a mainline, stable, or longterm release.
+
+ * For urgent regressions, consider asking Linus to pick up the fix straight
+ from the mailing list: he is totally fine with that for uncontroversial
+ fixes. Ideally though such requests should happen in accordance with the
+ subsystem maintainers or come directly from them.
+
+ * In case you are unsure if a fix is worth the risk of applying just days before
+ a new mainline release, send Linus a mail with the usual lists and people in
+ CC; in it, summarize the situation while asking him to consider picking up
+ the fix straight from the list. He then himself can make the call and when
+ needed even postpone the release. Such requests again should ideally happen
+ in accordance with the subsystem maintainers or come directly from them.
+
+Regarding stable and longterm kernels:
+
+ * You are free to leave regressions to the stable team, if they at no point in
+ time occurred with mainline or were fixed there already.
+
+ * If a regression made it into a proper mainline release during the past
+ twelve months, make sure to tag the fix with "Cc: stable@vger.kernel.org", as a
+ "Fixes:" tag alone does not guarantee a backport. Please add the same tag,
+ in case you know the culprit was backported to stable or longterm kernels.
+
+ * When receiving reports about regressions in recent stable or longterm kernel
+ series, please evaluate at least briefly if the issue might happen in current
+ mainline as well -- and if that seems likely, take hold of the report. If in
+ doubt, ask the reporter to check mainline.
+
+ * Whenever you want to swiftly resolve a regression that recently also made it
+ into a proper mainline, stable, or longterm release, fix it quickly in
+ mainline; when appropriate thus involve Linus to fast-track the fix (see
+ above). That's because the stable team normally does neither revert nor fix
+ any changes that cause the same problems in mainline.
+
+ * In case of urgent regression fixes you might want to ensure prompt
+ backporting by dropping the stable team a note once the fix was mainlined;
+ this is especially advisable during merge windows and shortly thereafter, as
+ the fix otherwise might land at the end of a huge patch queue.
+
+On patch flow:
+
+ * Developers, when trying to reach the time periods mentioned above, remember
+ to account for the time it takes to get fixes tested, reviewed, and merged by
+ Linus, ideally with them being in linux-next at least briefly. Hence, if a
+ fix is urgent, make it obvious to ensure others handle it appropriately.
+
+ * Reviewers, you are kindly asked to assist developers in reaching the time
+ periods mentioned above by reviewing regression fixes in a timely manner.
+
+ * Subsystem maintainers, you likewise are encouraged to expedite the handling
+ of regression fixes. Thus evaluate if skipping linux-next is an option for
+ the particular fix. Also consider sending git pull requests more often than
+ usual when needed. And try to avoid holding onto regression fixes over
+ weekends -- especially when the fix is marked for backporting.
More aspects regarding regressions developers should be aware of
diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 178c95fd17dc..93d8a794bdfc 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -421,6 +421,9 @@ allowing themselves a breath. Please respect that.
The release candidate -rc1 is the starting point for new patches to be
applied which are targeted for the next merge window.
+So-called _urgent_ branches will be merged into mainline during the
+stabilization phase of each release.
+
Git
^^^
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index 486875fd73c0..efac910e2659 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -331,6 +331,31 @@ explaining difference against previous submission (see
See Documentation/process/email-clients.rst for recommendations on email
clients and mailing list etiquette.
+.. _interleaved_replies:
+
+Use trimmed interleaved replies in email discussions
+----------------------------------------------------
+Top-posting is strongly discouraged in Linux kernel development
+discussions. Interleaved (or "inline") replies make conversations much
+easier to follow. For more details see:
+https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
+
+As is frequently quoted on the mailing list::
+
+ A: http://en.wikipedia.org/wiki/Top_post
+ Q: Where do I find info about this thing called top-posting?
+ A: Because it messes up the order in which people normally read text.
+ Q: Why is top-posting such a bad thing?
+ A: Top-posting.
+ Q: What is the most annoying thing in e-mail?
+
+Similarly, please trim all unneeded quotations that aren't relevant
+to your reply. This makes responses easier to find, and saves time and
+space. For more details see: http://daringfireball.net/2007/07/on_top ::
+
+ A: No.
+ Q: Should I include quotations after my reply?
+
.. _resend_reminders:
Don't get discouraged - or impatient
diff --git a/Documentation/rust/quick-start.rst b/Documentation/rust/quick-start.rst
index 13b7744b1e27..a8931512ed98 100644
--- a/Documentation/rust/quick-start.rst
+++ b/Documentation/rust/quick-start.rst
@@ -38,9 +38,9 @@ and run::
rustup override set $(scripts/min-tool-version.sh rustc)
-Otherwise, fetch a standalone installer or install ``rustup`` from:
+Otherwise, fetch a standalone installer from:
- https://www.rust-lang.org
+ https://forge.rust-lang.org/infra/other-installation-methods.html#standalone
Rust standard library source
diff --git a/Documentation/scheduler/sched-deadline.rst b/Documentation/scheduler/sched-deadline.rst
index 9d9be52f221a..9fe4846079bb 100644
--- a/Documentation/scheduler/sched-deadline.rst
+++ b/Documentation/scheduler/sched-deadline.rst
@@ -203,12 +203,15 @@ Deadline Task Scheduling
- Total bandwidth (this_bw): this is the sum of all tasks "belonging" to the
runqueue, including the tasks in Inactive state.
+ - Maximum usable bandwidth (max_bw): This is the maximum bandwidth usable by
+ deadline tasks and is currently set to the RT capacity.
+
The algorithm reclaims the bandwidth of the tasks in Inactive state.
It does so by decrementing the runtime of the executing task Ti at a pace equal
to
- dq = -max{ Ui / Umax, (1 - Uinact - Uextra) } dt
+ dq = -(max{ Ui, (Umax - Uinact - Uextra) } / Umax) dt
where:
diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst
index b51f38527e14..02d6dc3a49c8 100644
--- a/Documentation/subsystem-apis.rst
+++ b/Documentation/subsystem-apis.rst
@@ -10,6 +10,30 @@ is taken directly from the kernel source, with supplemental material added
as needed (or at least as we managed to add it — probably *not* all that is
needed).
+Human interfaces
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ input/index
+ hid/index
+ sound/index
+ gpu/index
+ fb/index
+
+Storage interfaces
+------------------
+
+.. toctree::
+ :maxdepth: 1
+
+ filesystems/index
+ block/index
+ cdrom/index
+ scsi/index
+ target/index
+
**Fixme**: much more organizational work is needed here.
.. toctree::
@@ -19,12 +43,8 @@ needed).
core-api/index
locking/index
accounting/index
- block/index
- cdrom/index
cpu-freq/index
- fb/index
fpga/index
- hid/index
i2c/index
iio/index
isdn/index
@@ -34,25 +54,19 @@ needed).
networking/index
pcmcia/index
power/index
- target/index
timers/index
spi/index
w1/index
watchdog/index
virt/index
- input/index
hwmon/index
- gpu/index
accel/index
security/index
- sound/index
crypto/index
- filesystems/index
mm/index
bpf/index
usb/index
PCI/index
- scsi/index
misc-devices/index
scheduler/index
mhi/index
diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst
index f79987e16cf4..e7b07313550a 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -14,10 +14,6 @@ Programs can view status of the events via
/sys/kernel/tracing/user_events_status and can both register and write
data out via /sys/kernel/tracing/user_events_data.
-Programs can also use /sys/kernel/tracing/dynamic_events to register and
-delete user based events via the u: prefix. The format of the command to
-dynamic_events is the same as the ioctl with the u: prefix applied.
-
Typically programs will register a set of events that they wish to expose to
tools that can read trace_events (such as ftrace and perf). The registration
process tells the kernel which address and bit to reflect if any tool has
@@ -144,6 +140,9 @@ its name. Delete will only succeed if there are no references left to the
event (in both user and kernel space). User programs should use a separate file
to request deletes than the one used for registration due to this.
+**NOTE:** By default events will auto-delete when there are no references left
+to the event. Flags in the future may change this logic.
+
Unregistering
-------------
If after registering an event it is no longer wanted to be updated then it can
diff --git a/Documentation/translations/zh_CN/arm/Booting b/Documentation/translations/zh_CN/arch/arm/Booting
index 5ecea0767893..f18585156b67 100644
--- a/Documentation/translations/zh_CN/arm/Booting
+++ b/Documentation/translations/zh_CN/arch/arm/Booting
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/booting.rst
+Chinese translated version of Documentation/arch/arm/booting.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
Maintainer: Russell King <linux@arm.linux.org.uk>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/arm/booting.rst 的中文翻译
+Documentation/arch/arm/booting.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt b/Documentation/translations/zh_CN/arch/arm/kernel_user_helpers.txt
index 99af4363984d..018eb7d54233 100644
--- a/Documentation/translations/zh_CN/arm/kernel_user_helpers.txt
+++ b/Documentation/translations/zh_CN/arch/arm/kernel_user_helpers.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm/kernel_user_helpers.rst
+Chinese translated version of Documentation/arch/arm/kernel_user_helpers.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org>
Dave Martin <dave.martin@linaro.org>
Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
---------------------------------------------------------------------
-Documentation/arm/kernel_user_helpers.rst 的中文翻译
+Documentation/arch/arm/kernel_user_helpers.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/amu.rst b/Documentation/translations/zh_CN/arch/arm64/amu.rst
index ab7180f91394..f8e09fd21ef5 100644
--- a/Documentation/translations/zh_CN/arm64/amu.rst
+++ b/Documentation/translations/zh_CN/arch/arm64/amu.rst
@@ -1,6 +1,6 @@
-.. include:: ../disclaimer-zh_CN.rst
+.. include:: ../../disclaimer-zh_CN.rst
-:Original: :ref:`Documentation/arm64/amu.rst <amu_index>`
+:Original: :ref:`Documentation/arch/arm64/amu.rst <amu_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
diff --git a/Documentation/translations/zh_CN/arm64/booting.txt b/Documentation/translations/zh_CN/arch/arm64/booting.txt
index 5b0164132c71..630eb32a8854 100644
--- a/Documentation/translations/zh_CN/arm64/booting.txt
+++ b/Documentation/translations/zh_CN/arch/arm64/booting.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/booting.rst
+Chinese translated version of Documentation/arch/arm64/booting.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ M: Will Deacon <will.deacon@arm.com>
zh_CN: Fu Wei <wefu@redhat.com>
C: 55f058e7574c3615dea4615573a19bdb258696c6
---------------------------------------------------------------------
-Documentation/arm64/booting.rst 的中文翻译
+Documentation/arch/arm64/booting.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/elf_hwcaps.rst b/Documentation/translations/zh_CN/arch/arm64/elf_hwcaps.rst
index 9aa4637eac97..f60ac1580d3e 100644
--- a/Documentation/translations/zh_CN/arm64/elf_hwcaps.rst
+++ b/Documentation/translations/zh_CN/arch/arm64/elf_hwcaps.rst
@@ -1,6 +1,6 @@
-.. include:: ../disclaimer-zh_CN.rst
+.. include:: ../../disclaimer-zh_CN.rst
-:Original: :ref:`Documentation/arm64/elf_hwcaps.rst <elf_hwcaps_index>`
+:Original: :ref:`Documentation/arch/arm64/elf_hwcaps.rst <elf_hwcaps_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
@@ -92,7 +92,7 @@ HWCAP_ASIMDHP
ID_AA64PFR0_EL1.AdvSIMD == 0b0001 表示有此功能。
HWCAP_CPUID
- 根据 Documentation/arm64/cpu-feature-registers.rst 描述,EL0 可以访问
+ 根据 Documentation/arch/arm64/cpu-feature-registers.rst 描述,EL0 可以访问
某些 ID 寄存器。
这些 ID 寄存器可能表示功能的可用性。
@@ -152,12 +152,12 @@ HWCAP_SB
ID_AA64ISAR1_EL1.SB == 0b0001 表示有此功能。
HWCAP_PACA
- 如 Documentation/arm64/pointer-authentication.rst 所描述,
+ 如 Documentation/arch/arm64/pointer-authentication.rst 所描述,
ID_AA64ISAR1_EL1.APA == 0b0001 或 ID_AA64ISAR1_EL1.API == 0b0001
表示有此功能。
HWCAP_PACG
- 如 Documentation/arm64/pointer-authentication.rst 所描述,
+ 如 Documentation/arch/arm64/pointer-authentication.rst 所描述,
ID_AA64ISAR1_EL1.GPA == 0b0001 或 ID_AA64ISAR1_EL1.GPI == 0b0001
表示有此功能。
diff --git a/Documentation/translations/zh_CN/arm64/hugetlbpage.rst b/Documentation/translations/zh_CN/arch/arm64/hugetlbpage.rst
index 13304d269d0b..8079eadde29a 100644
--- a/Documentation/translations/zh_CN/arm64/hugetlbpage.rst
+++ b/Documentation/translations/zh_CN/arch/arm64/hugetlbpage.rst
@@ -1,6 +1,6 @@
-.. include:: ../disclaimer-zh_CN.rst
+.. include:: ../../disclaimer-zh_CN.rst
-:Original: :ref:`Documentation/arm64/hugetlbpage.rst <hugetlbpage_index>`
+:Original: :ref:`Documentation/arch/arm64/hugetlbpage.rst <hugetlbpage_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
diff --git a/Documentation/translations/zh_CN/arm64/index.rst b/Documentation/translations/zh_CN/arch/arm64/index.rst
index 57dc5de5ccc5..e12b9f6e5d6c 100644
--- a/Documentation/translations/zh_CN/arm64/index.rst
+++ b/Documentation/translations/zh_CN/arch/arm64/index.rst
@@ -1,6 +1,6 @@
-.. include:: ../disclaimer-zh_CN.rst
+.. include:: ../../disclaimer-zh_CN.rst
-:Original: :ref:`Documentation/arm64/index.rst <arm64_index>`
+:Original: :ref:`Documentation/arch/arm64/index.rst <arm64_index>`
:Translator: Bailu Lin <bailu.lin@vivo.com>
.. _cn_arm64_index:
diff --git a/Documentation/translations/zh_CN/arm64/legacy_instructions.txt b/Documentation/translations/zh_CN/arch/arm64/legacy_instructions.txt
index e295cf75f606..e469fccbe356 100644
--- a/Documentation/translations/zh_CN/arm64/legacy_instructions.txt
+++ b/Documentation/translations/zh_CN/arch/arm64/legacy_instructions.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/legacy_instructions.rst
+Chinese translated version of Documentation/arch/arm64/legacy_instructions.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ Maintainer: Punit Agrawal <punit.agrawal@arm.com>
Suzuki K. Poulose <suzuki.poulose@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
---------------------------------------------------------------------
-Documentation/arm64/legacy_instructions.rst 的中文翻译
+Documentation/arch/arm64/legacy_instructions.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/memory.txt b/Documentation/translations/zh_CN/arch/arm64/memory.txt
index be20f8228b91..c6962e9cb9f8 100644
--- a/Documentation/translations/zh_CN/arm64/memory.txt
+++ b/Documentation/translations/zh_CN/arch/arm64/memory.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/memory.rst
+Chinese translated version of Documentation/arch/arm64/memory.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
Maintainer: Catalin Marinas <catalin.marinas@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
---------------------------------------------------------------------
-Documentation/arm64/memory.rst 的中文翻译
+Documentation/arch/arm64/memory.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/perf.rst b/Documentation/translations/zh_CN/arch/arm64/perf.rst
index 9bf21d73f4d1..6be72704e659 100644
--- a/Documentation/translations/zh_CN/arm64/perf.rst
+++ b/Documentation/translations/zh_CN/arch/arm64/perf.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_CN.rst
+.. include:: ../../disclaimer-zh_CN.rst
-:Original: :ref:`Documentation/arm64/perf.rst <perf_index>`
+:Original: :ref:`Documentation/arch/arm64/perf.rst <perf_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
diff --git a/Documentation/translations/zh_CN/arm64/silicon-errata.txt b/Documentation/translations/zh_CN/arch/arm64/silicon-errata.txt
index 440c59ac7dce..f4767ffdd61d 100644
--- a/Documentation/translations/zh_CN/arm64/silicon-errata.txt
+++ b/Documentation/translations/zh_CN/arch/arm64/silicon-errata.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/silicon-errata.rst
+Chinese translated version of Documentation/arch/arm64/silicon-errata.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -10,7 +10,7 @@ M: Will Deacon <will.deacon@arm.com>
zh_CN: Fu Wei <wefu@redhat.com>
C: 1926e54f115725a9248d0c4c65c22acaf94de4c4
---------------------------------------------------------------------
-Documentation/arm64/silicon-errata.rst 的中文翻译
+Documentation/arch/arm64/silicon-errata.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arm64/tagged-pointers.txt b/Documentation/translations/zh_CN/arch/arm64/tagged-pointers.txt
index 77ac3548a16d..27577c3c5e3f 100644
--- a/Documentation/translations/zh_CN/arm64/tagged-pointers.txt
+++ b/Documentation/translations/zh_CN/arch/arm64/tagged-pointers.txt
@@ -1,4 +1,4 @@
-Chinese translated version of Documentation/arm64/tagged-pointers.rst
+Chinese translated version of Documentation/arch/arm64/tagged-pointers.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -9,7 +9,7 @@ or if there is a problem with the translation.
Maintainer: Will Deacon <will.deacon@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
---------------------------------------------------------------------
-Documentation/arm64/tagged-pointers.rst 的中文翻译
+Documentation/arch/arm64/tagged-pointers.rst 的中文翻译
如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
diff --git a/Documentation/translations/zh_CN/arch/index.rst b/Documentation/translations/zh_CN/arch/index.rst
index 908ea131bb1c..6fa0cb671009 100644
--- a/Documentation/translations/zh_CN/arch/index.rst
+++ b/Documentation/translations/zh_CN/arch/index.rst
@@ -9,7 +9,7 @@
:maxdepth: 2
../mips/index
- ../arm64/index
+ arm64/index
../riscv/index
openrisc/index
parisc/index
diff --git a/Documentation/translations/zh_CN/mm/page_migration.rst b/Documentation/translations/zh_CN/mm/page_migration.rst
index 076081dc1635..f95063826a15 100644
--- a/Documentation/translations/zh_CN/mm/page_migration.rst
+++ b/Documentation/translations/zh_CN/mm/page_migration.rst
@@ -55,7 +55,7 @@ mbind()设置一个新的内存策略。一个进程的页面也可以通过sys_
消失。它还可以防止交换器或其他扫描器遇到该页。
-2. 我们需要有一个new_page_t类型的函数,可以传递给migrate_pages()。这个函数应该计算
+2. 我们需要有一个new_folio_t类型的函数,可以传递给migrate_pages()。这个函数应该计算
出如何在给定的旧页面中分配正确的新页面。
3. migrate_pages()函数被调用,它试图进行迁移。它将调用该函数为每个被考虑迁移的页面分
diff --git a/Documentation/translations/zh_TW/arm64/amu.rst b/Documentation/translations/zh_TW/arch/arm64/amu.rst
index ffdc466e0f62..f947a6c7369f 100644
--- a/Documentation/translations/zh_TW/arm64/amu.rst
+++ b/Documentation/translations/zh_TW/arch/arm64/amu.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_TW.rst
+.. include:: ../../disclaimer-zh_TW.rst
-:Original: :ref:`Documentation/arm64/amu.rst <amu_index>`
+:Original: :ref:`Documentation/arch/arm64/amu.rst <amu_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
Hu Haowen <src.res@email.cn>
diff --git a/Documentation/translations/zh_TW/arm64/booting.txt b/Documentation/translations/zh_TW/arch/arm64/booting.txt
index b9439dd54012..24817b8b70cd 100644
--- a/Documentation/translations/zh_TW/arm64/booting.txt
+++ b/Documentation/translations/zh_TW/arch/arm64/booting.txt
@@ -1,6 +1,6 @@
SPDX-License-Identifier: GPL-2.0
-Chinese translated version of Documentation/arm64/booting.rst
+Chinese translated version of Documentation/arch/arm64/booting.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -13,7 +13,7 @@ zh_CN: Fu Wei <wefu@redhat.com>
zh_TW: Hu Haowen <src.res@email.cn>
C: 55f058e7574c3615dea4615573a19bdb258696c6
---------------------------------------------------------------------
-Documentation/arm64/booting.rst 的中文翻譯
+Documentation/arch/arm64/booting.rst 的中文翻譯
如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文
交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
diff --git a/Documentation/translations/zh_TW/arm64/elf_hwcaps.rst b/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst
index 3eb1c623ce31..fca3c6ff7b93 100644
--- a/Documentation/translations/zh_TW/arm64/elf_hwcaps.rst
+++ b/Documentation/translations/zh_TW/arch/arm64/elf_hwcaps.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_TW.rst
+.. include:: ../../disclaimer-zh_TW.rst
-:Original: :ref:`Documentation/arm64/elf_hwcaps.rst <elf_hwcaps_index>`
+:Original: :ref:`Documentation/arch/arm64/elf_hwcaps.rst <elf_hwcaps_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
Hu Haowen <src.res@email.cn>
@@ -95,7 +95,7 @@ HWCAP_ASIMDHP
ID_AA64PFR0_EL1.AdvSIMD == 0b0001 表示有此功能。
HWCAP_CPUID
- 根據 Documentation/arm64/cpu-feature-registers.rst 描述,EL0 可以訪問
+ 根據 Documentation/arch/arm64/cpu-feature-registers.rst 描述,EL0 可以訪問
某些 ID 寄存器。
這些 ID 寄存器可能表示功能的可用性。
@@ -155,12 +155,12 @@ HWCAP_SB
ID_AA64ISAR1_EL1.SB == 0b0001 表示有此功能。
HWCAP_PACA
- 如 Documentation/arm64/pointer-authentication.rst 所描述,
+ 如 Documentation/arch/arm64/pointer-authentication.rst 所描述,
ID_AA64ISAR1_EL1.APA == 0b0001 或 ID_AA64ISAR1_EL1.API == 0b0001
表示有此功能。
HWCAP_PACG
- 如 Documentation/arm64/pointer-authentication.rst 所描述,
+ 如 Documentation/arch/arm64/pointer-authentication.rst 所描述,
ID_AA64ISAR1_EL1.GPA == 0b0001 或 ID_AA64ISAR1_EL1.GPI == 0b0001
表示有此功能。
diff --git a/Documentation/translations/zh_TW/arm64/hugetlbpage.rst b/Documentation/translations/zh_TW/arch/arm64/hugetlbpage.rst
index 846b500dae97..10feb329dfb8 100644
--- a/Documentation/translations/zh_TW/arm64/hugetlbpage.rst
+++ b/Documentation/translations/zh_TW/arch/arm64/hugetlbpage.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_TW.rst
+.. include:: ../../disclaimer-zh_TW.rst
-:Original: :ref:`Documentation/arm64/hugetlbpage.rst <hugetlbpage_index>`
+:Original: :ref:`Documentation/arch/arm64/hugetlbpage.rst <hugetlbpage_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
Hu Haowen <src.res@email.cn>
diff --git a/Documentation/translations/zh_TW/arm64/index.rst b/Documentation/translations/zh_TW/arch/arm64/index.rst
index 2322783f3881..68befee14b99 100644
--- a/Documentation/translations/zh_TW/arm64/index.rst
+++ b/Documentation/translations/zh_TW/arch/arm64/index.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_TW.rst
+.. include:: ../../disclaimer-zh_TW.rst
-:Original: :ref:`Documentation/arm64/index.rst <arm64_index>`
+:Original: :ref:`Documentation/arch/arm64/index.rst <arm64_index>`
:Translator: Bailu Lin <bailu.lin@vivo.com>
Hu Haowen <src.res@email.cn>
diff --git a/Documentation/translations/zh_TW/arm64/legacy_instructions.txt b/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt
index 6d4454f77b9e..3c915df9836c 100644
--- a/Documentation/translations/zh_TW/arm64/legacy_instructions.txt
+++ b/Documentation/translations/zh_TW/arch/arm64/legacy_instructions.txt
@@ -1,6 +1,6 @@
SPDX-License-Identifier: GPL-2.0
-Chinese translated version of Documentation/arm64/legacy_instructions.rst
+Chinese translated version of Documentation/arch/arm64/legacy_instructions.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -13,7 +13,7 @@ Maintainer: Punit Agrawal <punit.agrawal@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
---------------------------------------------------------------------
-Documentation/arm64/legacy_instructions.rst 的中文翻譯
+Documentation/arch/arm64/legacy_instructions.rst 的中文翻譯
如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文
交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
diff --git a/Documentation/translations/zh_TW/arm64/memory.txt b/Documentation/translations/zh_TW/arch/arm64/memory.txt
index 99c2b78b5674..2437380a26d8 100644
--- a/Documentation/translations/zh_TW/arm64/memory.txt
+++ b/Documentation/translations/zh_TW/arch/arm64/memory.txt
@@ -1,6 +1,6 @@
SPDX-License-Identifier: GPL-2.0
-Chinese translated version of Documentation/arm64/memory.rst
+Chinese translated version of Documentation/arch/arm64/memory.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -12,7 +12,7 @@ Maintainer: Catalin Marinas <catalin.marinas@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
---------------------------------------------------------------------
-Documentation/arm64/memory.rst 的中文翻譯
+Documentation/arch/arm64/memory.rst 的中文翻譯
如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文
交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
diff --git a/Documentation/translations/zh_TW/arm64/perf.rst b/Documentation/translations/zh_TW/arch/arm64/perf.rst
index f1ffd55dfe50..3b39997a52eb 100644
--- a/Documentation/translations/zh_TW/arm64/perf.rst
+++ b/Documentation/translations/zh_TW/arch/arm64/perf.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-.. include:: ../disclaimer-zh_TW.rst
+.. include:: ../../disclaimer-zh_TW.rst
-:Original: :ref:`Documentation/arm64/perf.rst <perf_index>`
+:Original: :ref:`Documentation/arch/arm64/perf.rst <perf_index>`
Translator: Bailu Lin <bailu.lin@vivo.com>
Hu Haowen <src.res@email.cn>
diff --git a/Documentation/translations/zh_TW/arm64/silicon-errata.txt b/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt
index bf2077197504..66c3a3506458 100644
--- a/Documentation/translations/zh_TW/arm64/silicon-errata.txt
+++ b/Documentation/translations/zh_TW/arch/arm64/silicon-errata.txt
@@ -1,6 +1,6 @@
SPDX-License-Identifier: GPL-2.0
-Chinese translated version of Documentation/arm64/silicon-errata.rst
+Chinese translated version of Documentation/arch/arm64/silicon-errata.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -13,7 +13,7 @@ zh_CN: Fu Wei <wefu@redhat.com>
zh_TW: Hu Haowen <src.res@email.cn>
C: 1926e54f115725a9248d0c4c65c22acaf94de4c4
---------------------------------------------------------------------
-Documentation/arm64/silicon-errata.rst 的中文翻譯
+Documentation/arch/arm64/silicon-errata.rst 的中文翻譯
如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文
交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
diff --git a/Documentation/translations/zh_TW/arm64/tagged-pointers.txt b/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt
index 87f88628401a..b7f683f20ed1 100644
--- a/Documentation/translations/zh_TW/arm64/tagged-pointers.txt
+++ b/Documentation/translations/zh_TW/arch/arm64/tagged-pointers.txt
@@ -1,6 +1,6 @@
SPDX-License-Identifier: GPL-2.0
-Chinese translated version of Documentation/arm64/tagged-pointers.rst
+Chinese translated version of Documentation/arch/arm64/tagged-pointers.rst
If you have any comment or update to the content, please contact the
original document maintainer directly. However, if you have a problem
@@ -12,7 +12,7 @@ Maintainer: Will Deacon <will.deacon@arm.com>
Chinese maintainer: Fu Wei <wefu@redhat.com>
Traditional Chinese maintainer: Hu Haowen <src.res@email.cn>
---------------------------------------------------------------------
-Documentation/arm64/tagged-pointers.rst 的中文翻譯
+Documentation/arch/arm64/tagged-pointers.rst 的中文翻譯
如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文
交流有困難的話,也可以向中文版維護者求助。如果本翻譯更新不及時或者翻
diff --git a/Documentation/translations/zh_TW/index.rst b/Documentation/translations/zh_TW/index.rst
index e97d7d578751..e7c83868e780 100644
--- a/Documentation/translations/zh_TW/index.rst
+++ b/Documentation/translations/zh_TW/index.rst
@@ -150,7 +150,7 @@ TODOList:
.. toctree::
:maxdepth: 2
- arm64/index
+ arch/arm64/index
TODOList:
diff --git a/Documentation/userspace-api/netlink/intro-specs.rst b/Documentation/userspace-api/netlink/intro-specs.rst
index a3b847eafff7..bada89699455 100644
--- a/Documentation/userspace-api/netlink/intro-specs.rst
+++ b/Documentation/userspace-api/netlink/intro-specs.rst
@@ -78,3 +78,82 @@ to see other examples.
The code generation itself is performed by ``tools/net/ynl/ynl-gen-c.py``
but it takes a few arguments so calling it directly for each file
quickly becomes tedious.
+
+YNL lib
+=======
+
+``tools/net/ynl/lib/`` contains an implementation of a C library
+(based on libmnl) which integrates with code generated by
+``tools/net/ynl/ynl-gen-c.py`` to create easy to use netlink wrappers.
+
+YNL basics
+----------
+
+The YNL library consists of two parts - the generic code (functions
+prefixed by ``ynl_``) and per-family auto-generated code (prefixed
+with the name of the family).
+
+To create a YNL socket call ynl_sock_create() passing the family
+struct (family structs are exported by the auto-generated code).
+ynl_sock_destroy() closes the socket.
+
+YNL requests
+------------
+
+Steps for issuing YNL requests are best explained by an example.
+All the functions and types in this example come from the auto-generated
+code (for the netdev family in this case):
+
+.. code-block:: c
+
+ // 0. Request and response pointers
+ struct netdev_dev_get_req *req;
+ struct netdev_dev_get_rsp *d;
+
+ // 1. Allocate a request
+ req = netdev_dev_get_req_alloc();
+ // 2. Set request parameters (as needed)
+ netdev_dev_get_req_set_ifindex(req, ifindex);
+
+ // 3. Issue the request
+ d = netdev_dev_get(ys, req);
+ // 4. Free the request arguments
+ netdev_dev_get_req_free(req);
+ // 5. Error check (the return value from step 3)
+ if (!d) {
+ // 6. Print the YNL-generated error
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ return -1;
+ }
+
+ // ... do stuff with the response @d
+
+ // 7. Free response
+ netdev_dev_get_rsp_free(d);
+
+YNL dumps
+---------
+
+Performing dumps follows a similar pattern to requests.
+Dumps return a list of objects terminated by a special marker,
+or NULL on error. Use ``ynl_dump_foreach()`` to iterate over
+the result.
+
+YNL notifications
+-----------------
+
+YNL lib supports using the same socket for notifications and
+requests. In case notifications arrive during processing of a request
+they are queued internally and can be retrieved at a later time.
+
+To subscribe to notifications use ``ynl_subscribe()``.
+The notifications have to be read out from the socket,
+``ynl_socket_get_fd()`` returns the underlying socket fd which can
+be plugged into appropriate asynchronous IO API like ``poll``,
+or ``select``.
+
+Notifications can be retrieved using ``ynl_ntf_dequeue()`` and have
+to be freed using ``ynl_ntf_free()``. Since we don't know the notification
+type upfront the notifications are returned as ``struct ynl_ntf_base_type *``
+and user is expected to cast them to the appropriate full type based
+on the ``cmd`` member.
diff --git a/Documentation/virt/guest-halt-polling.rst b/Documentation/virt/guest-halt-polling.rst
index b4e747942417..922291ddc40c 100644
--- a/Documentation/virt/guest-halt-polling.rst
+++ b/Documentation/virt/guest-halt-polling.rst
@@ -72,7 +72,7 @@ high once achieves global guest_halt_poll_ns value).
Default: Y
-The module parameters can be set from the debugfs files in::
+The module parameters can be set from the sysfs files in::
/sys/module/haltpoll/parameters/
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index add067793b90..96c4475539c2 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2613,7 +2613,7 @@ follows::
this vcpu, and determines which register slices are visible through
this ioctl interface.
-(See Documentation/arm64/sve.rst for an explanation of the "vq"
+(See Documentation/arch/arm64/sve.rst for an explanation of the "vq"
nomenclature.)
KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
diff --git a/Documentation/virt/kvm/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst
index 3fae39b1a5ba..4f1a1b23d99c 100644
--- a/Documentation/virt/kvm/halt-polling.rst
+++ b/Documentation/virt/kvm/halt-polling.rst
@@ -112,11 +112,11 @@ powerpc kvm-hv case.
| | function. | |
+-----------------------+---------------------------+-------------------------+
-These module parameters can be set from the debugfs files in:
+These module parameters can be set from the sysfs files in:
/sys/module/kvm/parameters/
-Note: that these module parameters are system wide values and are not able to
+Note: these module parameters are system-wide values and are not able to
be tuned on a per vm basis.
Any changes to these parameters will be picked up by new and existing vCPUs the
@@ -142,12 +142,12 @@ Further Notes
global max polling interval (halt_poll_ns) then the host will always poll for the
entire block time and thus cpu utilisation will go to 100%.
-- Halt polling essentially presents a trade off between power usage and latency and
+- Halt polling essentially presents a trade-off between power usage and latency and
the module parameters should be used to tune the affinity for this. Idle cpu time is
essentially converted to host kernel time with the aim of decreasing latency when
entering the guest.
- Halt polling will only be conducted by the host when no other tasks are runnable on
that cpu, otherwise the polling will cease immediately and schedule will be invoked to
- allow that other task to run. Thus this doesn't allow a guest to denial of service the
- cpu.
+ allow that other task to run. Thus this doesn't allow a guest to cause denial of service
+ of the cpu.
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 8c77554e4896..3a034db5e55f 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -67,7 +67,7 @@ following two cases:
2. Write-Protection: The SPTE is present and the fault is caused by
write-protect. That means we just need to change the W bit of the spte.
-What we use to avoid all the race is the Host-writable bit and MMU-writable bit
+What we use to avoid all the races is the Host-writable bit and MMU-writable bit
on the spte:
- Host-writable means the gfn is writable in the host kernel page tables and in
@@ -130,7 +130,7 @@ to gfn. For indirect sp, we disabled fast page fault for simplicity.
A solution for indirect sp could be to pin the gfn, for example via
kvm_vcpu_gfn_to_pfn_atomic, before the cmpxchg. After the pinning:
-- We have held the refcount of pfn that means the pfn can not be freed and
+- We have held the refcount of pfn; that means the pfn can not be freed and
be reused for another gfn.
- The pfn is writable and therefore it cannot be shared between different gfns
by KSM.
@@ -186,22 +186,22 @@ writable between reading spte and updating spte. Like below case:
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
+if it can be updated out of mmu-lock [see spte_has_volatile_bits()]; it means
the spte is always atomically updated in this case.
3) flush tlbs due to spte updated
-If the spte is updated from writable to readonly, we should flush all TLBs,
+If the spte is updated from writable to read-only, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on
-fast page fault path, in order to easily audit the path, we see if TLBs need
-be flushed caused by this reason in mmu_spte_update() since this is a common
+fast page fault path. In order to easily audit the path, we check whether TLBs
+need to be flushed for this reason in mmu_spte_update() since this is a common
function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
-atomically update the spte, the race caused by fast page fault can be avoided,
+atomically update the spte and the race caused by fast page fault can be avoided.
See the comments in spte_has_volatile_bits() and mmu_spte_update().
Lockless Access Tracking:
@@ -283,9 +283,9 @@ time it will be set using the Dirty tracking mechanism described above.
:Arch: x86
:Protects: wakeup_vcpus_on_cpu
:Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts.
- When VT-d posted-interrupts is supported and the VM has assigned
+ When VT-d posted-interrupts are supported and the VM has assigned
devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
- protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
+ protected by blocked_vcpu_on_cpu_lock. When VT-d hardware issues
wakeup notification event since external interrupts from the
assigned devices happens, we will find the vCPU on the list to
wakeup.
diff --git a/Documentation/virt/kvm/ppc-pv.rst b/Documentation/virt/kvm/ppc-pv.rst
index 5fdb907670be..740d03d25300 100644
--- a/Documentation/virt/kvm/ppc-pv.rst
+++ b/Documentation/virt/kvm/ppc-pv.rst
@@ -89,7 +89,7 @@ also define a new hypercall feature to indicate that the host can give you more
registers. Only if the host supports the additional features, make use of them.
The magic page layout is described by struct kvm_vcpu_arch_shared
-in arch/powerpc/include/asm/kvm_para.h.
+in arch/powerpc/include/uapi/asm/kvm_para.h.
Magic page features
===================
@@ -112,7 +112,7 @@ Magic page flags
================
In addition to features that indicate whether a host is capable of a particular
-feature we also have a channel for a guest to tell the guest whether it's capable
+feature we also have a channel for a guest to tell the host whether it's capable
of something. This is what we call "flags".
Flags are passed to the host in the low 12 bits of the Effective Address.
@@ -139,7 +139,7 @@ Patched instructions
====================
The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
-respectively on 32 bit systems with an added offset of 4 to accommodate for big
+respectively on 32-bit systems with an added offset of 4 to accommodate for big
endianness.
The following is a list of mapping the Linux kernel performs when running as
@@ -210,7 +210,7 @@ available on all targets.
2) PAPR hypercalls
PAPR hypercalls are needed to run server PowerPC PAPR guests (-M pseries in QEMU).
-These are the same hypercalls that pHyp, the POWER hypervisor implements. Some of
+These are the same hypercalls that pHyp, the POWER hypervisor, implements. Some of
them are handled in the kernel, some are handled in user space. This is only
available on book3s_64.
diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
index 87f04c1fa53d..06718b9bc959 100644
--- a/Documentation/virt/kvm/vcpu-requests.rst
+++ b/Documentation/virt/kvm/vcpu-requests.rst
@@ -101,7 +101,7 @@ also be used, e.g. ::
However, VCPU request users should refrain from doing so, as it would
break the abstraction. The first 8 bits are reserved for architecture
-independent requests, all additional bits are available for architecture
+independent requests; all additional bits are available for architecture
dependent requests.
Architecture Independent Requests
@@ -151,8 +151,8 @@ KVM_REQUEST_NO_WAKEUP
This flag is applied to requests that only need immediate attention
from VCPUs running in guest mode. That is, sleeping VCPUs do not need
- to be awaken for these requests. Sleeping VCPUs will handle the
- requests when they are awaken later for some other reason.
+ to be awakened for these requests. Sleeping VCPUs will handle the
+ requests when they are awakened later for some other reason.
KVM_REQUEST_WAIT
diff --git a/Documentation/virt/paravirt_ops.rst b/Documentation/virt/paravirt_ops.rst
index 6b789d27cead..62d867e0d4d6 100644
--- a/Documentation/virt/paravirt_ops.rst
+++ b/Documentation/virt/paravirt_ops.rst
@@ -5,31 +5,31 @@ Paravirt_ops
============
Linux provides support for different hypervisor virtualization technologies.
-Historically different binary kernels would be required in order to support
-different hypervisors, this restriction was removed with pv_ops.
+Historically, different binary kernels would be required in order to support
+different hypervisors; this restriction was removed with pv_ops.
Linux pv_ops is a virtualization API which enables support for different
hypervisors. It allows each hypervisor to override critical operations and
allows a single kernel binary to run on all supported execution environments
including native machine -- without any hypervisors.
pv_ops provides a set of function pointers which represent operations
-corresponding to low level critical instructions and high level
-functionalities in various areas. pv-ops allows for optimizations at run
-time by enabling binary patching of the low-ops critical operations
+corresponding to low-level critical instructions and high-level
+functionalities in various areas. pv_ops allows for optimizations at run
+time by enabling binary patching of the low-level critical operations
at boot time.
pv_ops operations are classified into three categories:
- simple indirect call
- These operations correspond to high level functionality where it is
+ These operations correspond to high-level functionality where it is
known that the overhead of indirect call isn't very important.
- indirect call which allows optimization with binary patch
- Usually these operations correspond to low level critical instructions. They
+ Usually these operations correspond to low-level critical instructions. They
are called frequently and are performance critical. The overhead is
very important.
- a set of macros for hand written assembly code
Hand written assembly codes (.S files) also need paravirtualization
- because they include sensitive instructions or some of code paths in
+ because they include sensitive instructions or some code paths in
them are very performance critical.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6992b7cc7095..acbe54087d1c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -909,13 +909,6 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/altera/
-ALTERA TSE PCS
-M: Maxime Chevallier <maxime.chevallier@bootlin.com>
-L: netdev@vger.kernel.org
-S: Supported
-F: drivers/net/pcs/pcs-altera-tse.c
-F: include/linux/pcs-altera-tse.h
-
ALTERA UART/JTAG UART SERIAL DRIVERS
M: Tobias Klauser <tklauser@distanz.ch>
L: linux-serial@vger.kernel.org
@@ -2710,7 +2703,7 @@ Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
B: mailto:linux-samsung-soc@vger.kernel.org
C: irc://irc.libera.chat/linux-exynos
T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git
-F: Documentation/arm/samsung/
+F: Documentation/arch/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
F: Documentation/devicetree/bindings/hwinfo/samsung,*
F: Documentation/devicetree/bindings/power/pd-samsung.yaml
@@ -3062,7 +3055,7 @@ M: Will Deacon <will@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git
-F: Documentation/arm64/
+F: Documentation/arch/arm64/
F: arch/arm64/
F: tools/testing/selftests/arm64/
X: arch/arm64/boot/dts/
@@ -3613,6 +3606,7 @@ S: Supported
W: http://www.bluez.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
+F: Documentation/devicetree/bindings/net/bluetooth/
F: drivers/bluetooth/
BLUETOOTH SUBSYSTEM
@@ -4487,6 +4481,13 @@ S: Supported
F: Documentation/filesystems/caching/cachefiles.rst
F: fs/cachefiles/
+CACHESTAT: PAGE CACHE STATS FOR A FILE
+M: Nhat Pham <nphamcs@gmail.com>
+M: Johannes Weiner <hannes@cmpxchg.org>
+L: linux-mm@kvack.org
+S: Maintained
+F: tools/testing/selftests/cachestat/test_cachestat.c
+
CADENCE MIPI-CSI2 BRIDGES
M: Maxime Ripard <mripard@kernel.org>
L: linux-media@vger.kernel.org
@@ -5344,6 +5345,18 @@ F: include/linux/sched/cpufreq.h
F: kernel/sched/cpufreq*.c
F: tools/testing/selftests/cpufreq/
+CPU HOTPLUG
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Peter Zijlstra <peterz@infradead.org>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git smp/core
+F: kernel/cpu.c
+F: kernel/smpboot.*
+F: include/linux/cpu.h
+F: include/linux/cpuhotplug.h
+F: include/linux/smpboot.h
+
CPU IDLE TIME MANAGEMENT FRAMEWORK
M: "Rafael J. Wysocki" <rafael@kernel.org>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
@@ -6227,6 +6240,12 @@ X: Documentation/power/
X: Documentation/spi/
X: Documentation/userspace-api/media/
+DOCUMENTATION PROCESS
+M: Jonathan Corbet <corbet@lwn.net>
+S: Maintained
+F: Documentation/process/
+L: workflows@vger.kernel.org
+
DOCUMENTATION REPORTING ISSUES
M: Thorsten Leemhuis <linux@leemhuis.info>
L: linux-doc@vger.kernel.org
@@ -7482,6 +7501,14 @@ L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/edac/mpc85xx_edac.[ch]
+EDAC-NPCM
+M: Marvin Lin <kflin@nuvoton.com>
+M: Stanley Chu <yschu@nuvoton.com>
+L: linux-edac@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/memory-controllers/nuvoton,npcm-memory-controller.yaml
+F: drivers/edac/npcm_edac.c
+
EDAC-PASEMI
M: Egor Martovetsky <egor@pasemi.com>
L: linux-edac@vger.kernel.org
@@ -7978,6 +8005,12 @@ S: Maintained
F: drivers/hwmon/f75375s.c
F: include/linux/f75375s.h
+FINTEK F81604 USB to 2xCANBUS DEVICE DRIVER
+M: Ji-Ze Hong (Peter Hong) <peter_hong@fintek.com.tw>
+L: linux-can@vger.kernel.org
+S: Maintained
+F: drivers/net/can/usb/f81604.c
+
FIREWIRE AUDIO DRIVERS and IEC 61883-1/6 PACKET STREAMING ENGINE
M: Clemens Ladisch <clemens@ladisch.de>
M: Takashi Sakamoto <o-takashi@sakamocchi.jp>
@@ -8073,6 +8106,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
F: include/linux/fortify-string.h
F: lib/fortify_kunit.c
F: lib/memcpy_kunit.c
+F: lib/strcat_kunit.c
F: lib/strscpy_kunit.c
F: lib/test_fortify/*
F: scripts/test_fortify.sh
@@ -9972,8 +10006,9 @@ M: Miquel Raynal <miquel.raynal@bootlin.com>
L: linux-wpan@vger.kernel.org
S: Maintained
W: https://linux-wpan.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git
+Q: https://patchwork.kernel.org/project/linux-wpan/list/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan-next.git
F: Documentation/networking/ieee802154.rst
F: drivers/net/ieee802154/
F: include/linux/ieee802154.h
@@ -10345,9 +10380,8 @@ M: Jesse Brandeburg <jesse.brandeburg@intel.com>
M: Tony Nguyen <anthony.l.nguyen@intel.com>
L: intel-wired-lan@lists.osuosl.org (moderated for non-subscribers)
S: Supported
-W: http://www.intel.com/support/feedback.htm
-W: http://e1000.sourceforge.net/
-Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/
+W: https://www.intel.com/content/www/us/en/support.html
+Q: https://patchwork.ozlabs.org/project/intel-wired-lan/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git
F: Documentation/networking/device_drivers/ethernet/intel/
@@ -11274,6 +11308,10 @@ W: http://kernelnewbies.org/KernelJanitors
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: Chuck Lever <chuck.lever@oracle.com>
M: Jeff Layton <jlayton@kernel.org>
+R: Neil Brown <neilb@suse.de>
+R: Olga Kornievskaia <kolga@netapp.com>
+R: Dai Ngo <Dai.Ngo@oracle.com>
+R: Tom Talpey <tom@talpey.com>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
@@ -11331,6 +11369,8 @@ L: linux-kselftest@vger.kernel.org
L: kunit-dev@googlegroups.com
S: Maintained
W: https://google.github.io/kunit-docs/third_party/kernel/docs/
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kunit
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git kunit-fixes
F: Documentation/dev-tools/kunit/
F: include/kunit/
F: lib/kunit/
@@ -12545,12 +12585,11 @@ MARVELL NAND CONTROLLER DRIVER
M: Miquel Raynal <miquel.raynal@bootlin.com>
L: linux-mtd@lists.infradead.org
S: Maintained
-F: Documentation/devicetree/bindings/mtd/marvell-nand.txt
F: drivers/mtd/nand/raw/marvell_nand.c
MARVELL OCTEON ENDPOINT DRIVER
M: Veerasenareddy Burru <vburru@marvell.com>
-M: Abhijit Ayarekar <aayarekar@marvell.com>
+M: Sathesh Edara <sedara@marvell.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/ethernet/marvell/octeon_ep
@@ -12849,6 +12888,13 @@ F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt
F: drivers/net/ieee802154/mcr20a.c
F: drivers/net/ieee802154/mcr20a.h
+MDIO REGMAP DRIVER
+M: Maxime Chevallier <maxime.chevallier@bootlin.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/mdio/mdio-regmap.c
+F: include/linux/mdio/mdio-regmap.h
+
MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
M: William Breathitt Gray <william.gray@linaro.org>
L: linux-iio@vger.kernel.org
@@ -13148,6 +13194,15 @@ S: Maintained
F: drivers/net/pcs/pcs-mtk-lynxi.c
F: include/linux/pcs/pcs-mtk-lynxi.h
+MEDIATEK ETHERNET PHY DRIVERS
+M: Daniel Golle <daniel@makrotopia.org>
+M: Qingfang Deng <dqfext@gmail.com>
+M: SkyLake Huang <SkyLake.Huang@mediatek.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/phy/mediatek-ge-soc.c
+F: drivers/net/phy/mediatek-ge.c
+
MEDIATEK I2C CONTROLLER DRIVER
M: Qii Wang <qii.wang@mediatek.com>
L: linux-i2c@vger.kernel.org
@@ -13209,6 +13264,7 @@ R: Shayne Chen <shayne.chen@mediatek.com>
R: Sean Wang <sean.wang@mediatek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/nbd168/wireless
F: Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
F: drivers/net/wireless/mediatek/mt76/
@@ -13269,10 +13325,11 @@ F: drivers/memory/mtk-smi.c
F: include/soc/mediatek/smi.h
MEDIATEK SWITCH DRIVER
-M: Sean Wang <sean.wang@mediatek.com>
+M: Arınç ÜNAL <arinc.unal@arinc9.com>
+M: Daniel Golle <daniel@makrotopia.org>
M: Landen Chao <Landen.Chao@mediatek.com>
M: DENG Qingfang <dqfext@gmail.com>
-M: Daniel Golle <daniel@makrotopia.org>
+M: Sean Wang <sean.wang@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/dsa/mt7530-mdio.c
@@ -14693,7 +14750,7 @@ NETWORKING [LABELED] (NetLabel, Labeled IPsec, SECMARK)
M: Paul Moore <paul@paul-moore.com>
L: netdev@vger.kernel.org
L: linux-security-module@vger.kernel.org
-S: Maintained
+S: Supported
W: https://github.com/netlabel
F: Documentation/netlabel/
F: include/net/calipso.h
@@ -14729,6 +14786,7 @@ NETWORKING [TCP]
M: Eric Dumazet <edumazet@google.com>
L: netdev@vger.kernel.org
S: Maintained
+F: include/linux/net_mm.h
F: include/linux/tcp.h
F: include/net/tcp.h
F: include/trace/events/tcp.h
@@ -15297,7 +15355,7 @@ OMAP DISPLAY SUBSYSTEM and FRAMEBUFFER SUPPORT (DSS2)
L: linux-omap@vger.kernel.org
L: linux-fbdev@vger.kernel.org
S: Orphan
-F: Documentation/arm/omap/dss.rst
+F: Documentation/arch/arm/omap/dss.rst
F: drivers/video/fbdev/omap2/
OMAP FRAMEBUFFER SUPPORT
@@ -15948,7 +16006,7 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+R: Ajay Kaher <akaher@vmware.com>
R: Alexey Makhalov <amakhalov@vmware.com>
R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
@@ -16384,7 +16442,7 @@ F: Documentation/devicetree/bindings/pci/intel,keembay-pcie*
F: drivers/pci/controller/dwc/pcie-keembay.c
PCIE DRIVER FOR INTEL LGM GW SOC
-M: Rahul Tanwar <rtanwar@maxlinear.com>
+M: Chuanhua Lei <lchuanhua@maxlinear.com>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml
@@ -17358,6 +17416,8 @@ QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
L: ath11k@lists.infradead.org
S: Supported
+W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
+B: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k/bugreport
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
F: drivers/net/wireless/ath/ath11k/
@@ -17367,6 +17427,7 @@ M: Toke Høiland-Jørgensen <toke@toke.dk>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
F: Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
F: drivers/net/wireless/ath/ath9k/
@@ -17796,7 +17857,7 @@ M: Boqun Feng <boqun.feng@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
-R: Zqiang <qiang1.zhang@intel.com>
+R: Zqiang <qiang.zhang1211@gmail.com>
L: rcu@vger.kernel.org
S: Supported
W: http://www.rdrop.com/users/paulmck/RCU/
@@ -17827,7 +17888,7 @@ F: tools/testing/selftests/rtc/
Real-time Linux Analysis (RTLA) tools
M: Daniel Bristot de Oliveira <bristot@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
-L: linux-trace-devel@vger.kernel.org
+L: linux-trace-kernel@vger.kernel.org
S: Maintained
F: Documentation/tools/rtla/
F: tools/tracing/rtla/
@@ -18397,7 +18458,7 @@ F: drivers/infiniband/ulp/rtrs/
RUNTIME VERIFICATION (RV)
M: Daniel Bristot de Oliveira <bristot@kernel.org>
M: Steven Rostedt <rostedt@goodmis.org>
-L: linux-trace-devel@vger.kernel.org
+L: linux-trace-kernel@vger.kernel.org
S: Maintained
F: Documentation/trace/rv/
F: include/linux/rv.h
@@ -22520,7 +22581,7 @@ S: Supported
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
+M: Ajay Kaher <akaher@vmware.com>
M: Alexey Makhalov <amakhalov@vmware.com>
R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
@@ -22547,8 +22608,8 @@ F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
-M: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
M: Deep Shah <sdeep@vmware.com>
+R: Ajay Kaher <akaher@vmware.com>
R: Alexey Makhalov <amakhalov@vmware.com>
R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
L: netdev@vger.kernel.org
@@ -23152,6 +23213,7 @@ F: drivers/iio/adc/xilinx-ams.c
XILINX AXI ETHERNET DRIVER
M: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
S: Maintained
+F: Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml
F: drivers/net/ethernet/xilinx/xilinx_axienet*
XILINX CAN DRIVER
diff --git a/Makefile b/Makefile
index b68b43c19072..48a044bfe062 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -1026,6 +1026,12 @@ KBUILD_CFLAGS += -Wno-pointer-sign
# globally built with -Wcast-function-type.
KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
+# To gain proper coverage for CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE,
+# the kernel uses only C99 flexible arrays for dynamically sized trailing
+# arrays. Enforce this for everything that may examine structure sizes and
+# perform bounds checking.
+KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
+
# disable stringop warnings in gcc 8+
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
diff --git a/arch/Kconfig b/arch/Kconfig
index 205fd23e0cad..aff2746c8af2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,29 @@ config ARCH_HAS_SUBPAGE_FAULTS
config HOTPLUG_SMT
bool
+# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
+config HOTPLUG_CORE_SYNC
+ bool
+
+# Basic CPU dead synchronization selected by architecture
+config HOTPLUG_CORE_SYNC_DEAD
+ bool
+ select HOTPLUG_CORE_SYNC
+
+# Full CPU synchronization with alive state selected by architecture
+config HOTPLUG_CORE_SYNC_FULL
+ bool
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+ select HOTPLUG_CORE_SYNC
+
+config HOTPLUG_SPLIT_STARTUP
+ bool
+ select HOTPLUG_CORE_SYNC_FULL
+
+config HOTPLUG_PARALLEL
+ bool
+ select HOTPLUG_SPLIT_STARTUP
+
config GENERIC_ENTRY
bool
@@ -285,6 +308,9 @@ config ARCH_HAS_DMA_SET_UNCACHED
config ARCH_HAS_DMA_CLEAR_UNCACHED
bool
+config ARCH_HAS_CPU_FINALIZE_INIT
+ bool
+
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
@@ -400,20 +426,14 @@ config HAVE_HARDLOCKUP_DETECTOR_PERF
The arch chooses to use the generic perf-NMI-based hardlockup
detector. Must define HAVE_PERF_EVENTS_NMI.
-config HAVE_NMI_WATCHDOG
- depends on HAVE_NMI
- bool
- help
- The arch provides a low level NMI watchdog. It provides
- asm/nmi.h, and defines its own arch_touch_nmi_watchdog().
-
config HAVE_HARDLOCKUP_DETECTOR_ARCH
bool
- select HAVE_NMI_WATCHDOG
help
- The arch chooses to provide its own hardlockup detector, which is
- a superset of the HAVE_NMI_WATCHDOG. It also conforms to config
- interfaces and parameters provided by hardlockup detector subsystem.
+ The arch provides its own hardlockup detector implementation instead
+ of the generic ones.
+
+ It uses the same command line parameters, and sysctl interface,
+ as the generic hardlockup detectors.
config HAVE_PERF_REGS
bool
@@ -1188,13 +1208,6 @@ config COMPAT_32BIT_TIME
config ARCH_NO_PREEMPT
bool
-config ARCH_EPHEMERAL_INODES
- def_bool n
- help
- An arch should select this symbol if it doesn't keep track of inode
- instances on its own, but instead relies on something else (e.g. the
- host kernel for an UML kernel).
-
config ARCH_SUPPORTS_RT
bool
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index f2861a43a61e..cbd9244571af 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -200,25 +200,6 @@ ATOMIC_OPS(xor, xor)
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic64_cmpxchg(v, old, new) \
- (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic64_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-
-#define arch_atomic_cmpxchg(v, old, new) \
- (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-
-/**
- * arch_atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int c, new, old;
@@ -242,15 +223,6 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 c, new, old;
@@ -274,13 +246,6 @@ static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 old, tmp;
diff --git a/arch/alpha/include/asm/bugs.h b/arch/alpha/include/asm/bugs.h
deleted file mode 100644
index 78030d1c7e7e..000000000000
--- a/arch/alpha/include/asm/bugs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * include/asm-alpha/bugs.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- */
-
-/*
- * I don't know of any alpha bugs yet.. Nice chip
- */
-
-static void check_bugs(void)
-{
-}
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 739891b94136..e94f621903fe 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -137,6 +137,9 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 2a9a877a0508..d98701ee36c6 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1014,8 +1014,6 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
return do_sys_settimeofday64(tv ? &kts : NULL, tz ? &ktz : NULL);
}
-asmlinkage long sys_ni_posix_timers(void);
-
SYSCALL_DEFINE2(osf_utimes, const char __user *, filename,
struct timeval32 __user *, tvs)
{
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 33bf3a627002..b650ff1cb022 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -658,7 +658,7 @@ setup_arch(char **cmdline_p)
#endif
/* Default root filesystem to sda2. */
- ROOT_DEV = Root_SDA2;
+ ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2);
#ifdef CONFIG_EISA
/* FIXME: only set this when we actually have EISA in this box? */
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 8ebacf37a8cf..1f13995d00d7 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -490,3 +490,4 @@
558 common process_mrelease sys_process_mrelease
559 common futex_waitv sys_futex_waitv
560 common set_mempolicy_home_node sys_ni_syscall
+561 common cachestat sys_cachestat
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
index 2c830347bfb4..89d12a60f84c 100644
--- a/arch/arc/include/asm/atomic-spinlock.h
+++ b/arch/arc/include/asm/atomic-spinlock.h
@@ -81,6 +81,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
@@ -92,7 +97,11 @@ ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 52ee51e1ff7c..592d7fffc223 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -22,30 +22,6 @@
#include <asm/atomic-spinlock.h>
#endif
-#define arch_atomic_cmpxchg(v, o, n) \
-({ \
- arch_cmpxchg(&((v)->counter), (o), (n)); \
-})
-
-#ifdef arch_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_relaxed(v, o, n) \
-({ \
- arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \
-})
-#endif
-
-#define arch_atomic_xchg(v, n) \
-({ \
- arch_xchg(&((v)->counter), (n)); \
-})
-
-#ifdef arch_xchg_relaxed
-#define arch_atomic_xchg_relaxed(v, n) \
-({ \
- arch_xchg_relaxed(&((v)->counter), (n)); \
-})
-#endif
-
/*
* 64-bit atomics
*/
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index c5a8010fdc97..6b6db981967a 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -159,6 +159,7 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
return prev;
}
+#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
@@ -179,14 +180,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
return prev;
}
-
-/**
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
+#define arch_atomic64_xchg arch_atomic64_xchg
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
@@ -212,15 +206,6 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 old, temp;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9ed7f03ba15a..d60b73d93e03 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM
select ARCH_32BIT_OFF_T
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CPU_FINALIZE_INIT if MMU
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
@@ -124,6 +125,7 @@ config ARM
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16
select HAVE_VIRT_CPU_ACCOUNTING_GEN
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_FORCED_THREADING
select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_REL
@@ -1781,7 +1783,7 @@ config VFP
Say Y to include VFP support code in the kernel. This is needed
if your hardware includes a VFP unit.
- Please see <file:Documentation/arm/vfp/release-notes.rst> for
+ Please see <file:Documentation/arch/arm/vfp/release-notes.rst> for
release notes and additional status information.
Say N if your target does not have VFP hardware.
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index 1feb6b0f7a1f..627752f18661 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -2,6 +2,7 @@
#include <linux/libfdt_env.h>
#include <asm/setup.h>
#include <libfdt.h>
+#include "misc.h"
#if defined(CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND)
#define do_extend_cmdline 1
diff --git a/arch/arm/boot/compressed/fdt_check_mem_start.c b/arch/arm/boot/compressed/fdt_check_mem_start.c
index 9291a2661bdf..aa856567fd33 100644
--- a/arch/arm/boot/compressed/fdt_check_mem_start.c
+++ b/arch/arm/boot/compressed/fdt_check_mem_start.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/libfdt.h>
#include <linux/sizes.h>
+#include "misc.h"
static const void *get_prop(const void *fdt, const char *node_path,
const char *property, int minlen)
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index abfed1aa2baa..6b4baa6a9a50 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -103,9 +103,6 @@ static void putstr(const char *ptr)
/*
* gzip declarations
*/
-extern char input_data[];
-extern char input_data_end[];
-
unsigned char *output_data;
unsigned long free_mem_ptr;
@@ -131,9 +128,6 @@ asmlinkage void __div0(void)
error("Attempting division by 0!");
}
-extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
-
-
void
decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
unsigned long free_mem_ptr_end_p,
diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h
index c958dccd1d97..6da00a26ac08 100644
--- a/arch/arm/boot/compressed/misc.h
+++ b/arch/arm/boot/compressed/misc.h
@@ -6,5 +6,16 @@
void error(char *x) __noreturn;
extern unsigned long free_mem_ptr;
extern unsigned long free_mem_end_ptr;
+void __div0(void);
+void
+decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
+ unsigned long free_mem_ptr_end_p, int arch_id);
+void fortify_panic(const char *name);
+int atags_to_fdt(void *atag_list, void *fdt, int total_space);
+uint32_t fdt_check_mem_start(uint32_t mem_start, const void *fdt);
+int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x));
+
+extern char input_data[];
+extern char input_data_end[];
#endif
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 8a9aeeb504dd..e013ff1168d3 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -21,7 +21,7 @@
/*
* The public API for this code is documented in arch/arm/include/asm/mcpm.h.
* For a comprehensive description of the main algorithm used here, please
- * see Documentation/arm/cluster-pm-race-avoidance.rst.
+ * see Documentation/arch/arm/cluster-pm-race-avoidance.rst.
*/
struct sync_struct mcpm_sync;
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 299495c43dfd..f590e803ca11 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -5,7 +5,7 @@
* Created by: Nicolas Pitre, March 2012
* Copyright: (C) 2012-2013 Linaro Limited
*
- * Refer to Documentation/arm/cluster-pm-race-avoidance.rst
+ * Refer to Documentation/arch/arm/cluster-pm-race-avoidance.rst
* for details of the synchronisation algorithms used here.
*/
diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S
index 1fa09c4697ed..c5eaed5a76f0 100644
--- a/arch/arm/common/vlock.S
+++ b/arch/arm/common/vlock.S
@@ -6,7 +6,7 @@
* Copyright: (C) 2012-2013 Linaro Limited
*
* This algorithm is described in more detail in
- * Documentation/arm/vlocks.rst.
+ * Documentation/arch/arm/vlocks.rst.
*/
#include <linux/linkage.h>
diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h
index f4db3e75d75f..f3cd04ff022d 100644
--- a/arch/arm/include/asm/arm_pmuv3.h
+++ b/arch/arm/include/asm/arm_pmuv3.h
@@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
return false;
}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
+
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
#define ARMV8_PMU_DFR_VER_V3P4 0x5
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 505a306e0271..aebe2c8f6a68 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -394,6 +394,23 @@ ALT_UP_B(.L0_\@)
#endif
.endm
+/*
+ * Raw SMP data memory barrier
+ */
+ .macro __smp_dmb mode
+#if __LINUX_ARM_ARCH__ >= 7
+ .ifeqs "\mode","arm"
+ dmb ish
+ .else
+ W(dmb) ish
+ .endif
+#elif __LINUX_ARM_ARCH__ == 6
+ mcr p15, 0, r0, c7, c10, 5 @ dmb
+#else
+ .error "Incompatible SMP platform"
+#endif
+ .endm
+
#if defined(CONFIG_CPU_V7M)
/*
* setmode is used to assert to be in svc mode during boot. For v7-M
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index db8512d9a918..f0e3b01afa74 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -197,6 +197,16 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
return val; \
}
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
int ret;
@@ -210,8 +220,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
-
-#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
#endif /* __LINUX_ARM_ARCH__ */
@@ -240,8 +249,6 @@ ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#ifndef CONFIG_GENERIC_ATOMIC64
typedef struct {
s64 counter;
diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h
index 97a312ba0840..fe385551edec 100644
--- a/arch/arm/include/asm/bugs.h
+++ b/arch/arm/include/asm/bugs.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * arch/arm/include/asm/bugs.h
- *
* Copyright (C) 1995-2003 Russell King
*/
#ifndef __ASM_BUGS_H
@@ -10,10 +8,8 @@
extern void check_writebuffer_bugs(void);
#ifdef CONFIG_MMU
-extern void check_bugs(void);
extern void check_other_bugs(void);
#else
-#define check_bugs() do { } while (0)
#define check_other_bugs() do { } while (0)
#endif
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 7e9251ca29fe..5be3ddc96a50 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -75,6 +75,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
return !strcasecmp(sym, name);
}
+void prepare_ftrace_return(unsigned long *parent, unsigned long self,
+ unsigned long frame_pointer,
+ unsigned long stack_pointer);
+
#endif /* ifndef __ASSEMBLY__ */
#endif /* _ASM_ARM_FTRACE */
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index a7c2337b0c7d..18605f1b3580 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -27,7 +27,6 @@ struct irqaction;
struct pt_regs;
void handle_IRQ(unsigned int, struct pt_regs *);
-void init_IRQ(void);
#ifdef CONFIG_SMP
#include <linux/cpumask.h>
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 9349e7a82c9c..2b18a258204d 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -56,7 +56,6 @@ struct machine_desc {
void (*init_time)(void);
void (*init_machine)(void);
void (*init_late)(void);
- void (*handle_irq)(struct pt_regs *);
void (*restart)(enum reboot_mode, const char *);
};
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 74bb5947b387..28c63d172a96 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -113,6 +113,28 @@ struct cpu_user_fns {
unsigned long vaddr, struct vm_area_struct *vma);
};
+void fa_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr);
+void feroceon_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4_mc_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4wb_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr);
+void v4wt_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr);
+void xsc3_mc_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
+ unsigned long vaddr, struct vm_area_struct *vma);
+void xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr);
+
#ifdef MULTI_USER
extern struct cpu_user_fns cpu_user;
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 483b8ddfcb82..7f44e88d1f25 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -193,5 +193,8 @@ static inline unsigned long it_advance(unsigned long cpsr)
return cpsr;
}
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h
index ba0872a8dcda..546af8b1e3f6 100644
--- a/arch/arm/include/asm/setup.h
+++ b/arch/arm/include/asm/setup.h
@@ -5,7 +5,7 @@
* Copyright (C) 1997-1999 Russell King
*
* Structure passed to kernel to tell it about the
- * hardware it's running on. See Documentation/arm/setup.rst
+ * hardware it's running on. See Documentation/arch/arm/setup.rst
* for more info.
*/
#ifndef __ASMARM_SETUP_H
@@ -28,4 +28,11 @@ extern void save_atags(const struct tag *tags);
static inline void save_atags(const struct tag *tags) { }
#endif
+struct machine_desc;
+void init_default_cache_policy(unsigned long);
+void paging_init(const struct machine_desc *desc);
+void early_mm_init(const struct machine_desc *);
+void adjust_lowmem_bounds(void);
+void setup_dma_zone(const struct machine_desc *desc);
+
#endif
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 430be7774402..8b84092d1518 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -22,4 +22,9 @@ typedef struct {
#define __ARCH_HAS_SA_RESTORER
#include <asm/sigcontext.h>
+
+void do_rseq_syscall(struct pt_regs *regs);
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+ int syscall);
+
#endif
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 7c1c90d9f582..8c05a7f374d8 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -64,7 +64,7 @@ extern void secondary_startup_arm(void);
extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h
index 85f9e538fb32..d9c28b3b6b62 100644
--- a/arch/arm/include/asm/spectre.h
+++ b/arch/arm/include/asm/spectre.h
@@ -35,4 +35,8 @@ static inline void spectre_v2_update_state(unsigned int state,
int spectre_bhb_update_vectors(unsigned int method);
+void cpu_v7_ca8_ibe(void);
+void cpu_v7_ca15_ibe(void);
+void cpu_v7_bugs_init(void);
+
#endif
diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h
index 506314265c6f..be81b9ca2ea1 100644
--- a/arch/arm/include/asm/suspend.h
+++ b/arch/arm/include/asm/suspend.h
@@ -13,5 +13,6 @@ extern void cpu_resume(void);
extern void cpu_resume_no_hyp(void);
extern void cpu_resume_arm(void);
extern int cpu_suspend(unsigned long, int (*)(unsigned long));
+extern void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr);
#endif
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 6f5d627c44a3..f46b3c570f92 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -14,14 +14,35 @@
* ops which are SMP safe even on a UP kernel.
*/
+/*
+ * Unordered
+ */
+
#define sync_set_bit(nr, p) _set_bit(nr, p)
#define sync_clear_bit(nr, p) _clear_bit(nr, p)
#define sync_change_bit(nr, p) _change_bit(nr, p)
-#define sync_test_and_set_bit(nr, p) _test_and_set_bit(nr, p)
-#define sync_test_and_clear_bit(nr, p) _test_and_clear_bit(nr, p)
-#define sync_test_and_change_bit(nr, p) _test_and_change_bit(nr, p)
#define sync_test_bit(nr, addr) test_bit(nr, addr)
-#define arch_sync_cmpxchg arch_cmpxchg
+/*
+ * Fully ordered
+ */
+
+int _sync_test_and_set_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_set_bit(nr, p) _sync_test_and_set_bit(nr, p)
+
+int _sync_test_and_clear_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_clear_bit(nr, p) _sync_test_and_clear_bit(nr, p)
+
+int _sync_test_and_change_bit(int nr, volatile unsigned long * p);
+#define sync_test_and_change_bit(nr, p) _sync_test_and_change_bit(nr, p)
+
+#define arch_sync_cmpxchg(ptr, old, new) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __smp_mb__before_atomic(); \
+ __ret = arch_cmpxchg_relaxed((ptr), (old), (new)); \
+ __smp_mb__after_atomic(); \
+ __ret; \
+})
#endif
diff --git a/arch/arm/include/asm/syscalls.h b/arch/arm/include/asm/syscalls.h
new file mode 100644
index 000000000000..5912e7cffa6a
--- /dev/null
+++ b/arch/arm/include/asm/syscalls.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_SYSCALLS_H
+#define __ASM_SYSCALLS_H
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+struct pt_regs;
+asmlinkage int sys_sigreturn(struct pt_regs *regs);
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
+asmlinkage long sys_arm_fadvise64_64(int fd, int advice,
+ loff_t offset, loff_t len);
+
+struct oldabi_stat64;
+asmlinkage long sys_oabi_stat64(const char __user * filename,
+ struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_lstat64(const char __user * filename,
+ struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_fstat64(unsigned long fd,
+ struct oldabi_stat64 __user * statbuf);
+asmlinkage long sys_oabi_fstatat64(int dfd,
+ const char __user *filename,
+ struct oldabi_stat64 __user *statbuf,
+ int flag);
+asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+struct oabi_epoll_event;
+asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
+ struct oabi_epoll_event __user *event);
+struct oabi_sembuf;
+struct old_timespec32;
+asmlinkage long sys_oabi_semtimedop(int semid,
+ struct oabi_sembuf __user *tsops,
+ unsigned nsops,
+ const struct old_timespec32 __user *timeout);
+asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
+ unsigned nsops);
+asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
+ void __user *ptr, long fifth);
+struct sockaddr;
+asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen);
+asmlinkage long sys_oabi_connect(int fd, struct sockaddr __user *addr, int addrlen);
+asmlinkage long sys_oabi_sendto(int fd, void __user *buff,
+ size_t len, unsigned flags,
+ struct sockaddr __user *addr,
+ int addrlen);
+struct user_msghdr;
+asmlinkage long sys_oabi_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags);
+asmlinkage long sys_oabi_socketcall(int call, unsigned long __user *args);
+
+#endif
diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h
index d8bd8a4b0ede..e1f7dca86a22 100644
--- a/arch/arm/include/asm/tcm.h
+++ b/arch/arm/include/asm/tcm.h
@@ -9,9 +9,7 @@
#ifndef __ASMARM_TCM_H
#define __ASMARM_TCM_H
-#ifndef CONFIG_HAVE_TCM
-#error "You should not be including tcm.h unless you have a TCM!"
-#endif
+#ifdef CONFIG_HAVE_TCM
#include <linux/compiler.h>
@@ -29,4 +27,11 @@ void tcm_free(void *addr, size_t len);
bool tcm_dtcm_present(void);
bool tcm_itcm_present(void);
+void __init tcm_init(void);
+#else
+/* No TCM support, just blank inlines to be optimized out */
+static inline void tcm_init(void)
+{
+}
+#endif
#endif
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index 987fefb0a4db..0aaefe3e1700 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -35,4 +35,13 @@ extern void ptrace_break(struct pt_regs *regs);
extern void *vectors_page;
+asmlinkage void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl);
+asmlinkage void do_undefinstr(struct pt_regs *regs);
+asmlinkage void handle_fiq_as_nmi(struct pt_regs *regs);
+asmlinkage void bad_mode(struct pt_regs *regs, int reason);
+asmlinkage int arm_syscall(int no, struct pt_regs *regs);
+asmlinkage void baddataabort(int code, unsigned long instr, struct pt_regs *regs);
+asmlinkage void __div0(void);
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
+
#endif
diff --git a/arch/arm/include/asm/unwind.h b/arch/arm/include/asm/unwind.h
index b51f85417f58..d60b09a5acfc 100644
--- a/arch/arm/include/asm/unwind.h
+++ b/arch/arm/include/asm/unwind.h
@@ -40,6 +40,10 @@ extern void unwind_table_del(struct unwind_table *tab);
extern void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk,
const char *loglvl);
+void __aeabi_unwind_cpp_pr0(void);
+void __aeabi_unwind_cpp_pr1(void);
+void __aeabi_unwind_cpp_pr2(void);
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_ARM_UNWIND
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
index 5b85889f82ee..422c3afa806a 100644
--- a/arch/arm/include/asm/vdso.h
+++ b/arch/arm/include/asm/vdso.h
@@ -24,6 +24,11 @@ static inline void arm_install_vdso(struct mm_struct *mm, unsigned long addr)
#endif /* CONFIG_VDSO */
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts);
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res);
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/arm/include/asm/vfp.h b/arch/arm/include/asm/vfp.h
index 157ea3426158..5b57b8768bac 100644
--- a/arch/arm/include/asm/vfp.h
+++ b/arch/arm/include/asm/vfp.h
@@ -102,6 +102,7 @@
#ifndef __ASSEMBLY__
void vfp_disable(void);
+void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs);
#endif
#endif /* __ASM_VFP_H */
diff --git a/arch/arm/include/uapi/asm/setup.h b/arch/arm/include/uapi/asm/setup.h
index 25ceda63b284..8e50e034fec7 100644
--- a/arch/arm/include/uapi/asm/setup.h
+++ b/arch/arm/include/uapi/asm/setup.h
@@ -9,7 +9,7 @@
* published by the Free Software Foundation.
*
* Structure passed to kernel to tell it about the
- * hardware it's running on. See Documentation/arm/setup.rst
+ * hardware it's running on. See Documentation/arch/arm/setup.rst
* for more info.
*/
#ifndef _UAPI__ASMARM_SETUP_H
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 373b61f9a4f0..33f6eb5213a5 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -127,7 +127,7 @@ static int __init parse_tag_cmdline(const struct tag *tag)
#elif defined(CONFIG_CMDLINE_FORCE)
pr_warn("Ignoring tag cmdline (using the default kernel command line)\n");
#else
- strlcpy(default_command_line, tag->u.cmdline.cmdline,
+ strscpy(default_command_line, tag->u.cmdline.cmdline,
COMMAND_LINE_SIZE);
#endif
return 0;
@@ -224,7 +224,7 @@ setup_machine_tags(void *atags_vaddr, unsigned int machine_nr)
}
/* parse_early_param needs a boot_command_line */
- strlcpy(boot_command_line, from, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, from, COMMAND_LINE_SIZE);
return mdesc;
}
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
index 14c8dbbb7d2d..087bce6ec8e9 100644
--- a/arch/arm/kernel/bugs.c
+++ b/arch/arm/kernel/bugs.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
+#include <linux/cpu.h>
#include <asm/bugs.h>
#include <asm/proc-fns.h>
@@ -11,7 +12,7 @@ void check_other_bugs(void)
#endif
}
-void __init check_bugs(void)
+void __init arch_cpu_finalize_init(void)
{
check_writebuffer_bugs();
check_other_bugs();
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index c39303e5c234..291dc48d6bed 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -875,7 +875,7 @@ ENDPROC(__bad_stack)
* existing ones. This mechanism should be used only for things that are
* really small and justified, and not be abused freely.
*
- * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
+ * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions.
*/
THUMB( .arm )
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 98ca3e3fa847..d2c8e5313539 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -45,6 +45,7 @@
#include <asm/cacheflush.h>
#include <asm/cp15.h>
#include <asm/fiq.h>
+#include <asm/mach/irq.h>
#include <asm/irq.h>
#include <asm/traps.h>
diff --git a/arch/arm/kernel/head-inflate-data.c b/arch/arm/kernel/head-inflate-data.c
index 89a52104d32a..225c0699a12c 100644
--- a/arch/arm/kernel/head-inflate-data.c
+++ b/arch/arm/kernel/head-inflate-data.c
@@ -8,16 +8,13 @@
#include <linux/init.h>
#include <linux/zutil.h>
+#include "head.h"
/* for struct inflate_state */
#include "../../../lib/zlib_inflate/inftrees.h"
#include "../../../lib/zlib_inflate/inflate.h"
#include "../../../lib/zlib_inflate/infutil.h"
-extern char __data_loc[];
-extern char _edata_loc[];
-extern char _sdata[];
-
/*
* This code is called very early during the boot process to decompress
* the .data segment stored compressed in ROM. Therefore none of the global
diff --git a/arch/arm/kernel/head.h b/arch/arm/kernel/head.h
new file mode 100644
index 000000000000..0eb5accf7141
--- /dev/null
+++ b/arch/arm/kernel/head.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+extern char __data_loc[];
+extern char _edata_loc[];
+extern char _sdata[];
+
+int __init __inflate_kernel_data(void);
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index d59c36dc0494..e74d84f58b77 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -169,8 +169,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset = __mem_to_opcode_arm(*(u32 *)loc);
offset = (offset & 0x00ffffff) << 2;
- if (offset & 0x02000000)
- offset -= 0x04000000;
+ offset = sign_extend32(offset, 25);
offset += sym->st_value - loc;
@@ -236,7 +235,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
case R_ARM_MOVT_PREL:
offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
- offset = (offset ^ 0x8000) - 0x8000;
+ offset = sign_extend32(offset, 15);
offset += sym->st_value;
if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
@@ -344,8 +343,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
((~(j2 ^ sign) & 1) << 22) |
((upper & 0x03ff) << 12) |
((lower & 0x07ff) << 1);
- if (offset & 0x01000000)
- offset -= 0x02000000;
+ offset = sign_extend32(offset, 24);
offset += sym->st_value - loc;
/*
@@ -401,7 +399,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset = ((upper & 0x000f) << 12) |
((upper & 0x0400) << 1) |
((lower & 0x7000) >> 4) | (lower & 0x00ff);
- offset = (offset ^ 0x8000) - 0x8000;
+ offset = sign_extend32(offset, 15);
offset += sym->st_value;
if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 75cd4699e7b3..c66b560562b3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -76,13 +76,6 @@ static int __init fpe_setup(char *line)
__setup("fpe=", fpe_setup);
#endif
-extern void init_default_cache_policy(unsigned long);
-extern void paging_init(const struct machine_desc *desc);
-extern void early_mm_init(const struct machine_desc *);
-extern void adjust_lowmem_bounds(void);
-extern enum reboot_mode reboot_mode;
-extern void setup_dma_zone(const struct machine_desc *desc);
-
unsigned int processor_id;
EXPORT_SYMBOL(processor_id);
unsigned int __machine_arch_type __read_mostly;
@@ -1142,7 +1135,7 @@ void __init setup_arch(char **cmdline_p)
setup_initial_init_mm(_text, _etext, _edata, _end);
/* populate cmd_line too for later use, preserving boot_command_line */
- strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+ strscpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = cmd_line;
early_fixmap_init();
@@ -1198,10 +1191,6 @@ void __init setup_arch(char **cmdline_p)
reserve_crashkernel();
-#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
- handle_arch_irq = mdesc->handle_irq;
-#endif
-
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index e07f359254c3..8d0afa11bed5 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -18,6 +18,7 @@
#include <asm/traps.h>
#include <asm/unistd.h>
#include <asm/vfp.h>
+#include <asm/syscalls.h>
#include "signal.h"
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 87f8d0e5e314..6756203e45f3 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -288,15 +288,11 @@ int __cpu_disable(void)
}
/*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
*/
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
- if (!cpu_wait_death(cpu, 5)) {
- pr_err("CPU%u: cpu didn't die\n", cpu);
- return;
- }
pr_debug("CPU%u: shutdown\n", cpu);
clear_tasks_mm_cpumask(cpu);
@@ -336,11 +332,11 @@ void __noreturn arch_cpu_idle_dead(void)
flush_cache_louis();
/*
- * Tell __cpu_die() that this CPU is now safe to dispose of. Once
- * this returns, power and/or clocks can be removed at any point
- * from this CPU and its cache by platform_cpu_kill().
+ * Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose
+ * of. Once this returns, power and/or clocks can be removed at
+ * any point from this CPU and its cache by platform_cpu_kill().
*/
- (void)cpu_report_death();
+ cpuhp_ap_report_dead();
/*
* Ensure that the cache lines associated with that completion are
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index a5f183cfecb1..0141e9bb02e8 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -24,6 +24,7 @@
#include <linux/ipc.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <asm/syscalls.h>
/*
* Since loff_t is a 64 bit type we avoid a lot of ABI hassle
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 006163195d67..d00f4040a9f5 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -10,6 +10,8 @@
* Copyright: MontaVista Software, Inc.
*/
+#include <asm/syscalls.h>
+
/*
* The legacy ABI and the new ARM EABI have different rules making some
* syscalls incompatible especially with structure arguments.
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 40c7c807d67f..3bad79db5d6e 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -756,6 +756,7 @@ void __readwrite_bug(const char *fn)
}
EXPORT_SYMBOL(__readwrite_bug);
+#ifdef CONFIG_MMU
void __pte_error(const char *file, int line, pte_t pte)
{
pr_err("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte));
@@ -770,6 +771,7 @@ void __pgd_error(const char *file, int line, pgd_t pgd)
{
pr_err("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd));
}
+#endif
asmlinkage void __div0(void)
{
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 3408269d19c7..f297d66a8a76 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -135,7 +135,7 @@ static Elf32_Sym * __init find_symbol(struct elfinfo *lib, const char *symname)
if (lib->dynsym[i].st_name == 0)
continue;
- strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
+ strscpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 95bd35991288..f069d1b2318e 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -28,7 +28,7 @@ UNWIND( .fnend )
ENDPROC(\name )
.endm
- .macro testop, name, instr, store
+ .macro __testop, name, instr, store, barrier
ENTRY( \name )
UNWIND( .fnstart )
ands ip, r1, #3
@@ -38,7 +38,7 @@ UNWIND( .fnstart )
mov r0, r0, lsr #5
add r1, r1, r0, lsl #2 @ Get word offset
mov r3, r2, lsl r3 @ create mask
- smp_dmb
+ \barrier
#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
.arch_extension mp
ALT_SMP(W(pldw) [r1])
@@ -50,13 +50,21 @@ UNWIND( .fnstart )
strex ip, r2, [r1]
cmp ip, #0
bne 1b
- smp_dmb
+ \barrier
cmp r0, #0
movne r0, #1
2: bx lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
+
+ .macro testop, name, instr, store
+ __testop \name, \instr, \store, smp_dmb
+ .endm
+
+ .macro sync_testop, name, instr, store
+ __testop \name, \instr, \store, __smp_dmb
+ .endm
#else
.macro bitop, name, instr
ENTRY( \name )
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
index 4ebecc67e6e0..f13fe9bc2399 100644
--- a/arch/arm/lib/testchangebit.S
+++ b/arch/arm/lib/testchangebit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_change_bit, eor, str
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_change_bit, eor, str
+#endif
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 009afa0f5b4a..4d2c5ca620eb 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_clear_bit, bicne, strne
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_clear_bit, bicne, strne
+#endif
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index f3192e55acc8..649dbab65d8d 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -10,3 +10,7 @@
.text
testop _test_and_set_bit, orreq, streq
+
+#if __LINUX_ARM_ARCH__ >= 6
+sync_testop _sync_test_and_set_bit, orreq, streq
+#endif
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index e4c2677cc1e9..2f6163f05e93 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -74,6 +74,9 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
return 0;
pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+ if (unlikely(!pte))
+ return 0;
+
if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
!pte_write(*pte) || !pte_dirty(*pte))) {
pte_unmap_unlock(pte, ptl);
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 29eb075b24a4..b5287ff1c542 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -106,7 +106,7 @@ void exynos_firmware_init(void);
#define C2_STATE (1 << 3)
/*
* Magic values for bootloader indicating chosen low power mode.
- * See also Documentation/arm/samsung/bootloader-interface.rst
+ * See also Documentation/arch/arm/samsung/bootloader-interface.rst
*/
#define EXYNOS_SLEEP_MAGIC 0x00000bad
#define EXYNOS_AFTR_MAGIC 0xfcba0d10
diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c
index 51e47053c816..3faf9a1e3e36 100644
--- a/arch/arm/mach-mxs/mach-mxs.c
+++ b/arch/arm/mach-mxs/mach-mxs.c
@@ -11,7 +11,6 @@
#include <linux/err.h>
#include <linux/gpio.h>
#include <linux/init.h>
-#include <linux/irqchip/mxs.h>
#include <linux/reboot.h>
#include <linux/micrel_phy.h>
#include <linux/of_address.h>
@@ -472,7 +471,6 @@ static const char *const mxs_dt_compat[] __initconst = {
};
DT_MACHINE_START(MXS, "Freescale MXS (Device Tree)")
- .handle_irq = icoll_handle_irq,
.init_machine = mxs_machine_init,
.init_late = mxs_pm_init,
.dt_compat = mxs_dt_compat,
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 9108c871d129..88139200449e 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -877,7 +877,6 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)")
.map_io = ams_delta_map_io,
.init_early = omap1_init_early,
.init_irq = omap1_init_irq,
- .handle_irq = omap1_handle_irq,
.init_machine = ams_delta_init,
.init_late = ams_delta_init_late,
.init_time = omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index a501a473ffd6..b56cea9f9d2f 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -291,7 +291,6 @@ MACHINE_START(NOKIA770, "Nokia 770")
.map_io = omap1_map_io,
.init_early = omap1_init_early,
.init_irq = omap1_init_irq,
- .handle_irq = omap1_handle_irq,
.init_machine = omap_nokia770_init,
.init_late = omap1_init_late,
.init_time = omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index df758c1f9237..46eda4ff4797 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -389,7 +389,6 @@ MACHINE_START(OMAP_OSK, "TI-OSK")
.map_io = omap1_map_io,
.init_early = omap1_init_early,
.init_irq = omap1_init_irq,
- .handle_irq = omap1_handle_irq,
.init_machine = osk_init,
.init_late = omap1_init_late,
.init_time = omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c
index f79c497f04d5..91df3dc365af 100644
--- a/arch/arm/mach-omap1/board-palmte.c
+++ b/arch/arm/mach-omap1/board-palmte.c
@@ -259,7 +259,6 @@ MACHINE_START(OMAP_PALMTE, "OMAP310 based Palm Tungsten E")
.map_io = omap1_map_io,
.init_early = omap1_init_early,
.init_irq = omap1_init_irq,
- .handle_irq = omap1_handle_irq,
.init_machine = omap_palmte_init,
.init_late = omap1_init_late,
.init_time = omap1_timer_init,
diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c
index 0c0cdd5e77c7..3ae295af96fd 100644
--- a/arch/arm/mach-omap1/board-sx1.c
+++ b/arch/arm/mach-omap1/board-sx1.c
@@ -338,7 +338,6 @@ MACHINE_START(SX1, "OMAP310 based Siemens SX1")
.map_io = omap1_map_io,
.init_early = omap1_init_early,
.init_irq = omap1_init_irq,
- .handle_irq = omap1_handle_irq,
.init_machine = omap_sx1_init,
.init_late = omap1_init_late,
.init_time = omap1_timer_init,
diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c
index bfc7ab010ae2..3d9e72e1eddc 100644
--- a/arch/arm/mach-omap1/irq.c
+++ b/arch/arm/mach-omap1/irq.c
@@ -37,6 +37,7 @@
*/
#include <linux/gpio.h>
#include <linux/init.h>
+#include <linux/irq.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
@@ -254,4 +255,6 @@ void __init omap1_init_irq(void)
ct = irq_data_get_chip_type(d);
ct->chip.irq_unmask(d);
}
+
+ set_handle_irq(omap1_handle_irq);
}
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index 72b08a9bf0fd..6b7197ae3c72 100644
--- a/arch/arm/mach-pxa/gumstix.c
+++ b/arch/arm/mach-pxa/gumstix.c
@@ -233,7 +233,6 @@ MACHINE_START(GUMSTIX, "Gumstix")
.map_io = pxa25x_map_io,
.nr_irqs = PXA_NR_IRQS,
.init_irq = pxa25x_init_irq,
- .handle_irq = pxa25x_handle_irq,
.init_time = pxa_timer_init,
.init_machine = gumstix_init,
.restart = pxa_restart,
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index 1b83be181bab..032dc897fe94 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -143,6 +143,7 @@ set_pwer:
void __init pxa25x_init_irq(void)
{
pxa_init_irq(32, pxa25x_set_wake);
+ set_handle_irq(pxa25x_handle_irq);
}
static int __init __init
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 4135ba2877c4..c9b56424b653 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -228,6 +228,7 @@ static int pxa27x_set_wake(struct irq_data *d, unsigned int on)
void __init pxa27x_init_irq(void)
{
pxa_init_irq(34, pxa27x_set_wake);
+ set_handle_irq(pxa27x_handle_irq);
}
static int __init
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 4325bdc2b9ff..042922a0a9d6 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -1043,7 +1043,6 @@ MACHINE_START(SPITZ, "SHARP Spitz")
.map_io = pxa27x_map_io,
.nr_irqs = PXA_NR_IRQS,
.init_irq = pxa27x_init_irq,
- .handle_irq = pxa27x_handle_irq,
.init_machine = spitz_init,
.init_time = pxa_timer_init,
.restart = spitz_restart,
@@ -1056,7 +1055,6 @@ MACHINE_START(BORZOI, "SHARP Borzoi")
.map_io = pxa27x_map_io,
.nr_irqs = PXA_NR_IRQS,
.init_irq = pxa27x_init_irq,
- .handle_irq = pxa27x_handle_irq,
.init_machine = spitz_init,
.init_time = pxa_timer_init,
.restart = spitz_restart,
@@ -1069,7 +1067,6 @@ MACHINE_START(AKITA, "SHARP Akita")
.map_io = pxa27x_map_io,
.nr_irqs = PXA_NR_IRQS,
.init_irq = pxa27x_init_irq,
- .handle_irq = pxa27x_handle_irq,
.init_machine = spitz_init,
.init_time = pxa_timer_init,
.restart = spitz_restart,
diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index b2d45cf10a3c..b3842c971d31 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -21,7 +21,7 @@ menuconfig ARCH_STI
help
Include support for STMicroelectronics' STiH415/416, STiH407/10 and
STiH418 family SoCs using the Device Tree for discovery. More
- information can be found in Documentation/arm/sti/ and
+ information can be found in Documentation/arch/arm/sti/ and
Documentation/devicetree.
if ARCH_STI
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index be183ed1232d..c164cde50243 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -712,7 +712,7 @@ config ARM_VIRT_EXT
assistance.
A compliant bootloader is required in order to make maximum
- use of this feature. Refer to Documentation/arm/booting.rst for
+ use of this feature. Refer to Documentation/arch/arm/booting.rst for
details.
config SWP_EMULATE
@@ -904,7 +904,7 @@ config KUSER_HELPERS
the CPU type fitted to the system. This permits binaries to be
run on ARMv4 through to ARMv7 without modification.
- See Documentation/arm/kernel_user_helpers.rst for details.
+ See Documentation/arch/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used
by ROP (return orientated programming) authors when creating
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index b4a33358d2e9..bc4ed5ce3e00 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -258,12 +258,14 @@ static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
static int dma_mmu_remap_num __initdata;
+#ifdef CONFIG_DMA_CMA
void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
dma_mmu_remap[dma_mmu_remap_num].base = base;
dma_mmu_remap[dma_mmu_remap_num].size = size;
dma_mmu_remap_num++;
}
+#endif
void __init dma_contiguous_remap(void)
{
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 0e49154454a6..ca5302b0b7ee 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -117,8 +117,11 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
* must use the nested version. This also means we need to
* open-code the spin-locking.
*/
- ptl = pte_lockptr(vma->vm_mm, pmd);
pte = pte_offset_map(pmd, address);
+ if (!pte)
+ return 0;
+
+ ptl = pte_lockptr(vma->vm_mm, pmd);
do_pte_lock(ptl);
ret = do_adjust_pte(vma, address, pfn, pte);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0860eeba8bd3..fef62e4a9edd 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -85,6 +85,9 @@ void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
break;
pte = pte_offset_map(pmd, addr);
+ if (!pte)
+ break;
+
pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
pr_cont(", *ppte=%08llx",
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
index 54927ba1fa6e..e8f8c1902544 100644
--- a/arch/arm/mm/fault.h
+++ b/arch/arm/mm/fault.h
@@ -37,5 +37,9 @@ static inline int fsr_fs(unsigned int fsr)
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
void early_abt_enable(void);
+asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr,
+ struct pt_regs *regs);
+asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr,
+ struct pt_regs *regs);
#endif /* __ARCH_ARM_FAULT_H */
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 7ff9feea13a6..2508be91b7a0 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -354,6 +354,7 @@ EXPORT_SYMBOL(flush_dcache_page);
* memcpy() to/from page
* if written to page, flush_dcache_page()
*/
+void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
{
unsigned long pfn;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 463fc2a8448f..f3a52c08a200 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -21,6 +21,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
+#include <asm/tcm.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/system_info.h>
@@ -37,7 +38,6 @@
#include "fault.h"
#include "mm.h"
-#include "tcm.h"
extern unsigned long __atags_pointer;
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 53f2d8774fdb..43cfd06bbeba 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -21,6 +21,7 @@
#include <asm/cputype.h>
#include <asm/mpu.h>
#include <asm/procinfo.h>
+#include <asm/idmap.h>
#include "mm.h"
diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h
deleted file mode 100644
index 6b80a760d875..000000000000
--- a/arch/arm/mm/tcm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2008-2009 ST-Ericsson AB
- * TCM memory handling for ARM systems
- *
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- * Author: Rickard Andersson <rickard.andersson@stericsson.com>
- */
-
-#ifdef CONFIG_HAVE_TCM
-void __init tcm_init(void);
-#else
-/* No TCM support, just blank inlines to be optimized out */
-static inline void tcm_init(void)
-{
-}
-#endif
diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c
index 4d720990cf2a..eba7ac4725c0 100644
--- a/arch/arm/probes/kprobes/checkers-common.c
+++ b/arch/arm/probes/kprobes/checkers-common.c
@@ -40,7 +40,7 @@ enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn,
* Different from other insn uses imm8, the real addressing offset of
* STRD in T32 encoding should be imm8 * 4. See ARMARM description.
*/
-enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
+static enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn,
struct arch_probes_insn *asi,
const struct decode_header *h)
{
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 9090c3a74dcc..d8238da095df 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -233,7 +233,7 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
* kprobe, and that level is reserved for user kprobe handlers, so we can't
* risk encountering a new kprobe in an interrupt handler.
*/
-void __kprobes kprobe_handler(struct pt_regs *regs)
+static void __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p, *cur;
struct kprobe_ctlblk *kcb;
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index dbef34ed933f..7f65048380ca 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -145,8 +145,6 @@ __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
}
}
-extern void kprobe_handler(struct pt_regs *regs);
-
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index c562832b8627..171c7076b89f 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -720,7 +720,7 @@ static const char coverage_register_lookup[16] = {
[REG_TYPE_NOSPPCX] = COVERAGE_ANY_REG | COVERAGE_SP,
};
-unsigned coverage_start_registers(const struct decode_header *h)
+static unsigned coverage_start_registers(const struct decode_header *h)
{
unsigned regs = 0;
int i;
diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h
index 56ad3c0aaeea..c7297037c162 100644
--- a/arch/arm/probes/kprobes/test-core.h
+++ b/arch/arm/probes/kprobes/test-core.h
@@ -454,3 +454,7 @@ void kprobe_thumb32_test_cases(void);
#else
void kprobe_arm_test_cases(void);
#endif
+
+void __kprobes_test_case_start(void);
+void __kprobes_test_case_end_16(void);
+void __kprobes_test_case_end_32(void);
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 9e74c7ff6b04..97e2bfa01f4b 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -7,7 +7,7 @@
# http://www.arm.linux.org.uk/developer/machines/download.php
#
# Please do not send patches to this file; it is automatically generated!
-# To add an entry into this database, please see Documentation/arm/arm.rst,
+# To add an entry into this database, please see Documentation/arch/arm/arm.rst,
# or visit:
#
# http://www.arm.linux.org.uk/developer/machines/?action=new
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index ac964612d8b0..8ebed8a13874 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -464,3 +464,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index 1976c6f325a4..a003beacac76 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -6,6 +6,8 @@
*/
#include <linux/time.h>
#include <linux/types.h>
+#include <asm/vdso.h>
+#include <asm/unwind.h>
int __vdso_clock_gettime(clockid_t clock,
struct old_timespec32 *ts)
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 349dcb944a93..1ba5078c1025 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -25,6 +25,7 @@
#include <asm/thread_notify.h>
#include <asm/traps.h>
#include <asm/vfp.h>
+#include <asm/neon.h>
#include "vfpinstr.h"
#include "vfp.h"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 92f3fff2522b..595028bd9160 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -120,6 +120,7 @@ config ARM64
select CRC32
select DCACHE_WORD_ACCESS
select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select DMA_BOUNCE_UNALIGNED_KMALLOC
select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
@@ -203,12 +204,16 @@ config ARM64
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GCC_PLUGINS
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \
+ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KVM
+ select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_PERF_EVENTS
+ select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_PREEMPT_DYNAMIC_KEY
@@ -222,6 +227,7 @@ config ARM64
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_GENERIC_VDSO
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
@@ -578,7 +584,6 @@ config ARM64_ERRATUM_845719
config ARM64_ERRATUM_843419
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
default y
- select ARM64_MODULE_PLTS if MODULES
help
This option links the kernel with '--fix-cortex-a53-843419' and
enables PLT support to replace certain ADRP instructions, which can
@@ -1586,7 +1591,7 @@ config ARM64_TAGGED_ADDR_ABI
When this option is enabled, user applications can opt in to a
relaxed ABI via prctl() allowing tagged addresses to be passed
to system calls as pointer arguments. For details, see
- Documentation/arm64/tagged-address-abi.rst.
+ Documentation/arch/arm64/tagged-address-abi.rst.
menuconfig COMPAT
bool "Kernel support for 32-bit EL0"
@@ -1620,7 +1625,7 @@ config KUSER_HELPERS
the system. This permits binaries to be run on ARMv4 through
to ARMv8 without modification.
- See Documentation/arm/kernel_user_helpers.rst for details.
+ See Documentation/arch/arm/kernel_user_helpers.rst for details.
However, the fixed address nature of these helpers can be used
by ROP (return orientated programming) authors when creating
@@ -2048,7 +2053,7 @@ config ARM64_MTE
explicitly opt in. The mechanism for the userspace is
described in:
- Documentation/arm64/memory-tagging-extension.rst.
+ Documentation/arch/arm64/memory-tagging-extension.rst.
endmenu # "ARMv8.5 architectural features"
@@ -2108,26 +2113,6 @@ config ARM64_SME
register state capable of holding two dimensional matrix tiles to
enable various matrix operations.
-config ARM64_MODULE_PLTS
- bool "Use PLTs to allow module memory to spill over into vmalloc area"
- depends on MODULES
- select HAVE_MOD_ARCH_SPECIFIC
- help
- Allocate PLTs when loading modules so that jumps and calls whose
- targets are too far away for their relative offsets to be encoded
- in the instructions themselves can be bounced via veneers in the
- module's PLT. This allows modules to be allocated in the generic
- vmalloc area after the dedicated module memory area has been
- exhausted.
-
- When running with address space randomization (KASLR), the module
- region itself may be too far away for ordinary relative jumps and
- calls, and so in that case, module PLTs are required and cannot be
- disabled.
-
- Specific errata workaround(s) might also force module PLTs to be
- enabled (ARM64_ERRATUM_843419).
-
config ARM64_PSEUDO_NMI
bool "Support for NMI-like interrupts"
select ARM_GIC_V3
@@ -2168,7 +2153,6 @@ config RELOCATABLE
config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
- select ARM64_MODULE_PLTS if MODULES
select RELOCATABLE
help
Randomizes the virtual address at which the kernel image is
@@ -2199,9 +2183,8 @@ config RANDOMIZE_MODULE_REGION_FULL
When this option is not set, the module region will be randomized over
a limited range that contains the [_stext, _etext] interval of the
core kernel, so branch relocations are almost always in range unless
- ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
- particular case of region exhaustion, modules might be able to fall
- back to a larger 2GB area.
+ the region is exhausted. In this particular case of region
+ exhaustion, modules might be able to fall back to a larger 2GB area.
config CC_HAVE_STACKPROTECTOR_SYSREG
def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
index 9f052270e090..299ef5dc225a 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts
+++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts
@@ -393,6 +393,11 @@
qcom,spare-regs = <&tcsr_regs_2 0xb3e4>;
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&sdhc_1 {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index ca6920de7ea8..1472e7f10831 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -892,6 +892,11 @@ hp_i2c: &i2c9 {
qcom,spare-regs = <&tcsr_regs_2 0xb3e4>;
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&sdhc_1 {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index f479cab8ab45..a65be760d1a7 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -369,7 +369,7 @@
};
firmware {
- scm {
+ scm: scm {
compatible = "qcom,scm-sc7180", "qcom,scm";
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
index f562e4d2b655..2e1cd219fc18 100644
--- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi
@@ -79,6 +79,11 @@
firmware-name = "ath11k/WCN6750/hw1.0/wpss.mdt";
};
+&scm {
+ /* TF-A firmware maps memory cached so mark dma-coherent to match. */
+ dma-coherent;
+};
+
&wifi {
status = "okay";
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index 2fd1d3c0eb34..36f0bb9b3cbb 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -656,7 +656,7 @@
};
firmware {
- scm {
+ scm: scm {
compatible = "qcom,scm-sc7280", "qcom,scm";
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
index dd228a256a32..2ae4bb7d5e62 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi
@@ -97,6 +97,7 @@
l2: l2-cache {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
index f69a38f42d2d..0a27fa5271f5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
@@ -37,7 +37,8 @@
vin-supply = <&vcc_io>;
};
- vcc_host_5v: vcc-host-5v-regulator {
+ /* Common enable line for all of the rails mentioned in the labels */
+ vcc_host_5v: vcc_host1_5v: vcc_otg_5v: vcc-host-5v-regulator {
compatible = "regulator-fixed";
gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
pinctrl-names = "default";
@@ -48,17 +49,6 @@
vin-supply = <&vcc_sys>;
};
- vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator {
- compatible = "regulator-fixed";
- gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>;
- pinctrl-names = "default";
- pinctrl-0 = <&usb20_host_drv>;
- regulator-name = "vcc_host1_5v";
- regulator-always-on;
- regulator-boot-on;
- vin-supply = <&vcc_sys>;
- };
-
vcc_sys: vcc-sys {
compatible = "regulator-fixed";
regulator-name = "vcc_sys";
diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 6d7a7bf72ac7..e729e7a22b23 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -103,6 +103,7 @@
l2: l2-cache0 {
compatible = "cache";
cache-level = <2>;
+ cache-unified;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
index 263ce40770dd..cddf6cd2fecb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts
@@ -28,6 +28,16 @@
regulator-max-microvolt = <5000000>;
vin-supply = <&vcc12v_dcin>;
};
+
+ vcc_sd_pwr: vcc-sd-pwr-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcc_sd_pwr";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
};
/* phy for pcie */
@@ -130,13 +140,7 @@
};
&sdmmc0 {
- vmmc-supply = <&sdmmc_pwr>;
- status = "okay";
-};
-
-&sdmmc_pwr {
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
+ vmmc-supply = <&vcc_sd_pwr>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
index 102e448bc026..31aa2b8efe39 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
@@ -104,16 +104,6 @@
regulator-max-microvolt = <3300000>;
vin-supply = <&vcc5v0_sys>;
};
-
- sdmmc_pwr: sdmmc-pwr-regulator {
- compatible = "regulator-fixed";
- enable-active-high;
- gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>;
- pinctrl-names = "default";
- pinctrl-0 = <&sdmmc_pwr_h>;
- regulator-name = "sdmmc_pwr";
- status = "disabled";
- };
};
&cpu0 {
@@ -155,6 +145,19 @@
status = "disabled";
};
+&gpio0 {
+ nextrst-hog {
+ gpio-hog;
+ /*
+ * GPIO_ACTIVE_LOW + output-low here means that the pin is set
+ * to high, because output-low decides the value pre-inversion.
+ */
+ gpios = <RK_PA5 GPIO_ACTIVE_LOW>;
+ line-name = "nEXTRST";
+ output-low;
+ };
+};
+
&gpu {
mali-supply = <&vdd_gpu>;
status = "okay";
@@ -538,12 +541,6 @@
rockchip,pins = <2 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
};
};
-
- sdmmc-pwr {
- sdmmc_pwr_h: sdmmc-pwr-h {
- rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
- };
- };
};
&pmu_io_domains {
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
index f70ca9f0470a..c718b8dbb9c6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts
@@ -106,7 +106,7 @@
rockchip-key {
reset_button_pin: reset-button-pin {
- rockchip,pins = <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
index 2a1118f15c29..b6ad8328c7eb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts
@@ -134,4 +134,3 @@
};
};
};
-
diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
index ba67b58f05b7..f1be76a54ceb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi
@@ -94,9 +94,10 @@
power-domains = <&power RK3568_PD_PIPE>;
reg = <0x3 0xc0400000 0x0 0x00400000>,
<0x0 0xfe270000 0x0 0x00010000>,
- <0x3 0x7f000000 0x0 0x01000000>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0x7ef00000 0x0 0x00100000>,
- <0x02000000 0x0 0x00000000 0x3 0x40000000 0x0 0x3ef00000>;
+ <0x0 0xf2000000 0x0 0x00100000>;
+ ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X1_POWERUP>;
reset-names = "pipe";
@@ -146,9 +147,10 @@
power-domains = <&power RK3568_PD_PIPE>;
reg = <0x3 0xc0800000 0x0 0x00400000>,
<0x0 0xfe280000 0x0 0x00010000>,
- <0x3 0xbf000000 0x0 0x01000000>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0xbef00000 0x0 0x00100000>,
- <0x02000000 0x0 0x00000000 0x3 0x80000000 0x0 0x3ef00000>;
+ <0x0 0xf0000000 0x0 0x00100000>;
+ ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>;
reg-names = "dbi", "apb", "config";
resets = <&cru SRST_PCIE30X2_POWERUP>;
reset-names = "pipe";
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index f62e0fd881a9..61680c7ac489 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -952,7 +952,7 @@
compatible = "rockchip,rk3568-pcie";
reg = <0x3 0xc0000000 0x0 0x00400000>,
<0x0 0xfe260000 0x0 0x00010000>,
- <0x3 0x3f000000 0x0 0x01000000>;
+ <0x0 0xf4000000 0x0 0x00100000>;
reg-names = "dbi", "apb", "config";
interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>,
@@ -982,8 +982,9 @@
phys = <&combphy2 PHY_TYPE_PCIE>;
phy-names = "pcie-phy";
power-domains = <&power RK3568_PD_PIPE>;
- ranges = <0x01000000 0x0 0x3ef00000 0x3 0x3ef00000 0x0 0x00100000
- 0x02000000 0x0 0x00000000 0x3 0x00000000 0x0 0x3ef00000>;
+ ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>,
+ <0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>,
+ <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>;
resets = <&cru SRST_PCIE20_POWERUP>;
reset-names = "pipe";
#address-cells = <3>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 657c019d27fa..a3124bd2e092 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -229,6 +229,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -238,6 +239,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -247,6 +249,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -256,6 +259,7 @@
cache-line-size = <64>;
cache-sets = <512>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -265,6 +269,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -274,6 +279,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -283,6 +289,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -292,6 +299,7 @@
cache-line-size = <64>;
cache-sets = <1024>;
cache-level = <2>;
+ cache-unified;
next-level-cache = <&l3_cache>;
};
@@ -301,6 +309,7 @@
cache-line-size = <64>;
cache-sets = <4096>;
cache-level = <3>;
+ cache-unified;
};
};
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index a406454578f0..f1b8a04ee9f2 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -67,7 +67,7 @@ static int __init hyperv_init(void)
if (ret)
return ret;
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online",
+ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online",
hv_common_cpu_init, hv_common_cpu_die);
if (ret < 0) {
hv_common_free();
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index bdf1f6bcd010..94b486192e1f 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -23,17 +23,17 @@
#include <linux/stringify.h>
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(cpucap) \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
-#define ALTINSTR_ENTRY_CB(feature, cb) \
+#define ALTINSTR_ENTRY_CB(cpucap, cb) \
" .word 661b - .\n" /* label */ \
- " .word " __stringify(cb) "- .\n" /* callback */ \
- " .hword " __stringify(feature) "\n" /* feature bit */ \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .hword " __stringify(cpucap) "\n" /* cpucap */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -53,13 +53,13 @@
*
* Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg_enabled) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(cpucap) \
".popsection\n" \
".subsection 1\n" \
"663:\n\t" \
@@ -70,31 +70,31 @@
".previous\n" \
".endif\n"
-#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
+#define __ALTERNATIVE_CFG_CB(oldinstr, cpucap, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY_CB(feature, cb) \
+ ALTINSTR_ENTRY_CB(cpucap, cb) \
".popsection\n" \
"663:\n\t" \
"664:\n\t" \
".endif\n"
-#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+#define _ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, cfg, ...) \
+ __ALTERNATIVE_CFG(oldinstr, newinstr, cpucap, IS_ENABLED(cfg))
-#define ALTERNATIVE_CB(oldinstr, feature, cb) \
- __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb)
+#define ALTERNATIVE_CB(oldinstr, cpucap, cb) \
+ __ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (cpucap), 1, cb)
#else
#include <asm/assembler.h>
-.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+.macro altinstruction_entry orig_offset alt_offset cpucap orig_len alt_len
.word \orig_offset - .
.word \alt_offset - .
- .hword (\feature)
+ .hword (\cpucap)
.byte \orig_len
.byte \alt_len
.endm
@@ -210,9 +210,9 @@ alternative_endif
#endif /* __ASSEMBLY__ */
/*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap));
*
- * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
+ * Usage: asm(ALTERNATIVE(oldinstr, newinstr, cpucap, CONFIG_FOO));
* N.B. If CONFIG_FOO is specified, but not selected, the whole block
* will be omitted, including oldinstr.
*/
@@ -224,15 +224,15 @@ alternative_endif
#include <linux/types.h>
static __always_inline bool
-alternative_has_feature_likely(const unsigned long feature)
+alternative_has_cap_likely(const unsigned long cpucap)
{
- compiletime_assert(feature < ARM64_NCAPS,
- "feature must be < ARM64_NCAPS");
+ compiletime_assert(cpucap < ARM64_NCAPS,
+ "cpucap must be < ARM64_NCAPS");
asm_volatile_goto(
- ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops)
+ ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops)
:
- : [feature] "i" (feature)
+ : [cpucap] "i" (cpucap)
:
: l_no);
@@ -242,15 +242,15 @@ l_no:
}
static __always_inline bool
-alternative_has_feature_unlikely(const unsigned long feature)
+alternative_has_cap_unlikely(const unsigned long cpucap)
{
- compiletime_assert(feature < ARM64_NCAPS,
- "feature must be < ARM64_NCAPS");
+ compiletime_assert(cpucap < ARM64_NCAPS,
+ "cpucap must be < ARM64_NCAPS");
asm_volatile_goto(
- ALTERNATIVE("nop", "b %l[l_yes]", %[feature])
+ ALTERNATIVE("nop", "b %l[l_yes]", %[cpucap])
:
- : [feature] "i" (feature)
+ : [cpucap] "i" (cpucap)
:
: l_yes);
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a38b92e11811..00d97b8a757f 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -13,7 +13,7 @@
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
- u16 cpufeature; /* cpufeature bit set for replacement */
+ u16 cpucap; /* cpucap bit set for replacement */
u8 orig_len; /* size of original instruction(s) */
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
@@ -23,7 +23,7 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
void __init apply_boot_alternatives(void);
void __init apply_alternatives_all(void);
-bool alternative_is_applied(u16 cpufeature);
+bool alternative_is_applied(u16 cpucap);
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length);
@@ -31,5 +31,8 @@ void apply_alternatives_module(void *start, size_t length);
static inline void apply_alternatives_module(void *start, size_t length) { }
#endif
+void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_ALTERNATIVE_H */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index af1fafbe7e1d..934c658ee947 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
#define arch_timer_reg_read_stable(reg) \
({ \
- u64 _val; \
- \
- preempt_disable_notrace(); \
- _val = erratum_handler(read_ ## reg)(); \
- preempt_enable_notrace(); \
- \
- _val; \
+ erratum_handler(read_ ## reg)(); \
})
/*
diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 2f5f3da34782..b0abc64f86b0 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -129,4 +129,6 @@ static inline bool __init __early_cpu_has_rndr(void)
return (ftr >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf;
}
+u64 kaslr_early_init(void *fdt);
+
#endif /* _ASM_ARCHRANDOM_H */
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 75b211c98dea..5b6efe8abeeb 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -18,7 +18,6 @@
bic \tmp1, \tmp1, #TTBR_ASID_MASK
sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir
msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
- isb
add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET
msr ttbr1_el1, \tmp1 // set reserved ASID
isb
@@ -31,7 +30,6 @@
extr \tmp2, \tmp2, \tmp1, #48
ror \tmp2, \tmp2, #16
msr ttbr1_el1, \tmp2 // set the active ASID
- isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c9979273d389..400d279e0f8d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -142,24 +142,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor_release
#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-#define arch_atomic_xchg_acquire(v, new) \
- arch_xchg_acquire(&((v)->counter), (new))
-#define arch_atomic_xchg_release(v, new) \
- arch_xchg_release(&((v)->counter), (new))
-#define arch_atomic_xchg(v, new) \
- arch_xchg(&((v)->counter), (new))
-
-#define arch_atomic_cmpxchg_relaxed(v, old, new) \
- arch_cmpxchg_relaxed(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_acquire(v, old, new) \
- arch_cmpxchg_acquire(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg_release(v, old, new) \
- arch_cmpxchg_release(&((v)->counter), (old), (new))
-#define arch_atomic_cmpxchg(v, old, new) \
- arch_cmpxchg(&((v)->counter), (old), (new))
-
#define arch_atomic_andnot arch_atomic_andnot
/*
@@ -209,16 +191,6 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor_release
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-#define arch_atomic64_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic64_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic64_xchg_release arch_atomic_xchg_release
-#define arch_atomic64_xchg arch_atomic_xchg
-
-#define arch_atomic64_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic64_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic64_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic64_cmpxchg arch_atomic_cmpxchg
-
#define arch_atomic64_andnot arch_atomic64_andnot
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index cbb3d961123b..89d2ba272359 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -294,38 +294,46 @@ __CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory", L)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, rel, cl) \
-static __always_inline long \
-__ll_sc__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __CMPXCHG128(name, mb, rel, cl...) \
+static __always_inline u128 \
+__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long tmp, ret; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ unsigned int tmp; \
\
- asm volatile("// __cmpxchg_double" #name "\n" \
- " prfm pstl1strm, %2\n" \
- "1: ldxp %0, %1, %2\n" \
- " eor %0, %0, %3\n" \
- " eor %1, %1, %4\n" \
- " orr %1, %0, %1\n" \
- " cbnz %1, 2f\n" \
- " st" #rel "xp %w0, %5, %6, %2\n" \
- " cbnz %w0, 1b\n" \
+ asm volatile("// __cmpxchg128" #name "\n" \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ldxp %[rl], %[rh], %[v]\n" \
+ " cmp %[rl], %[ol]\n" \
+ " ccmp %[rh], %[oh], 0, eq\n" \
+ " b.ne 2f\n" \
+ " st" #rel "xp %w[tmp], %[nl], %[nh], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
"2:" \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \
- : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
- : cl); \
+ : [v] "+Q" (*(u128 *)ptr), \
+ [rl] "=&r" (r.low), [rh] "=&r" (r.high), \
+ [tmp] "=&r" (tmp) \
+ : [ol] "r" (o.low), [oh] "r" (o.high), \
+ [nl] "r" (n.low), [nh] "r" (n.high) \
+ : "cc", ##cl); \
\
- return ret; \
+ return r.full; \
}
-__CMPXCHG_DBL( , , , )
-__CMPXCHG_DBL(_mb, dmb ish, l, "memory")
+__CMPXCHG128( , , )
+__CMPXCHG128(_mb, dmb ish, l, "memory")
+
+#undef __CMPXCHG128
-#undef __CMPXCHG_DBL
#undef K
#endif /* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 319958b95cfd..87f568a94e55 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -281,40 +281,35 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name, mb, cl...) \
-static __always_inline long \
-__lse__cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name, mb, cl...) \
+static __always_inline u128 \
+__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new) \
{ \
- unsigned long oldval1 = old1; \
- unsigned long oldval2 = old2; \
- register unsigned long x0 asm ("x0") = old1; \
- register unsigned long x1 asm ("x1") = old2; \
- register unsigned long x2 asm ("x2") = new1; \
- register unsigned long x3 asm ("x3") = new2; \
+ union __u128_halves r, o = { .full = (old) }, \
+ n = { .full = (new) }; \
+ register unsigned long x0 asm ("x0") = o.low; \
+ register unsigned long x1 asm ("x1") = o.high; \
+ register unsigned long x2 asm ("x2") = n.low; \
+ register unsigned long x3 asm ("x3") = n.high; \
register unsigned long x4 asm ("x4") = (unsigned long)ptr; \
\
asm volatile( \
__LSE_PREAMBLE \
" casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
- " eor %[old1], %[old1], %[oldval1]\n" \
- " eor %[old2], %[old2], %[oldval2]\n" \
- " orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(__uint128_t *)ptr) \
+ [v] "+Q" (*(u128 *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
- [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
+ [oldval1] "r" (o.low), [oldval2] "r" (o.high) \
: cl); \
\
- return x0; \
+ r.low = x0; r.high = x1; \
+ \
+ return r.full; \
}
-__CMPXCHG_DBL( , )
-__CMPXCHG_DBL(_mb, al, "memory")
+__CMPXCHG128( , )
+__CMPXCHG128(_mb, al, "memory")
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a51e6e8f3171..ceb368d33bf4 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -33,6 +33,7 @@
* the CPU.
*/
#define ARCH_DMA_MINALIGN (128)
+#define ARCH_KMALLOC_MINALIGN (8)
#ifndef __ASSEMBLY__
@@ -90,6 +91,8 @@ static inline int cache_line_size_of_cpu(void)
int cache_line_size(void);
+#define dma_get_cache_alignment cache_line_size
+
/*
* Read the effective value of CTR_EL0.
*
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index c6bc5d8ec3ca..d7a540736741 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -130,21 +130,18 @@ __CMPXCHG_CASE(mb_, 64)
#undef __CMPXCHG_CASE
-#define __CMPXCHG_DBL(name) \
-static inline long __cmpxchg_double##name(unsigned long old1, \
- unsigned long old2, \
- unsigned long new1, \
- unsigned long new2, \
- volatile void *ptr) \
+#define __CMPXCHG128(name) \
+static inline u128 __cmpxchg128##name(volatile u128 *ptr, \
+ u128 old, u128 new) \
{ \
- return __lse_ll_sc_body(_cmpxchg_double##name, \
- old1, old2, new1, new2, ptr); \
+ return __lse_ll_sc_body(_cmpxchg128##name, \
+ ptr, old, new); \
}
-__CMPXCHG_DBL( )
-__CMPXCHG_DBL(_mb)
+__CMPXCHG128( )
+__CMPXCHG128(_mb)
-#undef __CMPXCHG_DBL
+#undef __CMPXCHG128
#define __CMPXCHG_GEN(sfx) \
static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
@@ -198,34 +195,17 @@ __CMPXCHG_GEN(_mb)
#define arch_cmpxchg64 arch_cmpxchg
#define arch_cmpxchg64_local arch_cmpxchg_local
-/* cmpxchg_double */
-#define system_has_cmpxchg_double() 1
-
-#define __cmpxchg_double_check(ptr1, ptr2) \
-({ \
- if (sizeof(*(ptr1)) != 8) \
- BUILD_BUG(); \
- VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \
-})
+/* cmpxchg128 */
+#define system_has_cmpxchg128() 1
-#define arch_cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128_mb((ptr), (o), (n)); \
})
-#define arch_cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
+#define arch_cmpxchg128_local(ptr, o, n) \
({ \
- int __ret; \
- __cmpxchg_double_check(ptr1, ptr2); \
- __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2), \
- ptr1); \
- __ret; \
+ __cmpxchg128((ptr), (o), (n)); \
})
#define __CMPWAIT_CASE(w, sfx, sz) \
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 74575c3d6987..ae904a1ad529 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -96,6 +96,8 @@ static inline int is_compat_thread(struct thread_info *thread)
return test_ti_thread_flag(thread, TIF_32BIT);
}
+long compat_arm_syscall(struct pt_regs *regs, int scno);
+
#else /* !CONFIG_COMPAT */
static inline int is_compat_thread(struct thread_info *thread)
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index fd7a92219eea..e749838b9c5d 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -56,6 +56,7 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64mmfr0;
u64 reg_id_aa64mmfr1;
u64 reg_id_aa64mmfr2;
+ u64 reg_id_aa64mmfr3;
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
u64 reg_id_aa64zfr0;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 6bf013fb110d..7a95c324e52a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -107,7 +107,7 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
* CPU capabilities:
*
* We use arm64_cpu_capabilities to represent system features, errata work
- * arounds (both used internally by kernel and tracked in cpu_hwcaps) and
+ * arounds (both used internally by kernel and tracked in system_cpucaps) and
* ELF HWCAPs (which are exposed to user).
*
* To support systems with heterogeneous CPUs, we need to make sure that we
@@ -419,12 +419,12 @@ static __always_inline bool is_hyp_code(void)
return is_vhe_hyp_code() || is_nvhe_hyp_code();
}
-extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+extern DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
-extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
+extern DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
#define for_each_available_cap(cap) \
- for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+ for_each_set_bit(cap, system_cpucaps, ARM64_NCAPS)
bool this_cpu_has_cap(unsigned int cap);
void cpu_set_feature(unsigned int num);
@@ -437,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void);
static __always_inline bool system_capabilities_finalized(void)
{
- return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM);
+ return alternative_has_cap_likely(ARM64_ALWAYS_SYSTEM);
}
/*
@@ -449,7 +449,7 @@ static __always_inline bool cpus_have_cap(unsigned int num)
{
if (num >= ARM64_NCAPS)
return false;
- return arch_test_bit(num, cpu_hwcaps);
+ return arch_test_bit(num, system_cpucaps);
}
/*
@@ -464,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num)
{
if (num >= ARM64_NCAPS)
return false;
- return alternative_has_feature_unlikely(num);
+ return alternative_has_cap_unlikely(num);
}
/*
@@ -504,16 +504,6 @@ static __always_inline bool cpus_have_const_cap(int num)
return cpus_have_cap(num);
}
-static inline void cpus_set_cap(unsigned int num)
-{
- if (num >= ARM64_NCAPS) {
- pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
- num, ARM64_NCAPS);
- } else {
- __set_bit(num, cpu_hwcaps);
- }
-}
-
static inline int __attribute_const__
cpuid_feature_extract_signed_field_width(u64 features, int field, int width)
{
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index f86b157a5da3..4cf2cb053bc8 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -88,7 +88,7 @@ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
* guaranteed to cover the kernel Image.
*
* Since the EFI stub is part of the kernel Image, we can relax the
- * usual requirements in Documentation/arm64/booting.rst, which still
+ * usual requirements in Documentation/arch/arm64/booting.rst, which still
* apply to other bootloaders, and are required for some kernel
* configurations.
*/
@@ -166,4 +166,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size)
dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
}
+efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
+
#endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 037724b19c5c..f4c3d30bf746 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -22,6 +22,15 @@
isb
.endm
+.macro __init_el2_hcrx
+ mrs x0, id_aa64mmfr1_el1
+ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
+ cbz x0, .Lskip_hcrx_\@
+ mov_q x0, HCRX_HOST_FLAGS
+ msr_s SYS_HCRX_EL2, x0
+.Lskip_hcrx_\@:
+.endm
+
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
@@ -69,7 +78,7 @@
cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
mrs_s x0, SYS_TRBIDR_EL1
- and x0, x0, TRBIDR_PROG
+ and x0, x0, TRBIDR_EL1_P
cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2
mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
@@ -150,12 +159,21 @@
mov x0, xzr
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
- cbz x1, .Lset_fgt_\@
+ cbz x1, .Lset_pie_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+.Lset_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lset_fgt_\@
+
+ /* Disable trapping of PIR_EL1 / PIRE0_EL1 */
+ orr x0, x0, #HFGxTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
+
.Lset_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
@@ -184,6 +202,7 @@
*/
.macro init_el2_state
__init_el2_sctlr
+ __init_el2_hcrx
__init_el2_timers
__init_el2_debug
__init_el2_lor
@@ -284,14 +303,6 @@
cbz x1, .Lskip_sme_\@
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
-
- mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
- ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
- cbz x1, .Lskip_sme_\@
-
- mrs_s x1, SYS_HCRX_EL2
- orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
- msr_s SYS_HCRX_EL2, x1
.Lskip_sme_\@:
.endm
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 8487aec9b658..ae35939f395b 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -47,7 +47,7 @@
#define ESR_ELx_EC_DABT_LOW (0x24)
#define ESR_ELx_EC_DABT_CUR (0x25)
#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_MOPS (0x27)
#define ESR_ELx_EC_FP_EXC32 (0x28)
/* Unallocated EC: 0x29 - 0x2B */
#define ESR_ELx_EC_FP_EXC64 (0x2C)
@@ -75,8 +75,11 @@
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
-#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS_MASK (GENMASK(24, 0))
#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
+#define ESR_ELx_ISS2_SHIFT (32)
+#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32))
+#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
@@ -140,6 +143,20 @@
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
+/* ISS2 field definitions for Data Aborts */
+#define ESR_ELx_TnD_SHIFT (10)
+#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT)
+#define ESR_ELx_TagAccess_SHIFT (9)
+#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT)
+#define ESR_ELx_GCS_SHIFT (8)
+#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT)
+#define ESR_ELx_Overlay_SHIFT (6)
+#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT)
+#define ESR_ELx_DirtyBit_SHIFT (5)
+#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT)
+#define ESR_ELx_Xs_SHIFT (0)
+#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
+
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
@@ -356,6 +373,15 @@
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
#define ESR_ELx_SME_ISS_ZT_DISABLED 4
+/* ISS field definitions for MOPS exceptions */
+#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)
+#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18)
+#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17)
+#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16)
+#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10)
+#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
+#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index e73af709cb7a..ad688e157c9b 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,16 +8,11 @@
#define __ASM_EXCEPTION_H
#include <asm/esr.h>
-#include <asm/kprobes.h>
#include <asm/ptrace.h>
#include <linux/interrupt.h>
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
-#else
-#define __exception_irq_entry __kprobes
-#endif
static inline unsigned long disr_to_esr(u64 disr)
{
@@ -77,6 +72,7 @@ void do_el0_svc(struct pt_regs *regs);
void do_el0_svc_compat(struct pt_regs *regs);
void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el0_mops(struct pt_regs *regs, unsigned long esr);
void do_serror(struct pt_regs *regs, unsigned long esr);
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index fa4c6ff3aa9b..84055329cd8b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -154,4 +154,12 @@ static inline int get_num_wrps(void)
ID_AA64DFR0_EL1_WRPs_SHIFT);
}
+#ifdef CONFIG_CPU_PM
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
+#else
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
+{
+}
+#endif
+
#endif /* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 5d45f19fda7f..692b1ec663b2 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -137,6 +137,7 @@
#define KERNEL_HWCAP_SME_BI32I32 __khwcap2_feature(SME_BI32I32)
#define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16)
#define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16)
+#define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
index c2b13213c720..c09cf942dc92 100644
--- a/arch/arm64/include/asm/image.h
+++ b/arch/arm64/include/asm/image.h
@@ -27,7 +27,7 @@
/*
* struct arm64_image_header - arm64 kernel image header
- * See Documentation/arm64/booting.rst for details
+ * See Documentation/arch/arm64/booting.rst for details
*
* @code0: Executable code, or
* @mz_header alternatively used for part of MZ header
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 877495a0fd0c..51d92abf945e 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -22,13 +22,13 @@
* Generic IO read/write. These perform native-endian accesses.
*/
#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
{
asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
{
asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
}
@@ -40,13 +40,13 @@ static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
}
#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
{
asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
}
#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
{
u8 val;
asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
@@ -57,7 +57,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
}
#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
{
u16 val;
@@ -80,7 +80,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
}
#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
{
u64 val;
asm volatile(ALTERNATIVE("ldr %0, [%1]",
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index e0f5f6b73edd..1f31ec146d16 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -24,7 +24,7 @@
static __always_inline bool __irqflags_uses_pmr(void)
{
return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
- alternative_has_feature_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
+ alternative_has_cap_unlikely(ARM64_HAS_GIC_PRIO_MASKING);
}
static __always_inline void __daif_local_irq_enable(void)
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 186dd7f85b14..577773870b66 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -107,14 +107,14 @@
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
+#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
#else
-#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
#endif
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index baef29fcbeee..c6e12e8f2751 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -9,6 +9,7 @@
#include <asm/esr.h>
#include <asm/memory.h>
+#include <asm/sysreg.h>
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
@@ -92,6 +93,9 @@
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
+#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En)
+
/* TCR_EL2 Registers bits */
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 43c3bc0f9544..86042afa86c3 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -267,6 +267,24 @@ extern u64 __kvm_get_mdcr_el2(void);
__kvm_at_err; \
} )
+void __noreturn hyp_panic(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+asmlinkage void __noreturn hyp_panic(void);
+asmlinkage void __noreturn hyp_panic_bad_stack(void);
+asmlinkage void kvm_unexpected_el2_exception(void);
+struct kvm_cpu_context;
+void handle_trap(struct kvm_cpu_context *host_ctxt);
+asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+void __noreturn __pkvm_init_finalise(void);
+void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_final_ctr_el0(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt,
+ u64 elr_phys, u64 par, uintptr_t vcpu, u64 far, u64 hpfar);
#else /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7e7e19ef6993..d48609d95423 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -279,6 +279,7 @@ enum vcpu_sysreg {
TTBR0_EL1, /* Translation Table Base Register 0 */
TTBR1_EL1, /* Translation Table Base Register 1 */
TCR_EL1, /* Translation Control Register */
+ TCR2_EL1, /* Extended Translation Control Register */
ESR_EL1, /* Exception Syndrome Register */
AFSR0_EL1, /* Auxiliary Fault Status Register 0 */
AFSR1_EL1, /* Auxiliary Fault Status Register 1 */
@@ -339,6 +340,10 @@ enum vcpu_sysreg {
TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+ /* Permission Indirection Extension registers */
+ PIR_EL1, /* Permission Indirection Register 1 (EL1) */
+ PIRE0_EL1, /* Permission Indirection Register 0 (EL1) */
+
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -699,6 +704,8 @@ struct kvm_vcpu_arch {
#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
/* Software step state is Active-pending */
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -1031,7 +1038,7 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
#define kvm_vcpu_os_lock_enabled(vcpu) \
- (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
+ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
@@ -1065,9 +1072,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
+bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
#endif
void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index f99d74826a7e..cbbcdc35c4cd 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -18,7 +18,7 @@
static __always_inline bool system_uses_lse_atomics(void)
{
- return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS);
+ return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
}
#define __lse_ll_sc_body(op, ...) \
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c735afdf639b..6e0e5722f229 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE (SZ_128M)
+#define MODULES_VSIZE (SZ_2G)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_8M)
@@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void)
return kimage_vaddr - KIMAGE_VADDR;
}
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_init(void);
static inline bool kaslr_enabled(void)
{
- /*
- * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
- * placement of the image rather than from the seed, so a displacement
- * of less than MIN_KIMG_ALIGN means that no seed was provided.
- */
- return kaslr_offset() >= MIN_KIMG_ALIGN;
+ extern bool __kaslr_is_enabled;
+ return __kaslr_is_enabled;
}
+#else
+static inline void kaslr_init(void) { }
+static inline bool kaslr_enabled(void) { return false; }
+#endif
/*
* Allow all memory at the discovery stage. We will clip it later.
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 56911691bef0..a6fb325424e7 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -39,11 +39,16 @@ static inline void contextidr_thread_switch(struct task_struct *next)
/*
* Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0.
*/
-static inline void cpu_set_reserved_ttbr0(void)
+static inline void cpu_set_reserved_ttbr0_nosync(void)
{
unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
write_sysreg(ttbr, ttbr0_el1);
+}
+
+static inline void cpu_set_reserved_ttbr0(void)
+{
+ cpu_set_reserved_ttbr0_nosync();
isb();
}
@@ -52,7 +57,6 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
{
BUG_ON(pgd == swapper_pg_dir);
- cpu_set_reserved_ttbr0();
cpu_do_switch_mm(virt_to_phys(pgd),mm);
}
@@ -164,7 +168,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
* up (i.e. cpufeature framework is not up yet) and
* latter only when we enable CNP via cpufeature's
* enable() callback.
- * Also we rely on the cpu_hwcap bit being set before
+ * Also we rely on the system_cpucaps bit being set before
* calling the enable() function.
*/
ttbr1 |= TTBR_CNP_BIT;
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 18734fed3bdd..bfa6638b4c93 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -7,7 +7,6 @@
#include <asm-generic/module.h>
-#ifdef CONFIG_ARM64_MODULE_PLTS
struct mod_plt_sec {
int plt_shndx;
int plt_num_entries;
@@ -21,7 +20,6 @@ struct mod_arch_specific {
/* for CONFIG_DYNAMIC_FTRACE */
struct plt_entry *ftrace_trampolines;
};
-#endif
u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, const Elf64_Rela *rela,
@@ -30,12 +28,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
void *loc, u64 val);
-#ifdef CONFIG_RANDOMIZE_BASE
-extern u64 module_alloc_base;
-#else
-#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
-#endif
-
struct plt_entry {
/*
* A program that conforms to the AArch64 Procedure Call Standard
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index dbba4b7559aa..b9ae8349e35d 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,9 +1,7 @@
SECTIONS {
-#ifdef CONFIG_ARM64_MODULE_PLTS
.plt 0 : { BYTE(0) }
.init.plt 0 : { BYTE(0) }
.text.ftrace_trampoline 0 : { BYTE(0) }
-#endif
#ifdef CONFIG_KASAN_SW_TAGS
/*
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index b9ba19dbdb69..9abcc8ef3087 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -140,17 +140,11 @@ PERCPU_RET_OP(add, add, ldadd)
* re-enabling preemption for preemptible kernels, but doing that in a way
* which builds inside a module would mean messing directly with the preempt
* count. If you do this, peterz and tglx will hunt you down.
+ *
+ * Not to mention it'll break the actual preemption model for missing a
+ * preemption point when TIF_NEED_RESCHED gets set while preemption is
+ * disabled.
*/
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
-({ \
- int __ret; \
- preempt_disable_notrace(); \
- __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
- raw_cpu_ptr(&(ptr2)), \
- o1, o2, n1, n2); \
- preempt_enable_notrace(); \
- __ret; \
-})
#define _pcp_protect(op, pcp, ...) \
({ \
@@ -240,6 +234,22 @@ PERCPU_RET_OP(add, add, ldadd)
#define this_cpu_cmpxchg_8(pcp, o, n) \
_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, o, n) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = o; \
+ new__ = n; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128_local((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
#ifdef __KVM_NVHE_HYPERVISOR__
extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
#define __per_cpu_offset
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index f658aafc47df..e4944d517c99 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -171,6 +171,14 @@
#define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2)
/*
+ * PIIndex[3:0] encoding (Permission Indirection Extension)
+ */
+#define PTE_PI_IDX_0 6 /* AP[1], USER */
+#define PTE_PI_IDX_1 51 /* DBM */
+#define PTE_PI_IDX_2 53 /* PXN */
+#define PTE_PI_IDX_3 54 /* UXN */
+
+/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9b165117a454..eed814b00a38 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -27,6 +27,40 @@
*/
#define PMD_PRESENT_INVALID (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
+#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
+
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
+
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PTE_WRITE | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+
+#define _PAGE_KERNEL (PROT_NORMAL)
+#define _PAGE_KERNEL_RO ((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
+#define _PAGE_KERNEL_ROX ((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
+#define _PAGE_KERNEL_EXEC (PROT_NORMAL & ~PTE_PXN)
+#define _PAGE_KERNEL_EXEC_CONT ((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+
+#define _PAGE_SHARED (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define _PAGE_SHARED_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
+#define _PAGE_READONLY (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
+#define _PAGE_READONLY_EXEC (_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define _PAGE_EXECONLY (_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
+
+#ifdef __ASSEMBLY__
+#define PTE_MAYBE_NG 0
+#endif
+
#ifndef __ASSEMBLY__
#include <asm/cpufeature.h>
@@ -34,9 +68,6 @@
extern bool arm64_use_ng_mappings;
-#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
@@ -50,26 +81,11 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_GP 0
#endif
-#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
-#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
-
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
-#define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
-
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-
-#define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
-#define PAGE_KERNEL __pgprot(PROT_NORMAL)
-#define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
-#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
-#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
+#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_KERNEL_ROX)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_KERNEL_EXEC_CONT)
#define PAGE_S2_MEMATTR(attr, has_fwb) \
({ \
@@ -83,12 +99,62 @@ extern bool arm64_use_ng_mappings;
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_SHARED __pgprot(_PAGE_SHARED)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_READONLY)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_READONLY_EXEC)
+#define PAGE_EXECONLY __pgprot(_PAGE_EXECONLY)
#endif /* __ASSEMBLY__ */
+#define pte_pi_index(pte) ( \
+ ((pte & BIT(PTE_PI_IDX_3)) >> (PTE_PI_IDX_3 - 3)) | \
+ ((pte & BIT(PTE_PI_IDX_2)) >> (PTE_PI_IDX_2 - 2)) | \
+ ((pte & BIT(PTE_PI_IDX_1)) >> (PTE_PI_IDX_1 - 1)) | \
+ ((pte & BIT(PTE_PI_IDX_0)) >> (PTE_PI_IDX_0 - 0)))
+
+/*
+ * Page types used via Permission Indirection Extension (PIE). PIE uses
+ * the USER, DBM, PXN and UXN bits to generate an index which is used
+ * to look up the actual permission in PIR_ELx and PIRE0_EL1. We define
+ * combinations we use on non-PIE systems with the same encoding; for
+ * convenience these are listed here as comments, as are the unallocated
+ * encodings.
+ */
+
+/* 0: PAGE_DEFAULT */
+/* 1: PTE_USER */
+/* 2: PTE_WRITE */
+/* 3: PTE_WRITE | PTE_USER */
+/* 4: PAGE_EXECONLY PTE_PXN */
+/* 5: PAGE_READONLY_EXEC PTE_PXN | PTE_USER */
+/* 6: PTE_PXN | PTE_WRITE */
+/* 7: PAGE_SHARED_EXEC PTE_PXN | PTE_WRITE | PTE_USER */
+/* 8: PAGE_KERNEL_ROX PTE_UXN */
+/* 9: PTE_UXN | PTE_USER */
+/* a: PAGE_KERNEL_EXEC PTE_UXN | PTE_WRITE */
+/* b: PTE_UXN | PTE_WRITE | PTE_USER */
+/* c: PAGE_KERNEL_RO PTE_UXN | PTE_PXN */
+/* d: PAGE_READONLY PTE_UXN | PTE_PXN | PTE_USER */
+/* e: PAGE_KERNEL PTE_UXN | PTE_PXN | PTE_WRITE */
+/* f: PAGE_SHARED PTE_UXN | PTE_PXN | PTE_WRITE | PTE_USER */
+
+#define PIE_E0 ( \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+
+#define PIE_E1 ( \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+
#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index 13df982a0808..3fdae5fe3142 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -73,6 +73,7 @@ static inline void dynamic_scs_init(void) {}
#endif
int scs_patch(const u8 eh_frame[], int size);
+asmlinkage void scs_patch_vmlinux(void);
#endif /* __ASSEMBLY __ */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index f2d26235bfb4..9b31e6d0da17 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -99,7 +99,7 @@ static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
extern void __noreturn cpu_die(void);
extern void __noreturn cpu_die_early(void);
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index db7b371b367c..9cc501450486 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -100,5 +100,21 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int sco
u8 spectre_bhb_loop_affected(int scope);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
+
+void spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_loop_iter(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_wa3(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index d30217c21eff..17f687510c48 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -38,6 +38,7 @@
asmlinkage long __arm64_compat_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL_COMPAT(name) \
+ asmlinkage long __arm64_compat_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_compat_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
@@ -53,6 +54,7 @@
ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \
asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \
{ \
return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \
@@ -73,11 +75,13 @@
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
#define COND_SYSCALL(name) \
+ asmlinkage long __arm64_sys_##name(const struct pt_regs *regs); \
asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
+asmlinkage long __arm64_sys_ni_syscall(const struct pt_regs *__unused);
#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index eefd712f2430..7a1e62631814 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -140,25 +140,17 @@
#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
-#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
-#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
-#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
-#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2)
-#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2)
#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4)
#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
-#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
-#define SYS_OSLAR_OSLK BIT(0)
-
#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
-#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0))
-#define SYS_OSLSR_OSLM_NI 0
-#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3)
-#define SYS_OSLSR_OSLK BIT(1)
+#define OSLSR_EL1_OSLM_MASK (BIT(3) | BIT(0))
+#define OSLSR_EL1_OSLM_NI 0
+#define OSLSR_EL1_OSLM_IMPLEMENTED BIT(3)
+#define OSLSR_EL1_OSLK BIT(1)
#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
@@ -241,54 +233,8 @@
/*** End of Statistical Profiling Extension ***/
-/*
- * TRBE Registers
- */
-#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0)
-#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1)
-#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2)
-#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3)
-#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4)
-#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6)
-#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7)
-
-#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0)
-#define TRBLIMITR_LIMIT_SHIFT 12
-#define TRBLIMITR_NVM BIT(5)
-#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0)
-#define TRBLIMITR_TRIG_MODE_SHIFT 3
-#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0)
-#define TRBLIMITR_FILL_MODE_SHIFT 1
-#define TRBLIMITR_ENABLE BIT(0)
-#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0)
-#define TRBPTR_PTR_SHIFT 0
-#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0)
-#define TRBBASER_BASE_SHIFT 12
-#define TRBSR_EC_MASK GENMASK(5, 0)
-#define TRBSR_EC_SHIFT 26
-#define TRBSR_IRQ BIT(22)
-#define TRBSR_TRG BIT(21)
-#define TRBSR_WRAP BIT(20)
-#define TRBSR_ABORT BIT(18)
-#define TRBSR_STOP BIT(17)
-#define TRBSR_MSS_MASK GENMASK(15, 0)
-#define TRBSR_MSS_SHIFT 0
-#define TRBSR_BSC_MASK GENMASK(5, 0)
-#define TRBSR_BSC_SHIFT 0
-#define TRBSR_FSC_MASK GENMASK(5, 0)
-#define TRBSR_FSC_SHIFT 0
-#define TRBMAR_SHARE_MASK GENMASK(1, 0)
-#define TRBMAR_SHARE_SHIFT 8
-#define TRBMAR_OUTER_MASK GENMASK(3, 0)
-#define TRBMAR_OUTER_SHIFT 4
-#define TRBMAR_INNER_MASK GENMASK(3, 0)
-#define TRBMAR_INNER_SHIFT 0
-#define TRBTRG_TRG_MASK GENMASK(31, 0)
-#define TRBTRG_TRG_SHIFT 0
-#define TRBIDR_FLAG BIT(5)
-#define TRBIDR_PROG BIT(4)
-#define TRBIDR_ALIGN_MASK GENMASK(3, 0)
-#define TRBIDR_ALIGN_SHIFT 0
+#define TRBSR_EL1_BSC_MASK GENMASK(5, 0)
+#define TRBSR_EL1_BSC_SHIFT 0
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
@@ -764,6 +710,25 @@
#define ICH_VTR_TDS_SHIFT 19
#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
+/*
+ * Permission Indirection Extension (PIE) permission encodings.
+ * Encodings with the _O suffix have overlays applied (Permission Overlay Extension).
+ */
+#define PIE_NONE_O 0x0
+#define PIE_R_O 0x1
+#define PIE_X_O 0x2
+#define PIE_RX_O 0x3
+#define PIE_RW_O 0x5
+#define PIE_RWnX_O 0x6
+#define PIE_RWX_O 0x7
+#define PIE_R 0x8
+#define PIE_GCS 0x9
+#define PIE_RX 0xa
+#define PIE_RW 0xc
+#define PIE_RWX 0xe
+
+#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 848739c15de8..553d1bc559c6 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -55,10 +55,6 @@ struct thread_info {
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-void arch_release_task_struct(struct task_struct *tsk);
-int arch_dup_task_struct(struct task_struct *dst,
- struct task_struct *src);
-
#endif
#define TIF_SIGPENDING 0 /* signal pending */
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 1f361e2da516..d66dfb3a72dd 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -29,6 +29,8 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *s
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
+int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+
/*
* Move regs->pc to next instruction and do necessary setup before it
* is executed.
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 05f4fc265428..14be5000c5a0 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -65,7 +65,6 @@ static inline void __uaccess_ttbr0_disable(void)
ttbr &= ~TTBR_ASID_MASK;
/* reserved_pg_dir placed before swapper_pg_dir */
write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1);
- isb();
/* Set reserved ASID */
write_sysreg(ttbr, ttbr1_el1);
isb();
@@ -89,7 +88,6 @@ static inline void __uaccess_ttbr0_enable(void)
ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
ttbr1 |= ttbr0 & TTBR_ASID_MASK;
write_sysreg(ttbr1, ttbr1_el1);
- isb();
/* Restore user page table */
write_sysreg(ttbr0, ttbr0_el1);
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 037feba03a51..64a514f90131 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 451
+#define __NR_compat_syscalls 452
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 604a2053d006..d952a28463e0 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
#define __NR_set_mempolicy_home_node 450
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 69a4fb749c65..a2cac4305b1e 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -102,5 +102,6 @@
#define HWCAP2_SME_BI32I32 (1UL << 40)
#define HWCAP2_SME_B16B16 (1UL << 41)
#define HWCAP2_SME_F16F16 (1UL << 42)
+#define HWCAP2_MOPS (1UL << 43)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 656a10ea6c67..f23c1dc3f002 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -177,7 +177,7 @@ struct zt_context {
* vector length beyond its initial architectural limit of 2048 bits
* (16 quadwords).
*
- * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ
+ * See linux/Documentation/arch/arm64/sve.rst for a description of the VL/VQ
* terminology.
*/
#define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7c2bb4e72476..d95b3d6b471a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -42,9 +42,9 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o
obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
+obj-$(CONFIG_MODULES) += module.o module-plts.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
obj-$(CONFIG_CPU_IDLE) += cpuidle.o
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d32d4ed5519b..8ff6610af496 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,8 +24,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-#define ALT_CAP(a) ((a)->cpufeature & ~ARM64_CB_BIT)
-#define ALT_HAS_CB(a) ((a)->cpufeature & ARM64_CB_BIT)
+#define ALT_CAP(a) ((a)->cpucap & ~ARM64_CB_BIT)
+#define ALT_HAS_CB(a) ((a)->cpucap & ARM64_CB_BIT)
/* Volatile, as we may be patching the guts of READ_ONCE() */
static volatile int all_alternatives_applied;
@@ -37,12 +37,12 @@ struct alt_region {
struct alt_instr *end;
};
-bool alternative_is_applied(u16 cpufeature)
+bool alternative_is_applied(u16 cpucap)
{
- if (WARN_ON(cpufeature >= ARM64_NCAPS))
+ if (WARN_ON(cpucap >= ARM64_NCAPS))
return false;
- return test_bit(cpufeature, applied_alternatives);
+ return test_bit(cpucap, applied_alternatives);
}
/*
@@ -121,11 +121,11 @@ static noinstr void patch_alternative(struct alt_instr *alt,
* accidentally call into the cache.S code, which is patched by us at
* runtime.
*/
-static void clean_dcache_range_nopatch(u64 start, u64 end)
+static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
{
u64 cur, d_size, ctr_el0;
- ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+ ctr_el0 = arm64_ftr_reg_ctrel0.sys_val;
d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
CTR_EL0_DminLine_SHIFT);
cur = start & ~(d_size - 1);
@@ -141,7 +141,7 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
static void __apply_alternatives(const struct alt_region *region,
bool is_module,
- unsigned long *feature_mask)
+ unsigned long *cpucap_mask)
{
struct alt_instr *alt;
__le32 *origptr, *updptr;
@@ -151,7 +151,7 @@ static void __apply_alternatives(const struct alt_region *region,
int nr_inst;
int cap = ALT_CAP(alt);
- if (!test_bit(cap, feature_mask))
+ if (!test_bit(cap, cpucap_mask))
continue;
if (!cpus_have_cap(cap))
@@ -188,11 +188,10 @@ static void __apply_alternatives(const struct alt_region *region,
icache_inval_all_pou();
isb();
- /* Ignore ARM64_CB bit from feature mask */
bitmap_or(applied_alternatives, applied_alternatives,
- feature_mask, ARM64_NCAPS);
+ cpucap_mask, ARM64_NCAPS);
bitmap_and(applied_alternatives, applied_alternatives,
- cpu_hwcaps, ARM64_NCAPS);
+ system_cpucaps, ARM64_NCAPS);
}
}
@@ -239,7 +238,7 @@ static int __init __apply_alternatives_multi_stop(void *unused)
} else {
DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);
- bitmap_complement(remaining_capabilities, boot_capabilities,
+ bitmap_complement(remaining_capabilities, boot_cpucaps,
ARM64_NCAPS);
BUG_ON(all_alternatives_applied);
@@ -274,7 +273,7 @@ void __init apply_boot_alternatives(void)
pr_info("applying boot alternatives\n");
__apply_alternatives(&kernel_alternatives, false,
- &boot_capabilities[0]);
+ &boot_cpucaps[0]);
}
#ifdef CONFIG_MODULES
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 7d7128c65161..6ea7f23b1287 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -105,11 +105,11 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
#endif
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-EXPORT_SYMBOL(cpu_hwcaps);
-static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS];
+DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
+EXPORT_SYMBOL(system_cpucaps);
+static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS];
-DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
+DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS);
bool arm64_use_ng_mappings = false;
EXPORT_SYMBOL(arm64_use_ng_mappings);
@@ -137,7 +137,7 @@ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
void dump_cpu_features(void)
{
/* file-wide pr_fmt adds "CPU features: " prefix */
- pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
+ pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
}
#define ARM64_CPUID_FIELDS(reg, field, min_value) \
@@ -223,6 +223,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -364,6 +365,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0),
@@ -396,6 +398,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
@@ -722,6 +730,7 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1,
&id_aa64mmfr1_override),
ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
+ ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
/* Op1 = 0, CRn = 1, CRm = 2 */
ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
@@ -954,24 +963,24 @@ extern const struct arm64_cpu_capabilities arm64_errata[];
static const struct arm64_cpu_capabilities arm64_features[];
static void __init
-init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
+init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
{
for (; caps->matches; caps++) {
if (WARN(caps->capability >= ARM64_NCAPS,
"Invalid capability %d\n", caps->capability))
continue;
- if (WARN(cpu_hwcaps_ptrs[caps->capability],
+ if (WARN(cpucap_ptrs[caps->capability],
"Duplicate entry for capability %d\n",
caps->capability))
continue;
- cpu_hwcaps_ptrs[caps->capability] = caps;
+ cpucap_ptrs[caps->capability] = caps;
}
}
-static void __init init_cpu_hwcaps_indirect_list(void)
+static void __init init_cpucap_indirect_list(void)
{
- init_cpu_hwcaps_indirect_list_from_array(arm64_features);
- init_cpu_hwcaps_indirect_list_from_array(arm64_errata);
+ init_cpucap_indirect_list_from_array(arm64_features);
+ init_cpucap_indirect_list_from_array(arm64_errata);
}
static void __init setup_boot_cpu_capabilities(void);
@@ -1017,6 +1026,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
+ init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
@@ -1049,10 +1059,10 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
/*
- * Initialize the indirect array of CPU hwcaps capabilities pointers
- * before we handle the boot CPU below.
+ * Initialize the indirect array of CPU capabilities pointers before we
+ * handle the boot CPU below.
*/
- init_cpu_hwcaps_indirect_list();
+ init_cpucap_indirect_list();
/*
* Detect and enable early CPU capabilities based on the boot CPU,
@@ -1262,6 +1272,8 @@ void update_cpu_features(int cpu,
info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
+ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu,
+ info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
@@ -1391,6 +1403,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
read_sysreg_case(SYS_ID_AA64MMFR0_EL1);
read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
+ read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
@@ -2048,9 +2061,9 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry,
static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
int scope)
{
- bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
- bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
- bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
+ bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
+ bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope);
+ bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope);
return apa || apa3 || api;
}
@@ -2186,6 +2199,11 @@ static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused)
set_pstate_dit(1);
}
+static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
+}
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2235,11 +2253,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_ECV_CNTPOFF,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR0_EL1,
- .field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
- .field_width = 4,
- .sign = FTR_UNSIGNED,
- .min_field_value = ID_AA64MMFR0_EL1_ECV_CNTPOFF,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF)
},
#ifdef CONFIG_ARM64_PAN
{
@@ -2309,6 +2323,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = is_kvm_protected_mode,
},
+ {
+ .desc = "HCRX_EL2 register",
+ .capability = ARM64_HAS_HCX,
+ .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP)
+ },
#endif
{
.desc = "Kernel page table isolation (KPTI)",
@@ -2641,6 +2662,27 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_dit,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP)
},
+ {
+ .desc = "Memory Copy and Memory Set instructions",
+ .capability = ARM64_HAS_MOPS,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_mops,
+ ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP)
+ },
+ {
+ .capability = ARM64_HAS_TCR2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP)
+ },
+ {
+ .desc = "Stage-1 Permission Indirection Extension (S1PIE)",
+ .capability = ARM64_HAS_S1PIE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP)
+ },
{},
};
@@ -2769,6 +2811,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM),
HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
+ HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS),
#ifdef CONFIG_ARM64_SME
HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
@@ -2895,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask) ||
cpus_have_cap(caps->capability) ||
!caps->matches(caps, cpucap_default_scope(caps)))
@@ -2903,10 +2946,11 @@ static void update_cpu_capabilities(u16 scope_mask)
if (caps->desc)
pr_info("detected: %s\n", caps->desc);
- cpus_set_cap(caps->capability);
+
+ __set_bit(caps->capability, system_cpucaps);
if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU))
- set_bit(caps->capability, boot_capabilities);
+ set_bit(caps->capability, boot_cpucaps);
}
}
@@ -2920,7 +2964,7 @@ static int cpu_enable_non_boot_scope_capabilities(void *__unused)
u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU;
for_each_available_cap(i) {
- const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i];
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i];
if (WARN_ON(!cap))
continue;
@@ -2950,7 +2994,7 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
for (i = 0; i < ARM64_NCAPS; i++) {
unsigned int num;
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask))
continue;
num = caps->capability;
@@ -2995,7 +3039,7 @@ static void verify_local_cpu_caps(u16 scope_mask)
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
for (i = 0; i < ARM64_NCAPS; i++) {
- caps = cpu_hwcaps_ptrs[i];
+ caps = cpucap_ptrs[i];
if (!caps || !(caps->type & scope_mask))
continue;
@@ -3194,7 +3238,7 @@ static void __init setup_boot_cpu_capabilities(void)
bool this_cpu_has_cap(unsigned int n)
{
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
- const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
if (cap)
return cap->matches(cap, SCOPE_LOCAL_CPU);
@@ -3207,13 +3251,13 @@ EXPORT_SYMBOL_GPL(this_cpu_has_cap);
/*
* This helper function is used in a narrow window when,
* - The system wide safe registers are set with all the SMP CPUs and,
- * - The SYSTEM_FEATURE cpu_hwcaps may not have been set.
+ * - The SYSTEM_FEATURE system_cpucaps may not have been set.
* In all other cases cpus_have_{const_}cap() should be used.
*/
static bool __maybe_unused __system_matches_cap(unsigned int n)
{
if (n < ARM64_NCAPS) {
- const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+ const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n];
if (cap)
return cap->matches(cap, SCOPE_SYSTEM);
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 42e19fff40ee..d1f68599c29f 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -13,7 +13,7 @@
#include <linux/of_device.h>
#include <linux/psci.h>
-#ifdef CONFIG_ACPI
+#ifdef CONFIG_ACPI_PROCESSOR_IDLE
#include <acpi/processor.h>
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index eb4378c23b3c..58622dc85917 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -125,6 +125,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_BI32I32] = "smebi32i32",
[KERNEL_HWCAP_SME_B16B16] = "smeb16b16",
[KERNEL_HWCAP_SME_F16F16] = "smef16f16",
+ [KERNEL_HWCAP_MOPS] = "mops",
};
#ifdef CONFIG_COMPAT
@@ -446,6 +447,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
+ info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3af3c01c93a6..6b2e0c367702 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void)
lockdep_hardirqs_on(CALLER_ADDR0);
}
-static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
+static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long flags;
@@ -135,11 +135,13 @@ static __always_inline void prepare_exit_to_user_mode(struct pt_regs *regs)
flags = read_thread_flags();
if (unlikely(flags & _TIF_WORK_MASK))
do_notify_resume(regs, flags);
+
+ lockdep_sys_exit();
}
static __always_inline void exit_to_user_mode(struct pt_regs *regs)
{
- prepare_exit_to_user_mode(regs);
+ exit_to_user_mode_prepare(regs);
mte_check_tfsr_exit();
__exit_to_user_mode();
}
@@ -611,6 +613,14 @@ static void noinstr el0_bti(struct pt_regs *regs)
exit_to_user_mode(regs);
}
+static void noinstr el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+ enter_from_user_mode(regs);
+ local_daif_restore(DAIF_PROCCTX);
+ do_el0_mops(regs, esr);
+ exit_to_user_mode(regs);
+}
+
static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
{
enter_from_user_mode(regs);
@@ -688,6 +698,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
case ESR_ELx_EC_BTI:
el0_bti(regs);
break;
+ case ESR_ELx_EC_MOPS:
+ el0_mops(regs, esr);
+ break;
case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_SOFTSTP_LOW:
case ESR_ELx_EC_WATCHPT_LOW:
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ab2a6e33c052..a40e5e50fa55 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -101,12 +101,11 @@
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm
- .macro tramp_alias, dst, sym, tmp
- mov_q \dst, TRAMP_VALIAS
- adr_l \tmp, \sym
- add \dst, \dst, \tmp
- adr_l \tmp, .entry.tramp.text
- sub \dst, \dst, \tmp
+ .macro tramp_alias, dst, sym
+ .set .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
+ movz \dst, :abs_g2_s:.Lalias\@
+ movk \dst, :abs_g1_nc:.Lalias\@
+ movk \dst, :abs_g0_nc:.Lalias\@
.endm
/*
@@ -435,13 +434,14 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
eret
alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- bne 4f
msr far_el1, x29
- tramp_alias x30, tramp_exit_native, x29
- br x30
-4:
- tramp_alias x30, tramp_exit_compat, x29
- br x30
+
+ ldr_this_cpu x30, this_cpu_vector, x29
+ tramp_alias x29, tramp_exit
+ msr vbar_el1, x30 // install vector table
+ ldr lr, [sp, #S_LR] // restore x30
+ add sp, sp, #PT_REGS_SIZE // restore sp
+ br x29
#endif
.else
ldr lr, [sp, #S_LR]
@@ -732,22 +732,6 @@ alternative_else_nop_endif
.org 1b + 128 // Did we overflow the ventry slot?
.endm
- .macro tramp_exit, regsize = 64
- tramp_data_read_var x30, this_cpu_vector
- get_this_cpu_offset x29
- ldr x30, [x30, x29]
-
- msr vbar_el1, x30
- ldr lr, [sp, #S_LR]
- tramp_unmap_kernel x29
- .if \regsize == 64
- mrs x29, far_el1
- .endif
- add sp, sp, #PT_REGS_SIZE // restore sp
- eret
- sb
- .endm
-
.macro generate_tramp_vector, kpti, bhb
.Lvector_start\@:
.space 0x400
@@ -768,7 +752,7 @@ alternative_else_nop_endif
*/
.pushsection ".entry.tramp.text", "ax"
.align 11
-SYM_CODE_START_NOALIGN(tramp_vectors)
+SYM_CODE_START_LOCAL_NOALIGN(tramp_vectors)
#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP
generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW
@@ -777,13 +761,12 @@ SYM_CODE_START_NOALIGN(tramp_vectors)
generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE
SYM_CODE_END(tramp_vectors)
-SYM_CODE_START(tramp_exit_native)
- tramp_exit
-SYM_CODE_END(tramp_exit_native)
-
-SYM_CODE_START(tramp_exit_compat)
- tramp_exit 32
-SYM_CODE_END(tramp_exit_compat)
+/*
+ * Last step of the return to EL0 with the kernel unmapped. The kernel exit
+ * path saves the real x29 in far_el1 and branches here through the trampoline
+ * alias held in x29, so x29 can be handed to tramp_unmap_kernel (presumably
+ * as its scratch register) before being reloaded from far_el1.
+ */
+SYM_CODE_START_LOCAL(tramp_exit)
+ tramp_unmap_kernel x29
+ mrs x29, far_el1 // restore x29
+ eret
+ sb
+SYM_CODE_END(tramp_exit)
.popsection // .entry.tramp.text
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
@@ -1077,7 +1060,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
alternative_else_nop_endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3
+ tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
br x5
#endif
SYM_CODE_END(__sdei_asm_handler)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 2fbafa5cc7ac..7a1aeb95d7c3 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1649,6 +1649,7 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
+ sme_smstop();
}
current->thread.fp_type = FP_STATE_FPSIMD;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 432626c866a8..a650f5e11fc5 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
static struct plt_entry *get_ftrace_plt(struct module *mod)
{
-#ifdef CONFIG_ARM64_MODULE_PLTS
+#ifdef CONFIG_MODULES
struct plt_entry *plt = mod->arch.ftrace_trampolines;
return &plt[FTRACE_PLT_IDX];
@@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
* must use a PLT to reach it. We can only place PLTs for modules, and
* only when module PLT support is built-in.
*/
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ if (!IS_ENABLED(CONFIG_MODULES))
return false;
/*
@@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
*
* Note: 'mod' is only set at module load time.
*/
- if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) &&
- IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod)
return aarch64_insn_patch_text_nosync((void *)pc, new);
- }
if (!ftrace_find_callable_addr(rec, mod, &addr))
return -EINVAL;
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e92caebff46a..0f5a30f109d9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -382,7 +382,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
adrp x0, init_idmap_pg_dir
adrp x3, _text
adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
- mov x7, SWAPPER_RX_MMUFLAGS
+ mov_q x7, SWAPPER_RX_MMUFLAGS
map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
@@ -391,7 +391,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
adrp x2, init_pg_dir
adrp x3, init_pg_end
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
- mov x5, SWAPPER_RW_MMUFLAGS
+ mov_q x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
@@ -402,7 +402,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
- mov x5, SWAPPER_RW_MMUFLAGS
+ mov_q x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
@@ -430,7 +430,7 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
- mov x7, SWAPPER_RW_MMUFLAGS
+ mov_q x7, SWAPPER_RW_MMUFLAGS
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 788597a6b6a2..02870beb271e 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -99,7 +99,6 @@ int pfn_is_nosave(unsigned long pfn)
void notrace save_processor_state(void)
{
- WARN_ON(num_online_cpus() != 1);
}
void notrace restore_processor_state(void)
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b29a311bb055..db2a1861bb97 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -973,14 +973,6 @@ static int hw_breakpoint_reset(unsigned int cpu)
return 0;
}
-#ifdef CONFIG_CPU_PM
-extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
-#else
-static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
-{
-}
-#endif
-
/*
* One-time initialisation.
*/
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 9439240c3fcf..d63de1973ddb 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -119,6 +119,24 @@ SYM_CODE_START_LOCAL(__finalise_el2)
msr ttbr1_el1, x0
mrs_s x0, SYS_MAIR_EL12
msr mair_el1, x0
+ mrs x1, REG_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4
+ cbz x1, .Lskip_tcr2
+ mrs x0, REG_TCR2_EL12
+ msr REG_TCR2_EL1, x0
+
+ // Transfer permission indirection state
+ mrs x1, REG_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lskip_indirection
+ mrs x0, REG_PIRE0_EL12
+ msr REG_PIRE0_EL1, x0
+ mrs x0, REG_PIR_EL12
+ msr REG_PIR_EL1, x0
+
+.Lskip_indirection:
+.Lskip_tcr2:
+
isb
// Hack the exception return to stay at EL2
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 370ab84fd06e..8439248c21d3 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -123,6 +123,7 @@ static const struct ftr_set_desc isar2 __initconst = {
.fields = {
FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+ FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
{}
},
};
@@ -174,6 +175,7 @@ static const struct {
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
"id_aa64isar1.api=0 id_aa64isar1.apa=0 "
"id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
+ { "arm64.nomops", "id_aa64isar2.mops=0" },
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
{ "nokaslr", "kaslr.disabled=1" },
};
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9..17f96a19781d 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -4,90 +4,35 @@
*/
#include <linux/cache.h>
-#include <linux/crc32.h>
#include <linux/init.h>
-#include <linux/libfdt.h>
-#include <linux/mm_types.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/pgtable.h>
-#include <linux/random.h>
+#include <linux/printk.h>
-#include <asm/fixmap.h>
-#include <asm/kernel-pgtable.h>
+#include <asm/cpufeature.h>
#include <asm/memory.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
struct arm64_ftr_override kaslr_feature_override __initdata;
-static int __init kaslr_init(void)
-{
- u64 module_range;
- u32 seed;
-
- /*
- * Set a reasonable default for module_alloc_base in case
- * we end up running with module randomization disabled.
- */
- module_alloc_base = (u64)_etext - MODULES_VSIZE;
+bool __ro_after_init __kaslr_is_enabled = false;
+void __init kaslr_init(void)
+{
if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
pr_info("KASLR disabled on command line\n");
- return 0;
- }
-
- if (!kaslr_enabled()) {
- pr_warn("KASLR disabled due to lack of seed\n");
- return 0;
+ return;
}
- pr_info("KASLR enabled\n");
-
/*
- * KASAN without KASAN_VMALLOC does not expect the module region to
- * intersect the vmalloc region, since shadow memory is allocated for
- * each module at load time, whereas the vmalloc region will already be
- * shadowed by KASAN zero pages.
+ * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical
+ * placement of the image rather than from the seed, so a displacement
+ * of less than MIN_KIMG_ALIGN means that no seed was provided.
*/
- BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
- IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
- !IS_ENABLED(CONFIG_KASAN_VMALLOC));
-
- seed = get_random_u32();
-
- if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
- /*
- * Randomize the module region over a 2 GB window covering the
- * kernel. This reduces the risk of modules leaking information
- * about the address of the kernel itself, but results in
- * branches between modules and the core kernel that are
- * resolved via PLTs. (Branches between modules will be
- * resolved normally.)
- */
- module_range = SZ_2G - (u64)(_end - _stext);
- module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
- } else {
- /*
- * Randomize the module region by setting module_alloc_base to
- * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
- * _stext) . This guarantees that the resulting region still
- * covers [_stext, _etext], and that all relative branches can
- * be resolved without veneers unless this region is exhausted
- * and we fall back to a larger 2GB window in module_alloc()
- * when ARM64_MODULE_PLTS is enabled.
- */
- module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+ if (kaslr_offset() < MIN_KIMG_ALIGN) {
+ pr_warn("KASLR disabled due to lack of seed\n");
+ return;
}
- /* use the lower 21 bits to randomize the base of the module region */
- module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
- module_alloc_base &= PAGE_MASK;
-
- return 0;
+ pr_info("KASLR enabled\n");
+ __kaslr_is_enabled = true;
}
-subsys_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index 5ed6a585f21f..636be6715155 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -48,7 +48,7 @@ static void *image_load(struct kimage *image,
/*
* We require a kernel with an unambiguous Image header. Per
- * Documentation/arm64/booting.rst, this is the case when image_size
+ * Documentation/arch/arm64/booting.rst, this is the case when image_size
* is non-zero (practically speaking, since v3.17).
*/
h = (struct arm64_image_header *)kernel;
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 692e9d2e31e5..af046ceac22d 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -10,7 +10,7 @@
* aarch32_setup_additional_pages() and are provided for compatibility
* reasons with 32 bit (aarch32) applications that need them.
*
- * See Documentation/arm/kernel_user_helpers.rst for formal definitions.
+ * See Documentation/arch/arm/kernel_user_helpers.rst for formal definitions.
*/
#include <asm/unistd.h>
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 543493bf924d..ad02058756b5 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -7,6 +7,7 @@
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/moduleloader.h>
#include <linux/sort.h>
static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..dd851297596e 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -7,6 +7,8 @@
* Author: Will Deacon <will.deacon@arm.com>
*/
+#define pr_fmt(fmt) "Modules: " fmt
+
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
@@ -15,52 +17,131 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
+#include <linux/random.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>
+
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/scs.h>
#include <asm/sections.h>
+static u64 module_direct_base __ro_after_init = 0;
+static u64 module_plt_base __ro_after_init = 0;
+
+/*
+ * Choose a random page-aligned base address for a window of 'size' bytes which
+ * entirely contains the interval [start, end - 1].
+ *
+ * Returns 0 when the interval is not smaller than 'size', i.e. when no such
+ * window exists. Callers treat a zero base as "this region is unavailable".
+ */
+static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
+{
+ u64 max_pgoff, pgoff;
+
+ if ((end - start) >= size)
+ return 0;
+
+ /* Whole pages of slack left once the interval is placed in the window. */
+ max_pgoff = (size - (end - start)) / PAGE_SIZE;
+ pgoff = get_random_u32_inclusive(0, max_pgoff);
+
+ /* Slide the window base down from 'start' by the chosen number of pages. */
+ return start - pgoff * PAGE_SIZE;
+}
+
+/*
+ * Modules may directly reference data and text anywhere within the kernel
+ * image and other modules. References using PREL32 relocations have a +/-2G
+ * range, and so we need to ensure that the entire kernel image and all modules
+ * fall within a 2G window such that these are always within range.
+ *
+ * Modules may directly branch to functions and code within the kernel text,
+ * and to functions and code within other modules. These branches will use
+ * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure
+ * that the entire kernel text and all module text falls within a 128M window
+ * such that these are always within range. With PLTs, we can expand this to a
+ * 2G window.
+ *
+ * We chose the 128M region to surround the entire kernel image (rather than
+ * just the text) as using the same bounds for the 128M and 2G regions ensures
+ * by construction that we never select a 128M region that is not a subset of
+ * the 2G region. For very large and unusual kernel configurations this means
+ * we may fall back to PLTs where they could have been avoided, but this keeps
+ * the logic significantly simpler.
+ */
+static int __init module_init_limits(void)
+{
+ u64 kernel_end = (u64)_end;
+ u64 kernel_start = (u64)_text;
+ u64 kernel_size = kernel_end - kernel_start;
+
+ /*
+ * The default modules region is placed immediately below the kernel
+ * image, and is large enough to use the full 2G relocation range.
+ */
+ BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END);
+ BUILD_BUG_ON(MODULES_VSIZE < SZ_2G);
+
+ if (!kaslr_enabled()) {
+ /* No randomization: both windows end at the end of the image. */
+ if (kernel_size < SZ_128M)
+ module_direct_base = kernel_end - SZ_128M;
+ if (kernel_size < SZ_2G)
+ module_plt_base = kernel_end - SZ_2G;
+ } else {
+ u64 min = kernel_start;
+ u64 max = kernel_end;
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+ pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n");
+ } else {
+ module_direct_base = random_bounding_box(SZ_128M, min, max);
+ if (module_direct_base) {
+ /* The 2G window must enclose the 128M window. */
+ min = module_direct_base;
+ max = module_direct_base + SZ_128M;
+ }
+ }
+
+ module_plt_base = random_bounding_box(SZ_2G, min, max);
+ }
+
+ /*
+ * A zero base means the corresponding window is unavailable, so report
+ * 0 pages for it. Terminate each message with a newline so that it is
+ * emitted as a complete log line.
+ */
+ pr_info("%llu pages in range for non-PLT usage\n",
+ module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
+ pr_info("%llu pages in range for PLT usage\n",
+ module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0);
+
+ return 0;
+}
+subsys_initcall(module_init_limits);
+
void *module_alloc(unsigned long size)
{
- u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- /* Silence the initial allocation */
- if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- gfp_mask |= __GFP_NOWARN;
-
- if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
- IS_ENABLED(CONFIG_KASAN_SW_TAGS))
- /* don't exceed the static module region - see below */
- module_alloc_end = MODULES_END;
-
- p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
-
- if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
- (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
- (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
- !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
- /*
- * KASAN without KASAN_VMALLOC can only deal with module
- * allocations being served from the reserved module region,
- * since the remainder of the vmalloc region is already
- * backed by zero shadow pages, and punching holes into it
- * is non-trivial. Since the module region is not randomized
- * when KASAN is enabled without KASAN_VMALLOC, it is even
- * less likely that the module region gets exhausted, so we
- * can simply omit this fallback in that case.
- */
- p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_base + SZ_2G, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ void *p = NULL;
+
+ /*
+ * Where possible, prefer to allocate within direct branch range of the
+ * kernel such that no PLTs are necessary.
+ */
+ if (module_direct_base) {
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ module_direct_base,
+ module_direct_base + SZ_128M,
+ GFP_KERNEL | __GFP_NOWARN,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
+ if (!p && module_plt_base) {
+ p = __vmalloc_node_range(size, MODULE_ALIGN,
+ module_plt_base,
+ module_plt_base + SZ_2G,
+ GFP_KERNEL | __GFP_NOWARN,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
+
+ if (!p) {
+ pr_warn_ratelimited("%s: unable to allocate memory\n",
+ __func__);
+ }
+
+ if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
vfree(p);
return NULL;
}
@@ -448,9 +529,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
AARCH64_INSN_IMM_26);
-
- if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
- ovf == -ERANGE) {
+ if (ovf == -ERANGE) {
val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
@@ -487,7 +566,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod)
{
-#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+#if defined(CONFIG_DYNAMIC_FTRACE)
const Elf_Shdr *s;
struct plt_entry *plts;
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 7e89968bd282..4c5ef9b20065 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -416,10 +416,9 @@ long get_mte_ctrl(struct task_struct *task)
static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
struct iovec *kiov, unsigned int gup_flags)
{
- struct vm_area_struct *vma;
void __user *buf = kiov->iov_base;
size_t len = kiov->iov_len;
- int ret;
+ int err = 0;
int write = gup_flags & FOLL_WRITE;
if (!access_ok(buf, len))
@@ -429,14 +428,16 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
return -EIO;
while (len) {
+ struct vm_area_struct *vma;
unsigned long tags, offset;
void *maddr;
- struct page *page = NULL;
+ struct page *page = get_user_page_vma_remote(mm, addr,
+ gup_flags, &vma);
- ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
- &vma, NULL);
- if (ret <= 0)
+ if (IS_ERR_OR_NULL(page)) {
+ err = page == NULL ? -EIO : PTR_ERR(page);
break;
+ }
/*
* Only copy tags if the page has been mapped as PROT_MTE
@@ -446,7 +447,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
* was never mapped with PROT_MTE.
*/
if (!(vma->vm_flags & VM_MTE)) {
- ret = -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
put_page(page);
break;
}
@@ -479,7 +480,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
kiov->iov_len = buf - kiov->iov_base;
if (!kiov->iov_len) {
/* check for error accessing the tracee's address space */
- if (ret <= 0)
+ if (err)
return -EIO;
else
return -EFAULT;
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index b8ec7b3ac9cb..417a8a86b2db 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ kaslr_init();
+
/*
* If know now we are going to need KPTI then use non-global
* mappings from the start, avoiding the cost of rewriting
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 2cfc810d0a5b..e304f7ebec2a 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -23,6 +23,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
+#include <asm/exception.h>
#include <asm/cacheflush.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
@@ -398,7 +399,7 @@ static int restore_tpidr2_context(struct user_ctxs *user)
__get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
if (!err)
- current->thread.tpidr2_el0 = tpidr2_el0;
+ write_sysreg_s(tpidr2_el0, SYS_TPIDR2_EL0);
return err;
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d00d4cbb31b1..edd63894d61e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -332,17 +332,13 @@ static int op_cpu_kill(unsigned int cpu)
}
/*
- * called on the thread which is asking for a CPU to be shutdown -
- * waits until shutdown has completed, or it is timed out.
+ * Called on the thread which is asking for a CPU to be shutdown after the
+ * shutdown completed.
*/
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
int err;
- if (!cpu_wait_death(cpu, 5)) {
- pr_crit("CPU%u: cpu didn't die\n", cpu);
- return;
- }
pr_debug("CPU%u: shutdown\n", cpu);
/*
@@ -369,8 +365,8 @@ void __noreturn cpu_die(void)
local_daif_mask();
- /* Tell __cpu_die() that this CPU is now safe to dispose of */
- (void)cpu_report_death();
+ /* Tell cpuhp_bp_sync_dead() that this CPU is now safe to dispose of */
+ cpuhp_ap_report_dead();
/*
* Actually shutdown the CPU. This must never fail. The specific hotplug
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index da84cf855c44..5a668d7f3c1f 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -147,11 +147,9 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
* exit regardless, as the old entry assembly did.
*/
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- local_daif_mask();
flags = read_thread_flags();
if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
return;
- local_daif_restore(DAIF_PROCCTX);
}
trace_exit:
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4bb1b8f47298..8b70759cdbb9 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -514,6 +514,63 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
die("Oops - FPAC", regs, esr);
}
+/*
+ * Handle a MOPS exception taken from EL0.
+ *
+ * Decodes the syndrome in 'esr' to find the destination, source and size
+ * registers, rewrites their values in 'regs' into the form expected by a
+ * prologue instruction, moves the saved PC back by 4 or 8 bytes and then
+ * fast-forwards any user single-step in progress.
+ */
+void do_el0_mops(struct pt_regs *regs, unsigned long esr)
+{
+ bool wrong_option = esr & ESR_ELx_MOPS_ISS_WRONG_OPTION;
+ bool option_a = esr & ESR_ELx_MOPS_ISS_OPTION_A;
+ int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
+ int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
+ int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
+ unsigned long dst, src, size;
+
+ dst = pt_regs_read_reg(regs, dstreg);
+ src = pt_regs_read_reg(regs, srcreg);
+ size = pt_regs_read_reg(regs, sizereg);
+
+ /*
+ * Put the registers back in the original format suitable for a
+ * prologue instruction, using the generic return routine from the
+ * Arm ARM (DDI 0487I.a) rules CNTMJ and MWFQH.
+ */
+ if (esr & ESR_ELx_MOPS_ISS_MEM_INST) {
+ /* SET* instruction */
+ if (option_a ^ wrong_option) {
+ /* Format is from Option A; forward set */
+ pt_regs_write_reg(regs, dstreg, dst + size);
+ pt_regs_write_reg(regs, sizereg, -size);
+ }
+ } else {
+ /* CPY* instruction */
+ if (!(option_a ^ wrong_option)) {
+ /* Format is from Option B */
+ if (regs->pstate & PSR_N_BIT) {
+ /* Backward copy */
+ pt_regs_write_reg(regs, dstreg, dst - size);
+ pt_regs_write_reg(regs, srcreg, src - size);
+ }
+ } else {
+ /* Format is from Option A */
+ if (size & BIT(63)) {
+ /* Forward copy */
+ pt_regs_write_reg(regs, dstreg, dst + size);
+ pt_regs_write_reg(regs, srcreg, src + size);
+ pt_regs_write_reg(regs, sizereg, -size);
+ }
+ }
+ }
+
+ /*
+ * Rewind the PC: 8 bytes when the exception came from the epilogue,
+ * 4 bytes otherwise (presumably from the main instruction, one
+ * instruction after the prologue - confirm against the Arm ARM).
+ */
+ if (esr & ESR_ELx_MOPS_ISS_FROM_EPILOGUE)
+ regs->pc -= 8;
+ else
+ regs->pc -= 4;
+
+ /*
+ * If single stepping then finish the step before executing the
+ * prologue instruction.
+ */
+ user_fastforward_single_step(current);
+}
+
#define __user_cache_maint(insn, address, res) \
if (address >= TASK_SIZE_MAX) { \
res = -EFAULT; \
@@ -824,6 +881,7 @@ static const char *esr_class_str[] = {
[ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)",
[ESR_ELx_EC_DABT_CUR] = "DABT (current EL)",
[ESR_ELx_EC_SP_ALIGN] = "SP Alignment",
+ [ESR_ELx_EC_MOPS] = "MOPS",
[ESR_ELx_EC_FP_EXC32] = "FP (AArch32)",
[ESR_ELx_EC_FP_EXC64] = "FP (AArch64)",
[ESR_ELx_EC_SERROR] = "SError",
@@ -947,7 +1005,7 @@ void do_serror(struct pt_regs *regs, unsigned long esr)
}
/* GENERIC_BUG traps */
-
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
/*
@@ -959,6 +1017,7 @@ int is_valid_bugaddr(unsigned long addr)
*/
return 1;
}
+#endif
static int bug_handler(struct pt_regs *regs, unsigned long esr)
{
@@ -1044,7 +1103,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
bool recover = esr & KASAN_ESR_RECOVER;
bool write = esr & KASAN_ESR_WRITE;
size_t size = KASAN_ESR_SIZE(esr);
- u64 addr = regs->regs[0];
+ void *addr = (void *)regs->regs[0];
u64 pc = regs->pc;
kasan_report(addr, size, write, pc);
diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
new file mode 100644
index 000000000000..dcd25322127c
--- /dev/null
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/nmi.h>
+#include <linux/cpufreq.h>
+#include <linux/perf/arm_pmu.h>
+
+/*
+ * Safe maximum CPU frequency in case a particular platform doesn't implement
+ * a cpufreq driver. Although the architecture doesn't put any restrictions on
+ * the maximum frequency, 5 GHz seems to be a safe maximum given that the
+ * available Arm CPUs on the market are clocked well below 5 GHz. On the other
+ * hand, we can't make it much higher as it would lead to a large hard-lockup
+ * detection timeout on parts which run slower (e.g. 1 GHz on Developerbox)
+ * and don't possess a cpufreq driver.
+ */
+#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
+/*
+ * Return the sample period for 'watchdog_thresh': the current CPU's maximum
+ * frequency multiplied by the threshold. Falls back to SAFE_MAX_CPU_FREQ when
+ * cpufreq reports a maximum frequency of 0.
+ */
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ unsigned int cpu = smp_processor_id();
+ unsigned long max_cpu_freq;
+
+ /* Scaled by 1000 - presumably converting kHz to Hz; confirm against cpufreq. */
+ max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+ if (!max_cpu_freq)
+ max_cpu_freq = SAFE_MAX_CPU_FREQ;
+
+ return (u64)max_cpu_freq * watchdog_thresh;
+}
+
+bool __init arch_perf_nmi_is_available(void)
+{
+ /*
+ * hardlockup_detector_perf_init() will succeed even if Pseudo-NMI is turned
+ * off; however, the PMU interrupts would then act like normal interrupts
+ * instead of NMIs and the hardlockup detector would be broken.
+ */
+ return arm_pmu_irq_is_nmi();
+}
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 55f80fb93925..8725291cb00a 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -333,7 +333,7 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
/* Check if we have TRBE implemented and available at the host */
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
- !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
+ !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 5c15c58f90cc..2f6e0b3e4a75 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -82,8 +82,14 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
* EL1 instead of being trapped to EL2.
*/
if (kvm_arm_support_pmu_v3()) {
+ struct kvm_cpu_context *hctxt;
+
write_sysreg(0, pmselr_el0);
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -106,8 +112,13 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
write_sysreg(0, hstr_el2);
- if (kvm_arm_support_pmu_v3())
- write_sysreg(0, pmuserenr_el0);
+ if (kvm_arm_support_pmu_v3()) {
+ struct kvm_cpu_context *hctxt;
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
+ vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
+ }
if (cpus_have_final_cap(ARM64_SME)) {
sysreg_clear_set_s(SYS_HFGRTR_EL2, 0,
@@ -130,6 +141,9 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX))
+ write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -144,6 +158,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 &= ~HCR_VSE;
vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
}
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX))
+ write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
}
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 699ea1f8d409..bb6b571ec627 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -44,6 +44,8 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0);
ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1);
ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2))
+ ctxt_sys_reg(ctxt, TCR2_EL1) = read_sysreg_el1(SYS_TCR2);
ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1);
@@ -53,6 +55,10 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
+ ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
+ }
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
@@ -114,6 +120,8 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR);
write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0);
write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1), SYS_TCR2);
write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1);
@@ -123,6 +131,10 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
+ write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
+ }
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index d756b939f296..4558c02eb352 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -56,7 +56,7 @@ static void __debug_save_trace(u64 *trfcr_el1)
*trfcr_el1 = 0;
/* Check if the TRBE is enabled */
- if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE))
+ if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
return;
/*
* Prohibit trace generation while we are in guest.
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7a1aa511e7da..b37e7c96efea 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
}
NOKPROBE_SYMBOL(__deactivate_traps);
+/*
+ * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to
+ * prevent a race condition between context switching of PMUSERENR_EL0
+ * in __{activate,deactivate}_traps_common() and IPIs that attempt to
+ * update PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
__activate_traps_common(vcpu);
+ local_irq_restore(flags);
}
void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
{
+ unsigned long flags;
+
+ local_irq_save(flags);
__deactivate_traps_common(vcpu);
+ local_irq_restore(flags);
}
static const exit_handler_fn hyp_exit_handlers[] = {
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 491ca7eb2a4c..560650972478 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -700,7 +700,25 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
mutex_lock(&arm_pmus_lock);
- cpu = smp_processor_id();
+ /*
+ * It is safe to use a stale cpu to iterate the list of PMUs so long as
+ * the same value is used for the entirety of the loop. Given this, and
+ * the fact that no percpu data is used for the lookup there is no need
+ * to disable preemption.
+ *
+ * It is still necessary to get a valid cpu, though, to probe for the
+ * default PMU instance as userspace is not required to specify a PMU
+ * type. In order to uphold the preexisting behavior KVM selects the
+ * PMU instance for the core where the first call to the
+ * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case
+ * would be a user with disdain of all things big.LITTLE that affines
+ * the VMM to a particular cluster of cores.
+ *
+ * In any case, userspace should just do the sane thing and use the UAPI
+ * to select a PMU type directly. But, be wary of the baggage being
+ * carried here.
+ */
+ cpu = raw_smp_processor_id();
list_for_each_entry(entry, &arm_pmus, entry) {
tmp = entry->arm_pmu;
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 7887133d15f0..121f1a14c829 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_enable_el0(events_host);
kvm_vcpu_pmu_disable_el0(events_guest);
}
+
+/*
+ * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
+ * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
+ * to the value for the guest on vcpu_load(). The value for the host EL0
+ * will be restored on vcpu_put(), before returning to userspace.
+ * This isn't necessary for nVHE, as the register is context switched for
+ * every guest enter/exit.
+ *
+ * Return true if KVM takes care of the register. Otherwise return false.
+ */
+bool kvm_set_pmuserenr(u64 val)
+{
+ struct kvm_cpu_context *hctxt;
+ struct kvm_vcpu *vcpu;
+
+ if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ return false;
+
+ vcpu = kvm_get_running_vcpu();
+ if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
+ return false;
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
+ return true;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 753aa7418149..5b5d5e5449dc 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -401,9 +401,9 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
return read_from_write_only(vcpu, p, r);
/* Forward the OSLK bit to OSLSR */
- oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK;
- if (p->regval & SYS_OSLAR_OSLK)
- oslsr |= SYS_OSLSR_OSLK;
+ oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~OSLSR_EL1_OSLK;
+ if (p->regval & OSLAR_EL1_OSLK)
+ oslsr |= OSLSR_EL1_OSLK;
__vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
return true;
@@ -427,7 +427,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
* The only modifiable bit is the OSLK bit. Refuse the write if
* userspace attempts to change any other bit in the register.
*/
- if ((val ^ rd->val) & ~SYS_OSLSR_OSLK)
+ if ((val ^ rd->val) & ~OSLSR_EL1_OSLK)
return -EINVAL;
__vcpu_sys_reg(vcpu, rd->reg) = val;
@@ -1265,6 +1265,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
if (!cpus_have_final_cap(ARM64_HAS_WFXT))
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_MOPS);
break;
case SYS_ID_AA64DFR0_EL1:
/* Limit debug to ARMv8.0 */
@@ -1800,7 +1801,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 },
{ SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1,
- SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
+ OSLSR_EL1_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
@@ -1891,7 +1892,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_SANITISED(ID_AA64MMFR0_EL1),
ID_SANITISED(ID_AA64MMFR1_EL1),
ID_SANITISED(ID_AA64MMFR2_EL1),
- ID_UNALLOCATED(7,3),
+ ID_SANITISED(ID_AA64MMFR3_EL1),
ID_UNALLOCATED(7,4),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
@@ -1911,6 +1912,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
+ { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 },
PTRAUTH_KEY(APIA),
PTRAUTH_KEY(APIB),
@@ -1960,6 +1962,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
+ { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 },
+ { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 6eafc2c45cfc..c8c3cb812783 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -446,6 +446,7 @@ int vgic_lazy_init(struct kvm *kvm)
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
+ enum vgic_type type;
gpa_t dist_base;
int ret = 0;
@@ -460,10 +461,13 @@ int kvm_vgic_map_resources(struct kvm *kvm)
if (!irqchip_in_kernel(kvm))
goto out;
- if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
ret = vgic_v2_map_resources(kvm);
- else
+ type = VGIC_V2;
+ } else {
ret = vgic_v3_map_resources(kvm);
+ type = VGIC_V3;
+ }
if (ret) {
__kvm_vgic_destroy(kvm);
@@ -473,8 +477,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
- ret = vgic_register_dist_iodev(kvm, dist_base,
- kvm_vgic_global_state.type);
+ ret = vgic_register_dist_iodev(kvm, dist_base, type);
if (ret) {
kvm_err("Unable to register VGIC dist MMIO regions\n");
kvm_vgic_destroy(kvm);
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index 96b171995d19..f9a53b7f9842 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -10,7 +10,7 @@
#include <linux/module.h>
#include <asm/neon-intrinsics.h>
-void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
uint64_t *dp1 = (uint64_t *)p1;
@@ -37,7 +37,7 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
} while (--lines > 0);
}
-void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
@@ -73,7 +73,7 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
} while (--lines > 0);
}
-void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
@@ -118,7 +118,7 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
} while (--lines > 0);
}
-void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+static void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4,
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e1e0dca01839..188197590fc9 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -364,8 +364,8 @@ void cpu_do_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm)
ttbr1 &= ~TTBR_ASID_MASK;
ttbr1 |= FIELD_PREP(TTBR_ASID_MASK, asid);
+ cpu_set_reserved_ttbr0_nosync();
write_sysreg(ttbr1, ttbr1_el1);
- isb();
write_sysreg(ttbr0, ttbr0_el1);
isb();
post_ttbr_update_workaround();
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 8a169bdb4d53..935f0a8911f9 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -66,6 +66,8 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned long esr
static void data_abort_decode(unsigned long esr)
{
+ unsigned long iss2 = ESR_ELx_ISS2(esr);
+
pr_alert("Data abort info:\n");
if (esr & ESR_ELx_ISV) {
@@ -78,12 +80,21 @@ static void data_abort_decode(unsigned long esr)
(esr & ESR_ELx_SF) >> ESR_ELx_SF_SHIFT,
(esr & ESR_ELx_AR) >> ESR_ELx_AR_SHIFT);
} else {
- pr_alert(" ISV = 0, ISS = 0x%08lx\n", esr & ESR_ELx_ISS_MASK);
+ pr_alert(" ISV = 0, ISS = 0x%08lx, ISS2 = 0x%08lx\n",
+ esr & ESR_ELx_ISS_MASK, iss2);
}
- pr_alert(" CM = %lu, WnR = %lu\n",
+ pr_alert(" CM = %lu, WnR = %lu, TnD = %lu, TagAccess = %lu\n",
(esr & ESR_ELx_CM) >> ESR_ELx_CM_SHIFT,
- (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT);
+ (esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT,
+ (iss2 & ESR_ELx_TnD) >> ESR_ELx_TnD_SHIFT,
+ (iss2 & ESR_ELx_TagAccess) >> ESR_ELx_TagAccess_SHIFT);
+
+ pr_alert(" GCS = %ld, Overlay = %lu, DirtyBit = %lu, Xs = %llu\n",
+ (iss2 & ESR_ELx_GCS) >> ESR_ELx_GCS_SHIFT,
+ (iss2 & ESR_ELx_Overlay) >> ESR_ELx_Overlay_SHIFT,
+ (iss2 & ESR_ELx_DirtyBit) >> ESR_ELx_DirtyBit_SHIFT,
+ (iss2 & ESR_ELx_Xs_MASK) >> ESR_ELx_Xs_SHIFT);
}
static void mem_abort_decode(unsigned long esr)
@@ -177,6 +188,9 @@ static void show_pte(unsigned long addr)
break;
ptep = pte_offset_map(pmdp, addr);
+ if (!ptep)
+ break;
+
pte = READ_ONCE(*ptep);
pr_cont(", pte=%016llx", pte_val(pte));
pte_unmap(ptep);
@@ -317,7 +331,7 @@ static void report_tag_fault(unsigned long addr, unsigned long esr,
* find out access size.
*/
bool is_write = !!(esr & ESR_ELx_WNR);
- kasan_report(addr, 0, is_write, regs->pc);
+ kasan_report((void *)addr, 0, is_write, regs->pc);
}
#else
/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
@@ -854,9 +868,6 @@ void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(do_sp_pc_abort);
-int __init early_brk64(unsigned long addr, unsigned long esr,
- struct pt_regs *regs);
-
/*
* __refdata because early_brk64 is __init, but the reference to it is
* clobbered at arch_initcall time.
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 5f9379b3c8c8..4e6476094952 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/mm.h>
+#include <linux/libnvdimm.h>
#include <linux/pagemap.h>
#include <asm/cacheflush.h>
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 95364e8bdc19..21716c940682 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -307,14 +307,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
WARN_ON(addr & (sz - 1));
- /*
- * Note that if this code were ever ported to the
- * 32-bit arm platform then it will cause trouble in
- * the case where CONFIG_HIGHPTE is set, since there
- * will be no pte_unmap() to correspond with this
- * pte_alloc_map().
- */
- ptep = pte_alloc_map(mm, pmdp, addr);
+ ptep = pte_alloc_huge(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
ptep = huge_pmd_share(mm, vma, addr, pudp);
@@ -366,7 +359,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return (pte_t *)pmdp;
if (sz == CONT_PTE_SIZE)
- return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
+ return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));
return NULL;
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 66e70ca47680..d31c3a9290c5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -69,6 +69,7 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+#define CRASH_HIGH_SEARCH_BASE SZ_4G
#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
@@ -101,12 +102,13 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
*/
static void __init reserve_crashkernel(void)
{
- unsigned long long crash_base, crash_size;
- unsigned long long crash_low_size = 0;
+ unsigned long long crash_low_size = 0, search_base = 0;
unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
+ unsigned long long crash_base, crash_size;
char *cmdline = boot_command_line;
- int ret;
bool fixed_base = false;
+ bool high = false;
+ int ret;
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;
@@ -129,7 +131,9 @@ static void __init reserve_crashkernel(void)
else if (ret)
return;
+ search_base = CRASH_HIGH_SEARCH_BASE;
crash_max = CRASH_ADDR_HIGH_MAX;
+ high = true;
} else if (ret || !crash_size) {
/* The specified value is invalid */
return;
@@ -140,31 +144,51 @@ static void __init reserve_crashkernel(void)
/* User specifies base address explicitly. */
if (crash_base) {
fixed_base = true;
+ search_base = crash_base;
crash_max = crash_base + crash_size;
}
retry:
crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
- crash_base, crash_max);
+ search_base, crash_max);
if (!crash_base) {
/*
- * If the first attempt was for low memory, fall back to
- * high memory, the minimum required low memory will be
- * reserved later.
+ * For crashkernel=size[KMG]@offset[KMG], print out a failure
+ * message if the specified region can't be reserved.
*/
- if (!fixed_base && (crash_max == CRASH_ADDR_LOW_MAX)) {
+ if (fixed_base) {
+ pr_warn("crashkernel reservation failed - memory is in use.\n");
+ return;
+ }
+
+ /*
+ * For crashkernel=size[KMG], if the first attempt was for
+ * low memory, fall back to high memory, the minimum required
+ * low memory will be reserved later.
+ */
+ if (!high && crash_max == CRASH_ADDR_LOW_MAX) {
crash_max = CRASH_ADDR_HIGH_MAX;
+ search_base = CRASH_ADDR_LOW_MAX;
crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
goto retry;
}
+ /*
+ * For crashkernel=size[KMG],high, if the first attempt was
+ * for high memory, fall back to low memory.
+ */
+ if (high && crash_max == CRASH_ADDR_HIGH_MAX) {
+ crash_max = CRASH_ADDR_LOW_MAX;
+ search_base = 0;
+ goto retry;
+ }
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
crash_size);
return;
}
- if ((crash_base > CRASH_ADDR_LOW_MAX - crash_low_size) &&
- crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+ if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size &&
+ reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
return;
}
@@ -442,7 +466,12 @@ void __init bootmem_init(void)
*/
void __init mem_init(void)
{
- swiotlb_init(max_pfn > PFN_DOWN(arm64_dma_phys_limit), SWIOTLB_VERBOSE);
+ bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
+
+ if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
+ swiotlb = true;
+
+ swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
/* this will put all unused low memory onto the freelists */
memblock_free_all();
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index e969e68de005..f17d066e85eb 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start,
static void __init kasan_init_shadow(void)
{
u64 kimg_shadow_start, kimg_shadow_end;
- u64 mod_shadow_start, mod_shadow_end;
+ u64 mod_shadow_start;
u64 vmalloc_shadow_end;
phys_addr_t pa_start, pa_end;
u64 i;
@@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void)
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));
mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
- mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);
@@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void)
kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
(void *)mod_shadow_start);
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
- BUILD_BUG_ON(VMALLOC_START != MODULES_END);
- kasan_populate_early_shadow((void *)vmalloc_shadow_end,
- (void *)KASAN_SHADOW_END);
- } else {
- kasan_populate_early_shadow((void *)kimg_shadow_end,
- (void *)KASAN_SHADOW_END);
- if (kimg_shadow_start > mod_shadow_end)
- kasan_populate_early_shadow((void *)mod_shadow_end,
- (void *)kimg_shadow_start);
- }
+ BUILD_BUG_ON(VMALLOC_START != MODULES_END);
+ kasan_populate_early_shadow((void *)vmalloc_shadow_end,
+ (void *)KASAN_SHADOW_END);
for_each_mem_range(i, &pa_start, &pa_end) {
void *start = (void *)__phys_to_virt(pa_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index af6bc8403ee4..95d360805f8a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -451,7 +451,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
- if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+ if (virt < PAGE_OFFSET) {
pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
&phys, virt);
return;
@@ -478,7 +478,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
- if ((virt >= PAGE_END) && (virt < VMALLOC_START)) {
+ if (virt < PAGE_OFFSET) {
pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
&phys, virt);
return;
@@ -663,12 +663,17 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
vm_area_add_early(vma);
}
+static pgprot_t kernel_exec_prot(void)
+{
+ return rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+}
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
int i;
- pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ pgprot_t prot = kernel_exec_prot();
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
/* The trampoline is always mapped and can therefore be global */
@@ -723,7 +728,7 @@ static void __init map_kernel(pgd_t *pgdp)
* mapping to install SW breakpoints. Allow this (only) when
* explicitly requested with rodata=off.
*/
- pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ pgprot_t text_prot = kernel_exec_prot();
/*
* If we have a CPU that supports BTI and a kernel built for
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c2cb437821ca..2baeec419f62 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -199,7 +199,7 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
.pushsection ".idmap.text", "a"
@@ -290,7 +290,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
isb
mov temp_pte, x5
- mov pte_flags, #KPTI_NG_PTE_FLAGS
+ mov_q pte_flags, KPTI_NG_PTE_FLAGS
/* Everybody is enjoying the idmap, so we can rewrite swapper. */
/* PGD */
@@ -454,6 +454,21 @@ SYM_FUNC_START(__cpu_setup)
#endif /* CONFIG_ARM64_HW_AFDBM */
msr mair_el1, mair
msr tcr_el1, tcr
+
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
+ cbz x1, .Lskip_indirection
+
+ mov_q x0, PIE_E0
+ msr REG_PIRE0_EL1, x0
+ mov_q x0, PIE_E1
+ msr REG_PIR_EL1, x0
+
+ mov x0, TCR2_EL1x_PIE
+ msr REG_TCR2_EL1, x0
+
+.Lskip_indirection:
+
/*
* Prepare SCTLR
*/
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index b26da8efa616..145b540ec34f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1731,21 +1731,21 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
}
}
-static void save_args(struct jit_ctx *ctx, int args_off, int nargs)
+static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
{
int i;
- for (i = 0; i < nargs; i++) {
+ for (i = 0; i < nregs; i++) {
emit(A64_STR64I(i, A64_SP, args_off), ctx);
args_off += 8;
}
}
-static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
+static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
{
int i;
- for (i = 0; i < nargs; i++) {
+ for (i = 0; i < nregs; i++) {
emit(A64_LDR64I(i, A64_SP, args_off), ctx);
args_off += 8;
}
@@ -1764,7 +1764,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nargs)
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
struct bpf_tramp_links *tlinks, void *orig_call,
- int nargs, u32 flags)
+ int nregs, u32 flags)
{
int i;
int stack_size;
@@ -1772,7 +1772,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
int regs_off;
int retval_off;
int args_off;
- int nargs_off;
+ int nregs_off;
int ip_off;
int run_ctx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
@@ -1795,11 +1795,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
* SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
* BPF_TRAMP_F_RET_FENTRY_RET
*
- * [ argN ]
+ * [ arg reg N ]
* [ ... ]
- * SP + args_off [ arg1 ]
+ * SP + args_off [ arg reg 1 ]
*
- * SP + nargs_off [ args count ]
+ * SP + nregs_off [ arg regs count ]
*
* SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
@@ -1816,13 +1816,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8;
- nargs_off = stack_size;
+ nregs_off = stack_size;
/* room for args count */
stack_size += 8;
args_off = stack_size;
/* room for args */
- stack_size += nargs * 8;
+ stack_size += nregs * 8;
/* room for return value */
retval_off = stack_size;
@@ -1865,12 +1865,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
}
- /* save args count*/
- emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx);
- emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx);
+ /* save arg regs count*/
+ emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
- /* save args */
- save_args(ctx, args_off, nargs);
+ /* save arg regs */
+ save_args(ctx, args_off, nregs);
/* save callee saved registers */
emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -1897,7 +1897,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_args(ctx, args_off, nargs);
+ restore_args(ctx, args_off, nregs);
/* call original func */
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_args(ctx, args_off, nargs);
+ restore_args(ctx, args_off, nregs);
/* restore callee saved register x19 and x20 */
emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -1967,24 +1967,25 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *orig_call)
{
int i, ret;
- int nargs = m->nr_args;
+ int nregs = m->nr_args;
int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
struct jit_ctx ctx = {
.image = NULL,
.idx = 0,
};
- /* the first 8 arguments are passed by registers */
- if (nargs > 8)
- return -ENOTSUPP;
-
- /* don't support struct argument */
+ /* extra registers needed for struct argument */
for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
+ /* The arg_size is at most 16 bytes, enforced by the verifier. */
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- return -ENOTSUPP;
+ nregs += (m->arg_size[i] + 7) / 8 - 1;
}
- ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+ /* the first 8 registers are used for arguments */
+ if (nregs > 8)
+ return -ENOTSUPP;
+
+ ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
if (ret < 0)
return ret;
@@ -1995,7 +1996,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
ctx.idx = 0;
jit_fill_hole(image, (unsigned int)(image_end - image));
- ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+ ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
if (ret > 0 && validate_code(&ctx) < 0)
ret = -EINVAL;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 40ba95472594..19c23c4fa2da 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -32,16 +32,20 @@ HAS_GENERIC_AUTH_IMP_DEF
HAS_GIC_CPUIF_SYSREGS
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCX
HAS_LDAPR
HAS_LSE_ATOMICS
+HAS_MOPS
HAS_NESTED_VIRT
HAS_NO_FPSIMD
HAS_NO_HW_PREFETCH
HAS_PAN
+HAS_S1PIE
HAS_RAS_EXTN
HAS_RNG
HAS_SB
HAS_STAGE2_FWB
+HAS_TCR2
HAS_TIDCP1
HAS_TLB_RANGE
HAS_VIRT_HOST_EXTN
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
index 00c9e72a200a..8525980379d7 100755
--- a/arch/arm64/tools/gen-cpucaps.awk
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -24,12 +24,12 @@ BEGIN {
}
/^[vA-Z0-9_]+$/ {
- printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+ printf("#define ARM64_%-40s\t%d\n", $0, cap_num++)
next
}
END {
- printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+ printf("#define ARM64_NCAPS\t\t\t\t\t%d\n", cap_num)
print ""
print "#endif /* __ASM_CPUCAPS_H */"
}
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index c9a0d1fa3209..1ea4a3dc68f8 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -48,6 +48,61 @@
# feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
# item ACCDATA) though it may be more tasteful to do something else.
+Sysreg OSDTRRX_EL1 2 0 0 0 2
+Res0 63:32
+Field 31:0 DTRRX
+EndSysreg
+
+Sysreg MDCCINT_EL1 2 0 0 2 0
+Res0 63:31
+Field 30 RX
+Field 29 TX
+Res0 28:0
+EndSysreg
+
+Sysreg MDSCR_EL1 2 0 0 2 2
+Res0 63:36
+Field 35 EHBWE
+Field 34 EnSPM
+Field 33 TTA
+Field 32 EMBWE
+Field 31 TFO
+Field 30 RXfull
+Field 29 TXfull
+Res0 28
+Field 27 RXO
+Field 26 TXU
+Res0 25:24
+Field 23:22 INTdis
+Field 21 TDA
+Res0 20
+Field 19 SC2
+Res0 18:16
+Field 15 MDE
+Field 14 HDE
+Field 13 KDE
+Field 12 TDCC
+Res0 11:7
+Field 6 ERR
+Res0 5:1
+Field 0 SS
+EndSysreg
+
+Sysreg OSDTRTX_EL1 2 0 0 3 2
+Res0 63:32
+Field 31:0 DTRTX
+EndSysreg
+
+Sysreg OSECCR_EL1 2 0 0 6 2
+Res0 63:32
+Field 31:0 EDECCR
+EndSysreg
+
+Sysreg OSLAR_EL1 2 0 1 0 4
+Res0 63:1
+Field 0 OSLK
+EndSysreg
+
Sysreg ID_PFR0_EL1 3 0 0 1 0
Res0 63:32
UnsignedEnum 31:28 RAS
@@ -1538,6 +1593,78 @@ UnsignedEnum 3:0 CnP
EndEnum
EndSysreg
+Sysreg ID_AA64MMFR3_EL1 3 0 0 7 3
+UnsignedEnum 63:60 Spec_FPACC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 59:56 ADERR
+ 0b0000 NI
+ 0b0001 DEV_ASYNC
+ 0b0010 FEAT_ADERR
+ 0b0011 FEAT_ADERR_IND
+EndEnum
+UnsignedEnum 55:52 SDERR
+ 0b0000 NI
+ 0b0001 DEV_SYNC
+ 0b0010 FEAT_ADERR
+ 0b0011 FEAT_ADERR_IND
+EndEnum
+Res0 51:48
+UnsignedEnum 47:44 ANERR
+ 0b0000 NI
+ 0b0001 ASYNC
+ 0b0010 FEAT_ANERR
+ 0b0011 FEAT_ANERR_IND
+EndEnum
+UnsignedEnum 43:40 SNERR
+ 0b0000 NI
+ 0b0001 SYNC
+ 0b0010 FEAT_ANERR
+ 0b0011 FEAT_ANERR_IND
+EndEnum
+UnsignedEnum 39:36 D128_2
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 35:32 D128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 31:28 MEC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 27:24 AIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 23:20 S2POE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 19:16 S1POE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 15:12 S2PIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 11:8 S1PIE
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 SCTLRX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 TCRX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg SCTLR_EL1 3 0 1 0 0
Field 63 TIDCP
Field 62 SPINTMASK
@@ -2034,7 +2161,17 @@ Fields ZCR_ELx
EndSysreg
Sysreg HCRX_EL2 3 4 1 2 2
-Res0 63:12
+Res0 63:23
+Field 22 GCSEn
+Field 21 EnIDCP128
+Field 20 EnSDERR
+Field 19 TMEA
+Field 18 EnSNERR
+Field 17 D128En
+Field 16 PTTWI
+Field 15 SCTLR2En
+Field 14 TCR2En
+Res0 13:12
Field 11 MSCEn
Field 10 MCE2
Field 9 CMOW
@@ -2153,6 +2290,87 @@ Sysreg TTBR1_EL1 3 0 2 0 1
Fields TTBRx_EL1
EndSysreg
+SysregFields TCR2_EL1x
+Res0 63:16
+Field 15 DisCH1
+Field 14 DisCH0
+Res0 13:12
+Field 11 HAFT
+Field 10 PTTWI
+Res0 9:6
+Field 5 D128
+Field 4 AIE
+Field 3 POE
+Field 2 E0POE
+Field 1 PIE
+Field 0 PnCH
+EndSysregFields
+
+Sysreg TCR2_EL1 3 0 2 0 3
+Fields TCR2_EL1x
+EndSysreg
+
+Sysreg TCR2_EL12 3 5 2 0 3
+Fields TCR2_EL1x
+EndSysreg
+
+Sysreg TCR2_EL2 3 4 2 0 3
+Res0 63:16
+Field 15 DisCH1
+Field 14 DisCH0
+Field 13 AMEC1
+Field 12 AMEC0
+Field 11 HAFT
+Field 10 PTTWI
+Field 9:8 SKL1
+Field 7:6 SKL0
+Field 5 D128
+Field 4 AIE
+Field 3 POE
+Field 2 E0POE
+Field 1 PIE
+Field 0 PnCH
+EndSysreg
+
+SysregFields PIRx_ELx
+Field 63:60 Perm15
+Field 59:56 Perm14
+Field 55:52 Perm13
+Field 51:48 Perm12
+Field 47:44 Perm11
+Field 43:40 Perm10
+Field 39:36 Perm9
+Field 35:32 Perm8
+Field 31:28 Perm7
+Field 27:24 Perm6
+Field 23:20 Perm5
+Field 19:16 Perm4
+Field 15:12 Perm3
+Field 11:8 Perm2
+Field 7:4 Perm1
+Field 3:0 Perm0
+EndSysregFields
+
+Sysreg PIRE0_EL1 3 0 10 2 2
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg PIRE0_EL12 3 5 10 2 2
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg PIR_EL1 3 0 10 2 3
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg PIR_EL12 3 5 10 2 3
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg PIR_EL2 3 4 10 2 3
+Fields PIRx_ELx
+EndSysreg
+
Sysreg LORSA_EL1 3 0 10 4 0
Res0 63:52
Field 51:16 SA
@@ -2200,3 +2418,80 @@ Sysreg ICC_NMIAR1_EL1 3 0 12 9 5
Res0 63:24
Field 23:0 INTID
EndSysreg
+
+Sysreg TRBLIMITR_EL1 3 0 9 11 0
+Field 63:12 LIMIT
+Res0 11:7
+Field 6 XE
+Field 5 nVM
+Enum 4:3 TM
+ 0b00 STOP
+ 0b01 IRQ
+ 0b11 IGNR
+EndEnum
+Enum 2:1 FM
+ 0b00 FILL
+ 0b01 WRAP
+ 0b11 CBUF
+EndEnum
+Field 0 E
+EndSysreg
+
+Sysreg TRBPTR_EL1 3 0 9 11 1
+Field 63:0 PTR
+EndSysreg
+
+Sysreg TRBBASER_EL1 3 0 9 11 2
+Field 63:12 BASE
+Res0 11:0
+EndSysreg
+
+Sysreg TRBSR_EL1 3 0 9 11 3
+Res0 63:56
+Field 55:32 MSS2
+Field 31:26 EC
+Res0 25:24
+Field 23 DAT
+Field 22 IRQ
+Field 21 TRG
+Field 20 WRAP
+Res0 19
+Field 18 EA
+Field 17 S
+Res0 16
+Field 15:0 MSS
+EndSysreg
+
+Sysreg TRBMAR_EL1 3 0 9 11 4
+Res0 63:12
+Enum 11:10 PAS
+ 0b00 SECURE
+ 0b01 NON_SECURE
+ 0b10 ROOT
+ 0b11 REALM
+EndEnum
+Enum 9:8 SH
+ 0b00 NON_SHAREABLE
+ 0b10 OUTER_SHAREABLE
+ 0b11 INNER_SHAREABLE
+EndEnum
+Field 7:0 Attr
+EndSysreg
+
+Sysreg TRBTRG_EL1 3 0 9 11 6
+Res0 63:32
+Field 31:0 TRG
+EndSysreg
+
+Sysreg TRBIDR_EL1 3 0 9 11 7
+Res0 63:12
+Enum 11:8 EA
+ 0b0000 NON_DESC
+ 0b0001 IGNORE
+ 0b0010 SERROR
+EndEnum
+Res0 7:6
+Field 5 F
+Field 4 P
+Field 3:0 Align
+EndSysreg
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 03e9f6666157..cf2a6fd7dff8 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -96,6 +96,7 @@ config CSKY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select LOCK_MM_AND_FIND_VMA
select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA if MODULES
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
index 60406ef9c2bb..4dab44f6143a 100644
--- a/arch/csky/include/asm/atomic.h
+++ b/arch/csky/include/asm/atomic.h
@@ -195,41 +195,6 @@ arch_atomic_dec_if_positive(atomic_t *v)
}
#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
-#define ATOMIC_OP() \
-static __always_inline \
-int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
-{ \
- return __xchg_relaxed(n, &(v->counter), 4); \
-} \
-static __always_inline \
-int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
-{ \
- return __cmpxchg_relaxed(&(v->counter), o, n, 4); \
-} \
-static __always_inline \
-int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \
-{ \
- return __cmpxchg_acquire(&(v->counter), o, n, 4); \
-} \
-static __always_inline \
-int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \
-{ \
- return __cmpxchg(&(v->counter), o, n, 4); \
-}
-
-#define ATOMIC_OPS() \
- ATOMIC_OP()
-
-ATOMIC_OPS()
-
-#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic_cmpxchg arch_atomic_cmpxchg
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP
-
#else
#include <asm-generic/atomic.h>
#endif
diff --git a/arch/csky/include/asm/smp.h b/arch/csky/include/asm/smp.h
index 668b79ce29ea..d3db334f3196 100644
--- a/arch/csky/include/asm/smp.h
+++ b/arch/csky/include/asm/smp.h
@@ -23,7 +23,7 @@ void __init set_send_ipi(void (*func)(const struct cpumask *mask), int irq);
int __cpu_disable(void);
-void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
#endif /* CONFIG_SMP */
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index b12e2c3c387f..8e42352cbf12 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -291,12 +291,8 @@ int __cpu_disable(void)
return 0;
}
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
- if (!cpu_wait_death(cpu, 5)) {
- pr_crit("CPU%u: shutdown failed\n", cpu);
- return;
- }
pr_notice("CPU%u: shutdown\n", cpu);
}
@@ -304,7 +300,7 @@ void __noreturn arch_cpu_idle_dead(void)
{
idle_task_exit();
- cpu_report_death();
+ cpuhp_ap_report_dead();
while (!secondary_stack)
arch_cpu_idle();
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 6e94f8d04146..2447d083c432 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -28,58 +28,8 @@ static inline void arch_atomic_set(atomic_t *v, int new)
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
-/**
- * arch_atomic_read - reads a word, atomically
- * @v: pointer to atomic value
- *
- * Assumes all word reads on our architecture are atomic.
- */
#define arch_atomic_read(v) READ_ONCE((v)->counter)
-/**
- * arch_atomic_xchg - atomic
- * @v: pointer to memory to change
- * @new: new value (technically passed in a register -- see xchg)
- */
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new)))
-
-
-/**
- * arch_atomic_cmpxchg - atomic compare-and-exchange values
- * @v: pointer to value to change
- * @old: desired old value to match
- * @new: new value to put in
- *
- * Parameters are then pointer, value-in-register, value-in-register,
- * and the output is the old value.
- *
- * Apparently this is complicated for archs that don't support
- * the memw_locked like we do (or it's broken or whatever).
- *
- * Kind of the lynchpin of the rest of the generically defined routines.
- * Remember V2 had that bug with dotnew predicate set by memw_locked.
- *
- * "old" is "expected" old val, __oldval is actual old value
- */
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
-{
- int __oldval;
-
- asm volatile(
- "1: %0 = memw_locked(%1);\n"
- " { P0 = cmp.eq(%0,%2);\n"
- " if (!P0.new) jump:nt 2f; }\n"
- " memw_locked(%1,P0) = %3;\n"
- " if (!P0) jump 1b;\n"
- "2:\n"
- : "=&r" (__oldval)
- : "r" (&v->counter), "r" (old), "r" (new)
- : "memory", "p0"
- );
-
- return __oldval;
-}
-
#define ATOMIC_OP(op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
@@ -135,6 +85,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
@@ -142,21 +97,15 @@ ATOMIC_OPS(and)
ATOMIC_OPS(or)
ATOMIC_OPS(xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-/**
- * arch_atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer to value
- * @a: amount to add
- * @u: unless value is equal to u
- *
- * Returns old value.
- *
- */
-
static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int __oldval;
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 1880d9beaf2b..621674e86232 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -66,9 +66,9 @@ void __init setup_arch(char **cmdline_p)
on_simulator = 0;
if (p[0] != '\0')
- strlcpy(boot_command_line, p, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, p, COMMAND_LINE_SIZE);
else
- strlcpy(boot_command_line, default_command_line,
+ strscpy(boot_command_line, default_command_line,
COMMAND_LINE_SIZE);
/*
@@ -76,7 +76,7 @@ void __init setup_arch(char **cmdline_p)
* are both picked up by the init code. If no reason to
* make them different, pass the same pointer back.
*/
- strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
+ strscpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = cmd_line;
parse_early_param();
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 21fa63ce5ffc..2cd93e6bf0fe 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -9,6 +9,7 @@ menu "Processor type and features"
config IA64
bool
select ARCH_BINFMT_ELF_EXTRA_PHDRS
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_DMA_MARK_CLEAN
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 266c429b9137..6540a628d257 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -207,13 +207,6 @@ ATOMIC64_FETCH_OP(xor, ^)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
-#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
-#define arch_atomic64_cmpxchg(v, old, new) \
- (arch_cmpxchg(&((v)->counter), old, new))
-#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#define arch_atomic_add(i,v) (void)arch_atomic_add_return((i), (v))
#define arch_atomic_sub(i,v) (void)arch_atomic_sub_return((i), (v))
diff --git a/arch/ia64/include/asm/bugs.h b/arch/ia64/include/asm/bugs.h
deleted file mode 100644
index 0d6b9bded56c..000000000000
--- a/arch/ia64/include/asm/bugs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- *
- * Based on <asm-alpha/bugs.h>.
- *
- * Modified 1998, 1999, 2003
- * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co.
- */
-#ifndef _ASM_IA64_BUGS_H
-#define _ASM_IA64_BUGS_H
-
-#include <asm/processor.h>
-
-extern void check_bugs (void);
-
-#endif /* _ASM_IA64_BUGS_H */
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c05728044272..5a55ac82c13a 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -627,7 +627,7 @@ setup_arch (char **cmdline_p)
* is physical disk 1 partition 1 and the Linux root disk is
* physical disk 1 partition 2.
*/
- ROOT_DEV = Root_SDA2; /* default to second partition on first drive */
+ ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2);
if (is_uv_system())
uv_setup(cmdline_p);
@@ -1067,8 +1067,7 @@ cpu_init (void)
}
}
-void __init
-check_bugs (void)
+void __init arch_cpu_finalize_init(void)
{
ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
(unsigned long) __end___mckinley_e9_bundles);
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 72c929d9902b..f8c74ffeeefb 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -371,3 +371,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 78a02e026164..adc49f2d22e8 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -41,7 +41,7 @@ huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
if (pud) {
pmd = pmd_alloc(mm, pud, taddr);
if (pmd)
- pte = pte_alloc_map(mm, pmd, taddr);
+ pte = pte_alloc_huge(mm, pmd, taddr);
}
return pte;
}
@@ -64,7 +64,7 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz)
if (pud_present(*pud)) {
pmd = pmd_offset(pud, taddr);
if (pmd_present(*pmd))
- pte = pte_offset_map(pmd, taddr);
+ pte = pte_offset_huge(pmd, taddr);
}
}
}
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 73519e13bbb3..32b9da4622ce 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -10,6 +10,7 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index 6b9aca9ab6e9..e27f0c72d324 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -29,21 +29,7 @@
#define ATOMIC_INIT(i) { (i) }
-/*
- * arch_atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
#define arch_atomic_read(v) READ_ONCE((v)->counter)
-
-/*
- * arch_atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
#define arch_atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC_OP(op, I, asm_op) \
@@ -139,14 +125,6 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
-/*
- * arch_atomic_sub_if_positive - conditionally subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically test @v and subtract @i if @v is greater or equal than @i.
- * The function returns the old value of @v minus @i.
- */
static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
{
int result;
@@ -181,31 +159,13 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
return result;
}
-#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), (new)))
-
-/*
- * arch_atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- */
#define arch_atomic_dec_if_positive(v) arch_atomic_sub_if_positive(1, v)
#ifdef CONFIG_64BIT
#define ATOMIC64_INIT(i) { (i) }
-/*
- * arch_atomic64_read - read atomic variable
- * @v: pointer of type atomic64_t
- *
- */
#define arch_atomic64_read(v) READ_ONCE((v)->counter)
-
-/*
- * arch_atomic64_set - set atomic variable
- * @v: pointer of type atomic64_t
- * @i: required value
- */
#define arch_atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
#define ATOMIC64_OP(op, I, asm_op) \
@@ -300,14 +260,6 @@ static inline long arch_atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
-/*
- * arch_atomic64_sub_if_positive - conditionally subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically test @v and subtract @i if @v is greater or equal than @i.
- * The function returns the old value of @v minus @i.
- */
static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v)
{
long result;
@@ -342,14 +294,6 @@ static inline long arch_atomic64_sub_if_positive(long i, atomic64_t *v)
return result;
}
-#define arch_atomic64_cmpxchg(v, o, n) \
- ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), (new)))
-
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- */
#define arch_atomic64_dec_if_positive(v) arch_atomic64_sub_if_positive(1, v)
#endif /* CONFIG_64BIT */
diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h
deleted file mode 100644
index 98396535163b..000000000000
--- a/arch/loongarch/include/asm/bugs.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_BUGS_H
-#define _ASM_BUGS_H
-
-#include <asm/cpu.h>
-#include <asm/cpu-info.h>
-
-extern void check_bugs(void);
-
-#endif /* _ASM_BUGS_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 35e8a52fea11..1c2a0a2c8830 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1167,7 +1167,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
#ifndef __ASSEMBLY__
-static inline u64 drdtime(void)
+static __always_inline u64 drdtime(void)
{
int rID = 0;
u64 val = 0;
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 4444b13418f0..78a00359bde3 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -12,6 +12,7 @@
*/
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/cpu.h>
#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/export.h>
@@ -37,7 +38,6 @@
#include <asm/addrspace.h>
#include <asm/alternative.h>
#include <asm/bootinfo.h>
-#include <asm/bugs.h>
#include <asm/cache.h>
#include <asm/cpu.h>
#include <asm/dma.h>
@@ -87,7 +87,7 @@ const char *get_system_type(void)
return "generic-loongson-machine";
}
-void __init check_bugs(void)
+void __init arch_cpu_finalize_init(void)
{
alternative_instructions();
}
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index f377e50f3c66..c189e03cd5da 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -190,9 +190,9 @@ static u64 read_const_counter(struct clocksource *clk)
return drdtime();
}
-static u64 native_sched_clock(void)
+static noinstr u64 sched_clock_read(void)
{
- return read_const_counter(NULL);
+ return drdtime();
}
static struct clocksource clocksource_const = {
@@ -211,7 +211,7 @@ int __init constant_clocksource_init(void)
res = clocksource_register_hz(&clocksource_const, freq);
- sched_clock_register(native_sched_clock, 64, freq);
+ sched_clock_register(sched_clock_read, 64, freq);
pr_info("Constant clock source device register\n");
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 40198a1ebe27..dc792b321f1e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,6 +4,7 @@ config M68K
default y
select ARCH_32BIT_OFF_T
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CPU_FINALIZE_INIT if MMU
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index b26469a65bc1..62fdca7efce4 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -43,6 +43,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -454,7 +455,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 944a49a129be..5bfbd0444bb5 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -39,6 +39,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -411,7 +412,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index a32dd884fcce..44302f11c9ea 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -46,6 +46,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -431,7 +432,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 23b7805309bd..f3336f1774ec 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 5605ab5c3dcf..2d1bbac68066 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -38,6 +38,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -413,7 +414,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index d0d1f9c33756..b4428dc36102 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -433,7 +434,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 6d04314ce7ea..4cd9fa4cb10c 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -57,6 +57,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -519,7 +520,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index e6f5ae526d08..7ee9ad50f0ad 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -35,6 +35,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -402,7 +403,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index f2d4dff4787a..2488893616dc 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -36,6 +36,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -403,7 +404,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 907eedecd040..ffc676289f87 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -37,6 +37,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -420,7 +421,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 9e3d47008f21..198179657ce0 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -402,7 +402,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index f6540078cb4b..85364f6178d4 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -33,6 +33,7 @@ CONFIG_IOSCHED_BFQ=m
CONFIG_BINFMT_MISC=m
CONFIG_SLAB=y
# CONFIG_COMPACTION is not set
+CONFIG_DMAPOOL_TEST=m
CONFIG_USERFAULTFD=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -401,7 +402,6 @@ CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
diff --git a/arch/m68k/configs/virt_defconfig b/arch/m68k/configs/virt_defconfig
index 8059bd618370..311b57e73316 100644
--- a/arch/m68k/configs/virt_defconfig
+++ b/arch/m68k/configs/virt_defconfig
@@ -24,8 +24,6 @@ CONFIG_SUN_PARTITION=y
CONFIG_SYSV68_PARTITION=y
CONFIG_NET=y
CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index cfba83d230fd..4bfbc25f6ecf 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -106,6 +106,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
@@ -115,6 +120,10 @@ ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, eor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
@@ -158,12 +167,7 @@ static inline int arch_atomic_inc_and_test(atomic_t *v)
}
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
-#ifdef CONFIG_RMW_INSNS
-
-#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
-#else /* !CONFIG_RMW_INSNS */
+#ifndef CONFIG_RMW_INSNS
static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
@@ -177,6 +181,7 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
local_irq_restore(flags);
return prev;
}
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
static inline int arch_atomic_xchg(atomic_t *v, int new)
{
@@ -189,6 +194,7 @@ static inline int arch_atomic_xchg(atomic_t *v, int new)
local_irq_restore(flags);
return prev;
}
+#define arch_atomic_xchg arch_atomic_xchg
#endif /* !CONFIG_RMW_INSNS */
diff --git a/arch/m68k/include/asm/bugs.h b/arch/m68k/include/asm/bugs.h
deleted file mode 100644
index 745530651e0b..000000000000
--- a/arch/m68k/include/asm/bugs.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/asm-m68k/bugs.h
- *
- * Copyright (C) 1994 Linus Torvalds
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- */
-
-#ifdef CONFIG_MMU
-extern void check_bugs(void); /* in arch/m68k/kernel/setup.c */
-#else
-static void check_bugs(void)
-{
-}
-#endif
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index 8ed6ac14d99f..141bbdfad960 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -99,7 +99,7 @@ static inline void load_ksp_mmu(struct task_struct *task)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte;
+ pte_t *pte = NULL;
unsigned long mmuar;
local_irq_save(flags);
@@ -139,7 +139,7 @@ static inline void load_ksp_mmu(struct task_struct *task)
pte = (mmuar >= PAGE_OFFSET) ? pte_offset_kernel(pmd, mmuar)
: pte_offset_map(pmd, mmuar);
- if (pte_none(*pte) || !pte_present(*pte))
+ if (!pte || pte_none(*pte) || !pte_present(*pte))
goto bug;
set_pte(pte, pte_mkyoung(*pte));
@@ -161,6 +161,8 @@ static inline void load_ksp_mmu(struct task_struct *task)
bug:
pr_info("ksp load failed: mm=0x%p ksp=0x08%lx\n", mm, mmuar);
end:
+ if (pte && mmuar < PAGE_OFFSET)
+ pte_unmap(pte);
local_irq_restore(flags);
}
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index fbff1cea62ca..6f1ae01f322c 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
+#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/delay.h>
@@ -504,7 +505,7 @@ static int __init proc_hardware_init(void)
module_init(proc_hardware_init);
#endif
-void check_bugs(void)
+void __init arch_cpu_finalize_init(void)
{
#if defined(CONFIG_FPU) && !defined(CONFIG_M68KFPU_EMU)
if (m68k_fputype == 0) {
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index bd0274c7592e..c586034d2a7a 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -488,6 +488,8 @@ sys_atomic_cmpxchg_32(unsigned long newval, int oldval, int d3, int d4, int d5,
if (!pmd_present(*pmd))
goto bad_access;
pte = pte_offset_map_lock(mm, pmd, (unsigned long)mem, &ptl);
+ if (!pte)
+ goto bad_access;
if (!pte_present(*pte) || !pte_dirty(*pte)
|| !pte_write(*pte)) {
pte_unmap_unlock(pte, ptl);
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index b1f3940bc298..4f504783371f 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -450,3 +450,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 70aa0979e027..42f45abea37a 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -91,7 +91,8 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte;
+ pte_t *pte = NULL;
+ int ret = -1;
int asid;
local_irq_save(flags);
@@ -100,47 +101,33 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
regs->pc + (extension_word * sizeof(long));
mm = (!user_mode(regs) && KMAPAREA(mmuar)) ? &init_mm : current->mm;
- if (!mm) {
- local_irq_restore(flags);
- return -1;
- }
+ if (!mm)
+ goto out;
pgd = pgd_offset(mm, mmuar);
- if (pgd_none(*pgd)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (pgd_none(*pgd))
+ goto out;
p4d = p4d_offset(pgd, mmuar);
- if (p4d_none(*p4d)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (p4d_none(*p4d))
+ goto out;
pud = pud_offset(p4d, mmuar);
- if (pud_none(*pud)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (pud_none(*pud))
+ goto out;
pmd = pmd_offset(pud, mmuar);
- if (pmd_none(*pmd)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (pmd_none(*pmd))
+ goto out;
pte = (KMAPAREA(mmuar)) ? pte_offset_kernel(pmd, mmuar)
: pte_offset_map(pmd, mmuar);
- if (pte_none(*pte) || !pte_present(*pte)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (!pte || pte_none(*pte) || !pte_present(*pte))
+ goto out;
if (write) {
- if (!pte_write(*pte)) {
- local_irq_restore(flags);
- return -1;
- }
+ if (!pte_write(*pte))
+ goto out;
set_pte(pte, pte_mkdirty(*pte));
}
@@ -161,9 +148,12 @@ int cf_tlb_miss(struct pt_regs *regs, int write, int dtlb, int extension_word)
mmu_write(MMUOR, MMUOR_ACC | MMUOR_UAA);
else
mmu_write(MMUOR, MMUOR_ITLB | MMUOR_ACC | MMUOR_UAA);
-
+ ret = 0;
+out:
+ if (pte && !KMAPAREA(mmuar))
+ pte_unmap(pte);
local_irq_restore(flags);
- return 0;
+ return ret;
}
void __init cf_bootmem_alloc(void)
diff --git a/arch/microblaze/include/asm/cache.h b/arch/microblaze/include/asm/cache.h
index a149b3e711ec..1903988b9e23 100644
--- a/arch/microblaze/include/asm/cache.h
+++ b/arch/microblaze/include/asm/cache.h
@@ -18,4 +18,9 @@
#define SMP_CACHE_BYTES L1_CACHE_BYTES
+/* MS be sure that SLAB allocates aligned objects */
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+
+#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES
+
#endif /* _ASM_MICROBLAZE_CACHE_H */
diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h
index 7b9861bcd458..337f23eabc71 100644
--- a/arch/microblaze/include/asm/page.h
+++ b/arch/microblaze/include/asm/page.h
@@ -30,11 +30,6 @@
#ifndef __ASSEMBLY__
-/* MS be sure that SLAB allocates aligned objects */
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-
-#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES
-
/*
* PAGE_OFFSET -- the first address of the first page of memory. With MMU
* it is set to the kernel start address (aligned on a page boundary).
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index a06cc1f97aa9..3657f5e78a3d 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -16,8 +16,6 @@ extern char *klimit;
extern void mmu_reset(void);
-void time_init(void);
-void init_IRQ(void);
void machine_early_init(const char *cmdline, unsigned int ram,
unsigned int fdt, unsigned int msr, unsigned int tlb0,
unsigned int tlb1);
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index c5c6186a7e8b..e424c796e297 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -20,7 +20,7 @@ void __init early_init_devtree(void *params)
early_init_dt_scan(params);
if (!strlen(boot_command_line))
- strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
memblock_allow_resize();
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index c3aebec71c0c..c78a0ff48066 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -194,7 +194,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
preempt_disable();
ptep = pte_offset_map(pmdp, address);
- if (pte_present(*ptep)) {
+ if (ptep && pte_present(*ptep)) {
address = (unsigned long) page_address(pte_page(*ptep));
/* MS: I need add offset in page */
address += ((unsigned long)frame->tramp) & ~PAGE_MASK;
@@ -203,7 +203,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
invalidate_icache_range(address, address + 8);
flush_dcache_range(address, address + 8);
}
- pte_unmap(ptep);
+ if (ptep)
+ pte_unmap(ptep);
preempt_enable();
if (err)
return -EFAULT;
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 820145e47350..858d22bf275c 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -456,3 +456,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6796d839bcfd..d49d5fc40021 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
default y
select ARCH_32BIT_OFF_T if !64BIT
select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000
select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
select ARCH_HAS_FORTIFY_SOURCE
@@ -2287,6 +2288,7 @@ config MIPS_CPS
select MIPS_CM
select MIPS_CPS_PM if HOTPLUG_CPU
select SMP
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select SYNC_R4K if (CEVT_R4K || CSRC_R4K)
select SYS_SUPPORTS_HOTPLUG_CPU
select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 549a6392a3d2..053805cb741c 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -178,7 +178,10 @@ void __init plat_mem_setup(void)
ioport_resource.start = 0;
ioport_resource.end = ~0;
- /* intended to somewhat resemble ARM; see Documentation/arm/booting.rst */
+ /*
+ * intended to somewhat resemble ARM; see
+ * Documentation/arch/arm/booting.rst
+ */
if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
dtb = phys_to_virt(fw_arg2);
else
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4212584e6efa..33c09688210f 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -345,6 +345,7 @@ void play_dead(void)
int cpu = cpu_number_map(cvmx_get_core_num());
idle_task_exit();
+ cpuhp_ap_report_dead();
octeon_processor_boot = 0xff;
per_cpu(cpu_state, cpu) = CPU_DEAD;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 712fb5a6a568..ba188e77768b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -33,17 +33,6 @@ static __always_inline void arch_##pfx##_set(pfx##_t *v, type i) \
{ \
WRITE_ONCE(v->counter, i); \
} \
- \
-static __always_inline type \
-arch_##pfx##_cmpxchg(pfx##_t *v, type o, type n) \
-{ \
- return arch_cmpxchg(&v->counter, o, n); \
-} \
- \
-static __always_inline type arch_##pfx##_xchg(pfx##_t *v, type n) \
-{ \
- return arch_xchg(&v->counter, n); \
-}
ATOMIC_OPS(atomic, int)
diff --git a/arch/mips/include/asm/bugs.h b/arch/mips/include/asm/bugs.h
index 653f78f3a685..84be74afcb9a 100644
--- a/arch/mips/include/asm/bugs.h
+++ b/arch/mips/include/asm/bugs.h
@@ -1,17 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
* Copyright (C) 2007 Maciej W. Rozycki
- *
- * Needs:
- * void check_bugs(void);
*/
#ifndef _ASM_BUGS_H
#define _ASM_BUGS_H
#include <linux/bug.h>
-#include <linux/delay.h>
#include <linux/smp.h>
#include <asm/cpu.h>
@@ -24,17 +18,6 @@ extern void check_bugs64_early(void);
extern void check_bugs32(void);
extern void check_bugs64(void);
-static inline void __init check_bugs(void)
-{
- unsigned int cpu = smp_processor_id();
-
- cpu_data[cpu].udelay_val = loops_per_jiffy;
- check_bugs32();
-
- if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
- check_bugs64();
-}
-
static inline int r4k_daddiu_bug(void)
{
if (!IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
diff --git a/arch/mips/include/asm/fw/cfe/cfe_api.h b/arch/mips/include/asm/fw/cfe/cfe_api.h
index 25df2f4deb31..b52a6a9c26f1 100644
--- a/arch/mips/include/asm/fw/cfe/cfe_api.h
+++ b/arch/mips/include/asm/fw/cfe/cfe_api.h
@@ -17,9 +17,6 @@
#include <linux/types.h>
#include <linux/string.h>
-typedef long intptr_t;
-
-
/*
* Constants
*/
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 44f9824c1d8c..75abfa834ab7 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -19,7 +19,6 @@
#define IRQ_STACK_SIZE THREAD_SIZE
#define IRQ_STACK_START (IRQ_STACK_SIZE - 16)
-extern void __init init_IRQ(void);
extern void *irq_stack[NR_CPUS];
/*
diff --git a/arch/mips/include/asm/mach-loongson32/loongson1.h b/arch/mips/include/asm/mach-loongson32/loongson1.h
index eb3ddbec1752..d8f9dec0ecc3 100644
--- a/arch/mips/include/asm/mach-loongson32/loongson1.h
+++ b/arch/mips/include/asm/mach-loongson32/loongson1.h
@@ -47,7 +47,6 @@
#include <regs-clk.h>
#include <regs-mux.h>
-#include <regs-pwm.h>
#include <regs-rtc.h>
#include <regs-wdt.h>
diff --git a/arch/mips/include/asm/mach-loongson32/regs-pwm.h b/arch/mips/include/asm/mach-loongson32/regs-pwm.h
deleted file mode 100644
index ec870c82d492..000000000000
--- a/arch/mips/include/asm/mach-loongson32/regs-pwm.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2014 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * Loongson 1 PWM Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_PWM_H
-#define __ASM_MACH_LOONGSON32_REGS_PWM_H
-
-/* Loongson 1 PWM Timer Register Definitions */
-#define PWM_CNT 0x0
-#define PWM_HRC 0x4
-#define PWM_LRC 0x8
-#define PWM_CTRL 0xc
-
-/* PWM Control Register Bits */
-#define CNT_RST BIT(7)
-#define INT_SR BIT(6)
-#define INT_EN BIT(5)
-#define PWM_SINGLE BIT(4)
-#define PWM_OE BIT(3)
-#define CNT_EN BIT(0)
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_PWM_H */
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h
index 0145bbfb5efb..5719ff49eff1 100644
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -33,6 +33,7 @@ struct plat_smp_ops {
#ifdef CONFIG_HOTPLUG_CPU
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
+ void (*cleanup_dead_cpu)(unsigned cpu);
#endif
#ifdef CONFIG_KEXEC
void (*kexec_nonboot_cpu)(void);
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 18f3d95ecfec..60ebaed28a4c 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -148,6 +148,9 @@
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c0e65135481b..cb871eb784a7 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -11,6 +11,8 @@
* Copyright (C) 2000, 2001, 2002, 2007 Maciej W. Rozycki
*/
#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/export.h>
#include <linux/screen_info.h>
@@ -841,3 +843,14 @@ static int __init setnocoherentio(char *str)
}
early_param("nocoherentio", setnocoherentio);
#endif
+
+void __init arch_cpu_finalize_init(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ cpu_data[cpu].udelay_val = loops_per_jiffy;
+ check_bugs32();
+
+ if (IS_ENABLED(CONFIG_CPU_R4X00_BUGS64))
+ check_bugs64();
+}
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 15466d4cf4a0..c074ecce3fbf 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -392,6 +392,7 @@ static void bmips_cpu_die(unsigned int cpu)
void __ref play_dead(void)
{
idle_task_exit();
+ cpuhp_ap_report_dead();
/* flush data cache */
_dma_cache_wback_inv(0, ~0);
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 62f677b2306f..d7fdbec232da 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -503,8 +503,7 @@ void play_dead(void)
}
}
- /* This CPU has chosen its way out */
- (void)cpu_report_death();
+ cpuhp_ap_report_dead();
cps_shutdown_this_cpu(cpu_death);
@@ -527,7 +526,9 @@ static void wait_for_sibling_halt(void *ptr_cpu)
} while (!(halted & TCHALT_H));
}
-static void cps_cpu_die(unsigned int cpu)
+static void cps_cpu_die(unsigned int cpu) { }
+
+static void cps_cleanup_dead_cpu(unsigned cpu)
{
unsigned core = cpu_core(&cpu_data[cpu]);
unsigned int vpe_id = cpu_vpe_id(&cpu_data[cpu]);
@@ -535,12 +536,6 @@ static void cps_cpu_die(unsigned int cpu)
unsigned stat;
int err;
- /* Wait for the cpu to choose its way out */
- if (!cpu_wait_death(cpu, 5)) {
- pr_err("CPU%u: didn't offline\n", cpu);
- return;
- }
-
/*
* Now wait for the CPU to actually offline. Without doing this that
* offlining may race with one or more of:
@@ -624,6 +619,7 @@ static const struct plat_smp_ops cps_smp_ops = {
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,
+ .cleanup_dead_cpu = cps_cleanup_dead_cpu,
#endif
#ifdef CONFIG_KEXEC
.kexec_nonboot_cpu = cps_kexec_nonboot_cpu,
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 1d93b85271ba..90c71d800b59 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -690,6 +690,14 @@ void flush_tlb_one(unsigned long vaddr)
EXPORT_SYMBOL(flush_tlb_page);
EXPORT_SYMBOL(flush_tlb_one);
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+ if (mp_ops->cleanup_dead_cpu)
+ mp_ops->cleanup_dead_cpu(cpu);
+}
+#endif
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
static void tick_broadcast_callee(void *info)
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 253ff994ed2e..1976317d4e8b 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -389,3 +389,4 @@
448 n32 process_mrelease sys_process_mrelease
449 n32 futex_waitv sys_futex_waitv
450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node
+451 n32 cachestat sys_cachestat
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 3f1886ad9d80..cfda2511badf 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -365,3 +365,4 @@
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 n64 cachestat sys_cachestat
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 8f243e35a7b2..7692234c3768 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -438,3 +438,4 @@
448 o32 process_mrelease sys_process_mrelease
449 o32 futex_waitv sys_futex_waitv
450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node
+451 o32 cachestat sys_cachestat
diff --git a/arch/mips/loongson32/Kconfig b/arch/mips/loongson32/Kconfig
index 2ef9da0016df..a7c500959577 100644
--- a/arch/mips/loongson32/Kconfig
+++ b/arch/mips/loongson32/Kconfig
@@ -35,41 +35,4 @@ config LOONGSON1_LS1C
select COMMON_CLK
endchoice
-menuconfig CEVT_CSRC_LS1X
- bool "Use PWM Timer for clockevent/clocksource"
- select MIPS_EXTERNAL_TIMER
- depends on CPU_LOONGSON32
- help
- This option changes the default clockevent/clocksource to PWM Timer,
- and is required by Loongson1 CPUFreq support.
-
- If unsure, say N.
-
-choice
- prompt "Select clockevent/clocksource"
- depends on CEVT_CSRC_LS1X
- default TIMER_USE_PWM0
-
-config TIMER_USE_PWM0
- bool "Use PWM Timer 0"
- help
- Use PWM Timer 0 as the default clockevent/clocksourcer.
-
-config TIMER_USE_PWM1
- bool "Use PWM Timer 1"
- help
- Use PWM Timer 1 as the default clockevent/clocksourcer.
-
-config TIMER_USE_PWM2
- bool "Use PWM Timer 2"
- help
- Use PWM Timer 2 as the default clockevent/clocksourcer.
-
-config TIMER_USE_PWM3
- bool "Use PWM Timer 3"
- help
- Use PWM Timer 3 as the default clockevent/clocksourcer.
-
-endchoice
-
endif # MACH_LOONGSON32
diff --git a/arch/mips/loongson32/common/time.c b/arch/mips/loongson32/common/time.c
index 965c04aa56fd..74ad2b17918d 100644
--- a/arch/mips/loongson32/common/time.c
+++ b/arch/mips/loongson32/common/time.c
@@ -5,208 +5,8 @@
#include <linux/clk.h>
#include <linux/of_clk.h>
-#include <linux/interrupt.h>
-#include <linux/sizes.h>
#include <asm/time.h>
-#include <loongson1.h>
-#include <platform.h>
-
-#ifdef CONFIG_CEVT_CSRC_LS1X
-
-#if defined(CONFIG_TIMER_USE_PWM1)
-#define LS1X_TIMER_BASE LS1X_PWM1_BASE
-#define LS1X_TIMER_IRQ LS1X_PWM1_IRQ
-
-#elif defined(CONFIG_TIMER_USE_PWM2)
-#define LS1X_TIMER_BASE LS1X_PWM2_BASE
-#define LS1X_TIMER_IRQ LS1X_PWM2_IRQ
-
-#elif defined(CONFIG_TIMER_USE_PWM3)
-#define LS1X_TIMER_BASE LS1X_PWM3_BASE
-#define LS1X_TIMER_IRQ LS1X_PWM3_IRQ
-
-#else
-#define LS1X_TIMER_BASE LS1X_PWM0_BASE
-#define LS1X_TIMER_IRQ LS1X_PWM0_IRQ
-#endif
-
-DEFINE_RAW_SPINLOCK(ls1x_timer_lock);
-
-static void __iomem *timer_reg_base;
-static uint32_t ls1x_jiffies_per_tick;
-
-static inline void ls1x_pwmtimer_set_period(uint32_t period)
-{
- __raw_writel(period, timer_reg_base + PWM_HRC);
- __raw_writel(period, timer_reg_base + PWM_LRC);
-}
-
-static inline void ls1x_pwmtimer_restart(void)
-{
- __raw_writel(0x0, timer_reg_base + PWM_CNT);
- __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
-}
-
-void __init ls1x_pwmtimer_init(void)
-{
- timer_reg_base = ioremap(LS1X_TIMER_BASE, SZ_16);
- if (!timer_reg_base)
- panic("Failed to remap timer registers");
-
- ls1x_jiffies_per_tick = DIV_ROUND_CLOSEST(mips_hpt_frequency, HZ);
-
- ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick);
- ls1x_pwmtimer_restart();
-}
-
-static u64 ls1x_clocksource_read(struct clocksource *cs)
-{
- unsigned long flags;
- int count;
- u32 jifs;
- static int old_count;
- static u32 old_jifs;
-
- raw_spin_lock_irqsave(&ls1x_timer_lock, flags);
- /*
- * Although our caller may have the read side of xtime_lock,
- * this is now a seqlock, and we are cheating in this routine
- * by having side effects on state that we cannot undo if
- * there is a collision on the seqlock and our caller has to
- * retry. (Namely, old_jifs and old_count.) So we must treat
- * jiffies as volatile despite the lock. We read jiffies
- * before latching the timer count to guarantee that although
- * the jiffies value might be older than the count (that is,
- * the counter may underflow between the last point where
- * jiffies was incremented and the point where we latch the
- * count), it cannot be newer.
- */
- jifs = jiffies;
- /* read the count */
- count = __raw_readl(timer_reg_base + PWM_CNT);
-
- /*
- * It's possible for count to appear to go the wrong way for this
- * reason:
- *
- * The timer counter underflows, but we haven't handled the resulting
- * interrupt and incremented jiffies yet.
- *
- * Previous attempts to handle these cases intelligently were buggy, so
- * we just do the simple thing now.
- */
- if (count < old_count && jifs == old_jifs)
- count = old_count;
-
- old_count = count;
- old_jifs = jifs;
-
- raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags);
-
- return (u64) (jifs * ls1x_jiffies_per_tick) + count;
-}
-
-static struct clocksource ls1x_clocksource = {
- .name = "ls1x-pwmtimer",
- .read = ls1x_clocksource_read,
- .mask = CLOCKSOURCE_MASK(24),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static irqreturn_t ls1x_clockevent_isr(int irq, void *devid)
-{
- struct clock_event_device *cd = devid;
-
- ls1x_pwmtimer_restart();
- cd->event_handler(cd);
-
- return IRQ_HANDLED;
-}
-
-static int ls1x_clockevent_set_state_periodic(struct clock_event_device *cd)
-{
- raw_spin_lock(&ls1x_timer_lock);
- ls1x_pwmtimer_set_period(ls1x_jiffies_per_tick);
- ls1x_pwmtimer_restart();
- __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
- raw_spin_unlock(&ls1x_timer_lock);
-
- return 0;
-}
-
-static int ls1x_clockevent_tick_resume(struct clock_event_device *cd)
-{
- raw_spin_lock(&ls1x_timer_lock);
- __raw_writel(INT_EN | CNT_EN, timer_reg_base + PWM_CTRL);
- raw_spin_unlock(&ls1x_timer_lock);
-
- return 0;
-}
-
-static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *cd)
-{
- raw_spin_lock(&ls1x_timer_lock);
- __raw_writel(__raw_readl(timer_reg_base + PWM_CTRL) & ~CNT_EN,
- timer_reg_base + PWM_CTRL);
- raw_spin_unlock(&ls1x_timer_lock);
-
- return 0;
-}
-
-static int ls1x_clockevent_set_next(unsigned long evt,
- struct clock_event_device *cd)
-{
- raw_spin_lock(&ls1x_timer_lock);
- ls1x_pwmtimer_set_period(evt);
- ls1x_pwmtimer_restart();
- raw_spin_unlock(&ls1x_timer_lock);
-
- return 0;
-}
-
-static struct clock_event_device ls1x_clockevent = {
- .name = "ls1x-pwmtimer",
- .features = CLOCK_EVT_FEAT_PERIODIC,
- .rating = 300,
- .irq = LS1X_TIMER_IRQ,
- .set_next_event = ls1x_clockevent_set_next,
- .set_state_shutdown = ls1x_clockevent_set_state_shutdown,
- .set_state_periodic = ls1x_clockevent_set_state_periodic,
- .set_state_oneshot = ls1x_clockevent_set_state_shutdown,
- .tick_resume = ls1x_clockevent_tick_resume,
-};
-
-static void __init ls1x_time_init(void)
-{
- struct clock_event_device *cd = &ls1x_clockevent;
- int ret;
-
- if (!mips_hpt_frequency)
- panic("Invalid timer clock rate");
-
- ls1x_pwmtimer_init();
-
- clockevent_set_clock(cd, mips_hpt_frequency);
- cd->max_delta_ns = clockevent_delta2ns(0xffffff, cd);
- cd->max_delta_ticks = 0xffffff;
- cd->min_delta_ns = clockevent_delta2ns(0x000300, cd);
- cd->min_delta_ticks = 0x000300;
- cd->cpumask = cpumask_of(smp_processor_id());
- clockevents_register_device(cd);
-
- ls1x_clocksource.rating = 200 + mips_hpt_frequency / 10000000;
- ret = clocksource_register_hz(&ls1x_clocksource, mips_hpt_frequency);
- if (ret)
- panic(KERN_ERR "Failed to register clocksource: %d\n", ret);
-
- if (request_irq(LS1X_TIMER_IRQ, ls1x_clockevent_isr,
- IRQF_PERCPU | IRQF_TIMER, "ls1x-pwmtimer",
- &ls1x_clockevent))
- pr_err("Failed to register ls1x-pwmtimer interrupt\n");
-}
-#endif /* CONFIG_CEVT_CSRC_LS1X */
-
void __init plat_time_init(void)
{
struct clk *clk = NULL;
@@ -214,20 +14,10 @@ void __init plat_time_init(void)
/* initialize LS1X clocks */
of_clk_init(NULL);
-#ifdef CONFIG_CEVT_CSRC_LS1X
- /* setup LS1X PWM timer */
- clk = clk_get(NULL, "ls1x-pwmtimer");
- if (IS_ERR(clk))
- panic("unable to get timer clock, err=%ld", PTR_ERR(clk));
-
- mips_hpt_frequency = clk_get_rate(clk);
- ls1x_time_init();
-#else
/* setup mips r4k timer */
clk = clk_get(NULL, "cpu_clk");
if (IS_ERR(clk))
panic("unable to get cpu clock, err=%ld", PTR_ERR(clk));
mips_hpt_frequency = clk_get_rate(clk) / 2;
-#endif /* CONFIG_CEVT_CSRC_LS1X */
}
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index b0e8bb9fa036..cdecd7af11a6 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -775,6 +775,7 @@ void play_dead(void)
void (*play_dead_at_ckseg1)(int *);
idle_task_exit();
+ cpuhp_ap_report_dead();
prid_imp = read_c0_prid() & PRID_IMP_MASK;
prid_rev = read_c0_prid() & PRID_REV_MASK;
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 1b939abbe4ca..93c2d695588a 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -297,7 +297,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
- pte_t *ptep;
+ pte_t *ptep, *ptemap = NULL;
int idx, pid;
/*
@@ -344,7 +344,12 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
} else
#endif
{
- ptep = pte_offset_map(pmdp, address);
+ ptemap = ptep = pte_offset_map(pmdp, address);
+ /*
+ * update_mmu_cache() is called between pte_offset_map_lock()
+ * and pte_unmap_unlock(), so we can assume that ptep is not
+ * NULL here: and what should be done below if it were NULL?
+ */
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
#ifdef CONFIG_XPA
@@ -373,6 +378,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
tlbw_use_hazard();
htw_start();
flush_micro_tlb_vm(vma);
+
+ if (ptemap)
+ pte_unmap(ptemap);
local_irq_restore(flags);
}
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c
index 203870c4b86d..338849c430a5 100644
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -47,7 +47,7 @@ void __init setup_cpuinfo(void)
str = of_get_property(cpu, "altr,implementation", &len);
if (str)
- strlcpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl));
+ strscpy(cpuinfo.cpu_impl, str, sizeof(cpuinfo.cpu_impl));
else
strcpy(cpuinfo.cpu_impl, "<unknown>");
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 40bc8fb75e0b..8582ed965844 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -121,7 +121,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
dtb_passed = r6;
if (r7)
- strlcpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
+ strscpy(cmdline_passed, (char *)r7, COMMAND_LINE_SIZE);
}
#endif
@@ -129,10 +129,10 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
#ifndef CONFIG_CMDLINE_FORCE
if (cmdline_passed[0])
- strlcpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, cmdline_passed, COMMAND_LINE_SIZE);
#ifdef CONFIG_NIOS2_CMDLINE_IGNORE_DTB
else
- strlcpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#endif
#endif
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index 326167e4783a..8ce67ec7c9a3 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -130,7 +130,4 @@ static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
#include <asm/cmpxchg.h>
-#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v)))
-#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new)))
-
#endif /* __ASM_OPENRISC_ATOMIC_H */
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 967bde65dd0e..c0b4b1c253d1 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -57,6 +57,7 @@ config PARISC
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select GENERIC_SCHED_CLOCK
select GENERIC_IRQ_MIGRATION if SMP
select HAVE_UNSTABLE_SCHED_CLOCK if SMP
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index dd5a299ada69..d4f023887ff8 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -73,10 +73,6 @@ static __inline__ int arch_atomic_read(const atomic_t *v)
return READ_ONCE((v)->counter);
}
-/* exported interface */
-#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#define ATOMIC_OP(op, c_op) \
static __inline__ void arch_atomic_##op(int i, atomic_t *v) \
{ \
@@ -122,6 +118,11 @@ static __inline__ int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op) \
ATOMIC_OP(op, c_op) \
@@ -131,6 +132,10 @@ ATOMIC_OPS(and, &=)
ATOMIC_OPS(or, |=)
ATOMIC_OPS(xor, ^=)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
@@ -185,6 +190,11 @@ static __inline__ s64 arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
ATOMIC64_OPS(add, +=)
ATOMIC64_OPS(sub, -=)
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, c_op) \
ATOMIC64_OP(op, c_op) \
@@ -194,6 +204,10 @@ ATOMIC64_OPS(and, &=)
ATOMIC64_OPS(or, |=)
ATOMIC64_OPS(xor, ^=)
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
@@ -218,11 +232,6 @@ arch_atomic64_read(const atomic64_t *v)
return READ_ONCE((v)->counter);
}
-/* exported interface */
-#define arch_atomic64_cmpxchg(v, o, n) \
- ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#endif /* !CONFIG_64BIT */
diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h
deleted file mode 100644
index 0a7f9db6bd1c..000000000000
--- a/arch/parisc/include/asm/bugs.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * include/asm-parisc/bugs.h
- *
- * Copyright (C) 1999 Mike Shaver
- */
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- */
-
-#include <asm/processor.h>
-
-static inline void check_bugs(void)
-{
-// identify_cpu(&boot_cpu_data);
-}
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index e715df5385d6..5656395c95ee 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define pte_same(A,B) (pte_val(A) == pte_val(B))
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
#endif /* !__ASSEMBLY__ */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index f486d3dfb6bb..be264c2b1a11 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -129,6 +129,9 @@
#define SO_RCVMARK 0x4049
+#define SO_PASSPIDFD 0x404A
+#define SO_PEERPIDFD 0x404B
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index ca4a302d4365..501160250bb7 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -426,10 +426,15 @@ void flush_dcache_page(struct page *page)
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
addr = mpnt->vm_start + offset;
if (parisc_requires_coherency()) {
+ bool needs_flush = false;
pte_t *ptep;
ptep = get_ptep(mpnt->vm_mm, addr);
- if (ptep && pte_needs_flush(*ptep))
+ if (ptep) {
+ needs_flush = pte_needs_flush(*ptep);
+ pte_unmap(ptep);
+ }
+ if (needs_flush)
flush_user_cache_page(mpnt, addr);
} else {
/*
@@ -561,14 +566,20 @@ EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
static void flush_cache_page_if_present(struct vm_area_struct *vma,
unsigned long vmaddr, unsigned long pfn)
{
- pte_t *ptep = get_ptep(vma->vm_mm, vmaddr);
+ bool needs_flush = false;
+ pte_t *ptep;
/*
* The pte check is racy and sometimes the flush will trigger
* a non-access TLB miss. Hopefully, the page has already been
* flushed.
*/
- if (ptep && pte_needs_flush(*ptep))
+ ptep = get_ptep(vma->vm_mm, vmaddr);
+ if (ptep) {
+ needs_flush = pte_needs_flush(*ptep);
+ pte_unmap(ptep);
+ }
+ if (needs_flush)
flush_cache_page(vma, vmaddr, pfn);
}
@@ -635,17 +646,22 @@ static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, u
pte_t *ptep;
for (addr = start; addr < end; addr += PAGE_SIZE) {
+ bool needs_flush = false;
/*
* The vma can contain pages that aren't present. Although
* the pte search is expensive, we need the pte to find the
* page pfn and to check whether the page should be flushed.
*/
ptep = get_ptep(vma->vm_mm, addr);
- if (ptep && pte_needs_flush(*ptep)) {
+ if (ptep) {
+ needs_flush = pte_needs_flush(*ptep);
+ pfn = pte_pfn(*ptep);
+ pte_unmap(ptep);
+ }
+ if (needs_flush) {
if (parisc_requires_coherency()) {
flush_user_cache_page(vma, addr);
} else {
- pfn = pte_pfn(*ptep);
if (WARN_ON(!pfn_valid(pfn)))
return;
__flush_cache_page(vma, addr, PFN_PHYS(pfn));
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 71ed5391f29d..415f12d5bab3 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -164,7 +164,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr,
pmd_clear(pmd);
return;
}
- pte = pte_offset_map(pmd, vaddr);
+ pte = pte_offset_kernel(pmd, vaddr);
vaddr &= ~PMD_MASK;
end = vaddr + size;
if (end > PMD_SIZE)
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 24411ab79c30..abdbf038d643 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -171,8 +171,8 @@ void __noreturn arch_cpu_idle_dead(void)
local_irq_disable();
- /* Tell __cpu_die() that this CPU is now safe to dispose of. */
- (void)cpu_report_death();
+ /* Tell the core that this CPU is now safe to dispose of. */
+ cpuhp_ap_report_dead();
/* Ensure that the cache lines are written out. */
flush_cache_all_local();
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index b7fc859fa87d..d0eb1bd19a13 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -271,7 +271,6 @@ void arch_send_call_function_single_ipi(int cpu)
static void
smp_cpu_init(int cpunum)
{
- extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */
extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */
/* Set modes and Enable floating point coprocessor */
@@ -500,11 +499,10 @@ int __cpu_disable(void)
void __cpu_die(unsigned int cpu)
{
pdc_cpu_rendezvous_lock();
+}
- if (!cpu_wait_death(cpu, 5)) {
- pr_crit("CPU%u: cpu didn't die\n", cpu);
- return;
- }
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
pr_info("CPU%u: is shutting down\n", cpu);
/* set task's state to interruptible sleep */
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 0e42fceb2d5e..3c71fad78318 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -448,3 +448,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d1d3990b83f6..a8a1a7c1e16e 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -66,7 +66,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, pmd, addr);
+ pte = pte_alloc_huge(mm, pmd, addr);
}
return pte;
}
@@ -90,7 +90,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (!pud_none(*pud)) {
pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd))
- pte = pte_offset_map(pmd, addr);
+ pte = pte_offset_huge(pmd, addr);
}
}
}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a243fcdf346d..395044b705a1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -90,8 +90,7 @@ config NMI_IPI
config PPC_WATCHDOG
bool
- depends on HARDLOCKUP_DETECTOR
- depends on HAVE_HARDLOCKUP_DETECTOR_ARCH
+ depends on HARDLOCKUP_DETECTOR_ARCH
default y
help
This is a placeholder when the powerpc hardlockup detector
@@ -240,7 +239,7 @@ config PPC
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
select HAVE_GENERIC_VDSO
select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP
- select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 47228b177478..5bf6a4d49268 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -126,18 +126,6 @@ ATOMIC_OPS(xor, xor, "", K)
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
-#define arch_atomic_cmpxchg(v, o, n) \
- (arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic_cmpxchg_relaxed(v, o, n) \
- arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define arch_atomic_cmpxchg_acquire(v, o, n) \
- arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-
-#define arch_atomic_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-#define arch_atomic_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-
/**
* atomic_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic_t
@@ -396,18 +384,6 @@ static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-#define arch_atomic64_cmpxchg(v, o, n) \
- (arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic64_cmpxchg_relaxed(v, o, n) \
- arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define arch_atomic64_cmpxchg_acquire(v, o, n) \
- arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-
-#define arch_atomic64_xchg(v, new) \
- (arch_xchg(&((v)->counter), new))
-#define arch_atomic64_xchg_relaxed(v, new) \
- arch_xchg_relaxed(&((v)->counter), (new))
-
/**
* atomic64_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h
deleted file mode 100644
index 01b8f6ca4dbb..000000000000
--- a/arch/powerpc/include/asm/bugs.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_BUGS_H
-#define _ASM_POWERPC_BUGS_H
-
-/*
- */
-
-/*
- * This file is included by 'init/main.c' to check for
- * architecture-dependent bugs.
- */
-
-static inline void check_bugs(void) { }
-
-#endif /* _ASM_POWERPC_BUGS_H */
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index ae0a68a838e8..69232231d270 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -33,6 +33,10 @@
#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#endif
+
#if !defined(__ASSEMBLY__)
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index deadd2149426..f257cacb49a9 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -50,9 +50,14 @@ extern void *hardirq_ctx[NR_CPUS];
extern void *softirq_ctx[NR_CPUS];
void __do_IRQ(struct pt_regs *regs);
-extern void __init init_IRQ(void);
int irq_choose_cpu(const struct cpumask *mask);
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+ bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
#endif /* _ASM_IRQ_H */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index c3c7adef74de..49a75340c3e0 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -3,18 +3,10 @@
#define _ASM_NMI_H
#ifdef CONFIG_PPC_WATCHDOG
-extern void arch_touch_nmi_watchdog(void);
long soft_nmi_interrupt(struct pt_regs *regs);
-void watchdog_nmi_set_timeout_pct(u64 pct);
+void watchdog_hardlockup_set_timeout_pct(u64 pct);
#else
-static inline void arch_touch_nmi_watchdog(void) {}
-static inline void watchdog_nmi_set_timeout_pct(u64 pct) {}
-#endif
-
-#ifdef CONFIG_NMI_IPI
-extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
- bool exclude_self);
-#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {}
#endif
extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 56f217606327..b9ac9e3a771c 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -12,10 +12,6 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#endif
-
#if defined(CONFIG_PPC_256K_PAGES) || \
(defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9972626ddaf6..6a88bfdaa69b 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x)
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
}
-
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 265801a3e94c..e95660e69414 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -417,9 +417,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
{
raw_local_irq_save(*flags);
hard_irq_disable();
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -427,15 +427,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags)
noinstr static void nmi_ipi_lock(void)
{
- while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0);
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
}
noinstr static void nmi_ipi_unlock(void)
{
smp_mb();
- WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1);
- arch_atomic_set(&__nmi_ipi_lock, 0);
+ WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+ raw_atomic_set(&__nmi_ipi_lock, 0);
}
noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
@@ -1605,6 +1605,7 @@ static void add_cpu_to_masks(int cpu)
}
/* Activate a secondary processor. */
+__no_stack_protector
void start_secondary(void *unused)
{
unsigned int cpu = raw_smp_processor_id();
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index a0be127475b1..8c0b08b7a80e 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -537,3 +537,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 828d0f4106d2..cba6dd15de3b 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -200,7 +200,7 @@ static int __init TAU_init(void)
tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
!strcmp(cur_cpu_spec->platform, "ppc750");
- tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1);
+ tau_workq = alloc_ordered_workqueue("tau", 0);
if (!tau_workq)
return -ENOMEM;
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index dbcc4a793f0b..edb2dd1f53eb 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -438,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
return HRTIMER_NORESTART;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -479,7 +479,7 @@ static void start_watchdog(void *arg)
return;
}
- if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -546,7 +546,7 @@ static void watchdog_calc_timeouts(void)
wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
}
-void watchdog_nmi_stop(void)
+void watchdog_hardlockup_stop(void)
{
int cpu;
@@ -554,7 +554,7 @@ void watchdog_nmi_stop(void)
stop_watchdog_on_cpu(cpu);
}
-void watchdog_nmi_start(void)
+void watchdog_hardlockup_start(void)
{
int cpu;
@@ -566,7 +566,7 @@ void watchdog_nmi_start(void)
/*
* Invoked from core watchdog init.
*/
-int __init watchdog_nmi_probe(void)
+int __init watchdog_hardlockup_probe(void)
{
int err;
@@ -582,7 +582,7 @@ int __init watchdog_nmi_probe(void)
}
#ifdef CONFIG_PPC_PSERIES
-void watchdog_nmi_set_timeout_pct(u64 pct)
+void watchdog_hardlockup_set_timeout_pct(u64 pct)
{
pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
WRITE_ONCE(wd_timeout_pct, pct);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 461307b89c3a..572707858d65 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -509,7 +509,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
} else {
pte_t *pte;
- pte = pte_offset_map(p, 0);
+ pte = pte_offset_kernel(p, 0);
kvmppc_unmap_free_pte(kvm, pte, full, lpid);
pmd_clear(p);
}
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index a64ea0a7ef96..21fcad97ae80 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -239,12 +239,16 @@ void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
start_pte = pte_offset_map(pmd, addr);
+ if (!start_pte)
+ goto out;
for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
unsigned long pteval = pte_val(*pte);
if (pteval & H_PAGE_HASHPTE)
hpte_need_flush(mm, addr, pte, pteval, 0);
addr += PAGE_SIZE;
}
+ pte_unmap(start_pte);
+out:
arch_leave_lazy_mmu_mode();
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 81d7185e2ae8..d19fb1f3007d 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -105,7 +105,7 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
FOLL_WRITE | FOLL_LONGTERM,
- mem->hpages + entry, NULL);
+ mem->hpages + entry);
if (ret == n) {
pinned += n;
continue;
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index ce804b7bf84e..0bd4866d9824 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -795,12 +795,20 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
goto out;
if (current->active_mm == mm) {
+ unsigned long flags;
+
WARN_ON_ONCE(current->mm != NULL);
- /* Is a kernel thread and is using mm as the lazy tlb */
+ /*
+ * It is a kernel thread and is using mm as the lazy tlb, so
+ * switch it to init_mm. This is not always called from IPI
+ * (e.g., flush_type_needed), so must disable irqs.
+ */
+ local_irq_save(flags);
mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
switch_mm_irqs_off(mm, &init_mm, current);
mmdrop_lazy_tlb(mm);
+ local_irq_restore(flags);
}
/*
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index b75a9fb99599..0dc85556dec5 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -71,6 +71,8 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
if (pmd_none(*pmd))
return;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return;
arch_enter_lazy_mmu_mode();
for (; npages > 0; --npages) {
pte_update(mm, addr, pte, 0, 0, 0);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b900933507da..f7c683b672c1 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -183,7 +183,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
- return pte_alloc_map(mm, (pmd_t *)hpdp, addr);
+ return pte_alloc_huge(mm, (pmd_t *)hpdp, addr);
BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 193cc9c39422..0c41f4b005bc 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -76,7 +76,8 @@ int pmac_newworld;
static int current_root_goodness = -1;
-#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */
+/* sda1 - slightly silly choice */
+#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1)
sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
EXPORT_SYMBOL(sys_ctrler);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 719c97a155ed..47f8eabd1bee 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -564,8 +564,7 @@ int __init dlpar_workqueue_init(void)
if (pseries_hp_wq)
return 0;
- pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
- WQ_UNBOUND, 1);
+ pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0);
return pseries_hp_wq ? 0 : -ENOMEM;
}
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 6f30113b5468..cd632ba9ebff 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -750,7 +750,7 @@ static int pseries_migrate_partition(u64 handle)
goto out;
if (factor)
- watchdog_nmi_set_timeout_pct(factor);
+ watchdog_hardlockup_set_timeout_pct(factor);
ret = pseries_suspend(handle);
if (ret == 0) {
@@ -766,7 +766,7 @@ static int pseries_migrate_partition(u64 handle)
pseries_cancel_migration(handle, ret);
if (factor)
- watchdog_nmi_set_timeout_pct(0);
+ watchdog_hardlockup_set_timeout_pct(0);
out:
vas_migration_handler(VAS_RESUME);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 70c4c59a1a8f..fae747cc57d2 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3376,12 +3376,15 @@ static void show_pte(unsigned long addr)
printf("pmdp @ 0x%px = 0x%016lx\n", pmdp, pmd_val(*pmdp));
ptep = pte_offset_map(pmdp, addr);
- if (pte_none(*ptep)) {
+ if (!ptep || pte_none(*ptep)) {
+ if (ptep)
+ pte_unmap(ptep);
printf("no valid PTE\n");
return;
}
format_pte(ptep, pte_val(*ptep));
+ pte_unmap(ptep);
sync();
__delay(200);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a11b1c038c6d..a08917f681af 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -123,6 +123,7 @@ config RISCV
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index bba472928b53..f5dfef6c2153 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -238,78 +238,6 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif
-/*
- * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a full barrier.
- */
-#define ATOMIC_OP(c_t, prefix, size) \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_relaxed(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_acquire(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n) \
-{ \
- return __xchg_release(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n) \
-{ \
- return __arch_xchg(&(v->counter), n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_relaxed(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_acquire(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v, \
- c_t o, c_t n) \
-{ \
- return __cmpxchg_release(&(v->counter), o, n, size); \
-} \
-static __always_inline \
-c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \
-{ \
- return __cmpxchg(&(v->counter), o, n, size); \
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS() \
- ATOMIC_OP(int, , 4)
-#else
-#define ATOMIC_OPS() \
- ATOMIC_OP(int, , 4) \
- ATOMIC_OP(s64, 64, 8)
-#endif
-
-ATOMIC_OPS()
-
-#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
-#define arch_atomic_xchg_acquire arch_atomic_xchg_acquire
-#define arch_atomic_xchg_release arch_atomic_xchg_release
-#define arch_atomic_xchg arch_atomic_xchg
-#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
-#define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire
-#define arch_atomic_cmpxchg_release arch_atomic_cmpxchg_release
-#define arch_atomic_cmpxchg arch_atomic_cmpxchg
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP
-
static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
{
int prev, rc;
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 43b9ebfbd943..8e10a94430a2 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -16,6 +16,4 @@ void riscv_set_intc_hwnode_fn(struct fwnode_handle *(*fn)(void));
struct fwnode_handle *riscv_get_intc_hwnode(void);
-extern void __init init_IRQ(void);
-
#endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index c4b77017ec58..0d555847cde6 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -70,7 +70,7 @@ asmlinkage void smp_callin(void);
#if defined CONFIG_HOTPLUG_CPU
int __cpu_disable(void);
-void __cpu_die(unsigned int cpu);
+static inline void __cpu_die(unsigned int cpu) { }
#endif /* CONFIG_HOTPLUG_CPU */
#else
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index d6a7428f6248..a06697846e69 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -88,6 +88,4 @@ static inline int read_current_timer(unsigned long *timer_val)
return 0;
}
-extern void time_init(void);
-
#endif /* _ASM_RISCV_TIMEX_H */
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index a941adc7cbf2..457a18efcb11 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/irq.h>
+#include <linux/cpuhotplug.h>
#include <linux/cpu.h>
#include <linux/sched/hotplug.h>
#include <asm/irq.h>
@@ -49,17 +50,15 @@ int __cpu_disable(void)
return ret;
}
+#ifdef CONFIG_HOTPLUG_CPU
/*
- * Called on the thread which is asking for a CPU to be shutdown.
+ * Called on the thread which is asking for a CPU to be shutdown, if the
+ * CPU reported dead to the hotplug core.
*/
-void __cpu_die(unsigned int cpu)
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
{
int ret = 0;
- if (!cpu_wait_death(cpu, 5)) {
- pr_err("CPU %u: didn't die\n", cpu);
- return;
- }
pr_notice("CPU%u: off\n", cpu);
/* Verify from the firmware if the cpu is really stopped*/
@@ -76,9 +75,10 @@ void __noreturn arch_cpu_idle_dead(void)
{
idle_task_exit();
- (void)cpu_report_death();
+ cpuhp_ap_report_dead();
cpu_ops[smp_processor_id()]->cpu_stop();
/* It should never reach here */
BUG();
}
+#endif
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index e0ef56dc57b9..542883b3b49b 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -67,7 +67,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
- pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order));
+ pte = pte_alloc_huge(mm, pmd, addr & napot_cont_mask(order));
break;
}
}
@@ -114,7 +114,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
for_each_napot_order(order) {
if (napot_cont_size(order) == sz) {
- pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order));
+ pte = pte_offset_huge(pmd, addr & napot_cont_mask(order));
break;
}
}
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index bd2e27f82532..dc20e166983e 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -31,7 +31,7 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
CFLAGS_string.o := -D__DISABLE_EXPORTS
CFLAGS_ctype.o := -D__DISABLE_EXPORTS
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 6dab9c1be508..5b39918b7042 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -117,6 +117,7 @@ config S390
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_HUGETLBFS
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index acb1f8b53105..c67f59db7a51 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -45,6 +45,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
static pte_t pte_z;
+static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ pgtable_populate(start, end, mode);
+}
+
static void kasan_populate_shadow(void)
{
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -95,17 +102,17 @@ static void kasan_populate_shadow(void)
*/
for_each_physmem_usable_range(i, &start, &end)
- pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
untracked_end = VMALLOC_START;
/* shallowly populate kasan shadow for vmalloc and modules */
- pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW);
+ kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
} else {
untracked_end = MODULES_VADDR;
}
/* populate kasan shadow for untracked memory */
- pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW);
- pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(ident_map_size, untracked_end, POPULATE_KASAN_ZERO_SHADOW);
+ kasan_populate(MODULES_END, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index be3bf03bf361..aa95cf6dfabb 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -116,6 +116,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 769c7eed8b6a..f041945f9148 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -107,6 +107,7 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 29dc827e0fe8..d29a9d908797 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017,2020
+ * Copyright IBM Corp. 2017, 2023
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
@@ -132,7 +132,8 @@ static inline int __paes_keyblob2pkey(struct key_blob *kb,
if (i > 0 && ret == -EAGAIN && in_task())
if (msleep_interruptible(1000))
return -EINTR;
- ret = pkey_keyblob2pkey(kb->key, kb->keylen, pk);
+ ret = pkey_keyblob2pkey(kb->key, kb->keylen,
+ pk->protkey, &pk->len, &pk->type);
if (ret == 0)
break;
}
@@ -145,6 +146,7 @@ static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
int ret;
struct pkey_protkey pkey;
+ pkey.len = sizeof(pkey.protkey);
ret = __paes_keyblob2pkey(&ctx->kb, &pkey);
if (ret)
return ret;
@@ -414,6 +416,9 @@ static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
{
struct pkey_protkey pkey0, pkey1;
+ pkey0.len = sizeof(pkey0.protkey);
+ pkey1.len = sizeof(pkey1.protkey);
+
if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
__paes_keyblob2pkey(&ctx->kb[1], &pkey1))
return -EINVAL;
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
index c37eb921bfbf..a873e873e1ee 100644
--- a/arch/s390/include/asm/asm-prototypes.h
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -6,4 +6,8 @@
#include <asm/fpu/api.h>
#include <asm-generic/asm-prototypes.h>
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+
#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 06e0e42f4eec..aae0315374de 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -190,38 +190,18 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
#define arch_cmpxchg_local arch_cmpxchg
#define arch_cmpxchg64_local arch_cmpxchg
-#define system_has_cmpxchg_double() 1
+#define system_has_cmpxchg128() 1
-static __always_inline int __cmpxchg_double(unsigned long p1, unsigned long p2,
- unsigned long o1, unsigned long o2,
- unsigned long n1, unsigned long n2)
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
{
- union register_pair old = { .even = o1, .odd = o2, };
- union register_pair new = { .even = n1, .odd = n2, };
- int cc;
-
asm volatile(
" cdsg %[old],%[new],%[ptr]\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
- : [cc] "=&d" (cc), [old] "+&d" (old.pair)
- : [new] "d" (new.pair),
- [ptr] "QS" (*(unsigned long *)p1), "Q" (*(unsigned long *)p2)
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [new] "d" (new)
: "memory", "cc");
- return !cc;
+ return old;
}
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
-({ \
- typeof(p1) __p1 = (p1); \
- typeof(p2) __p2 = (p2); \
- \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
- __cmpxchg_double((unsigned long)__p1, (unsigned long)__p2, \
- (unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2)); \
-})
+#define arch_cmpxchg128 arch_cmpxchg128
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 646b12981f20..b378e2b57ad8 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -2,7 +2,7 @@
/*
* CP Assist for Cryptographic Functions (CPACF)
*
- * Copyright IBM Corp. 2003, 2017
+ * Copyright IBM Corp. 2003, 2023
* Author(s): Thomas Spatzier
* Jan Glauber
* Harald Freudenberger (freude@de.ibm.com)
@@ -132,6 +132,11 @@
#define CPACF_PCKMO_ENC_AES_128_KEY 0x12
#define CPACF_PCKMO_ENC_AES_192_KEY 0x13
#define CPACF_PCKMO_ENC_AES_256_KEY 0x14
+#define CPACF_PCKMO_ENC_ECC_P256_KEY 0x20
+#define CPACF_PCKMO_ENC_ECC_P384_KEY 0x21
+#define CPACF_PCKMO_ENC_ECC_P521_KEY 0x22
+#define CPACF_PCKMO_ENC_ECC_ED25519_KEY 0x28
+#define CPACF_PCKMO_ENC_ECC_ED448_KEY 0x29
/*
* Function codes for the PRNO (PERFORM RANDOM NUMBER OPERATION)
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 7e417d7de568..a0de5b9b02ea 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -140,7 +140,7 @@ union hws_trailer_header {
unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
unsigned long long overflow; /* 64 - Overflow Count */
};
- __uint128_t val;
+ u128 val;
};
struct hws_trailer_entry {
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 0d1c74a7a650..a4d2e103f116 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -16,6 +16,9 @@
#define OS_INFO_VMCOREINFO 0
#define OS_INFO_REIPL_BLOCK 1
+#define OS_INFO_FLAGS_ENTRY 2
+
+#define OS_INFO_FLAG_REIPL_CLEAR (1UL << 0)
struct os_info_entry {
u64 addr;
@@ -30,8 +33,8 @@ struct os_info {
u16 version_minor;
u64 crashkernel_addr;
u64 crashkernel_size;
- struct os_info_entry entry[2];
- u8 reserved[4024];
+ struct os_info_entry entry[3];
+ u8 reserved[4004];
} __packed;
void os_info_init(void);
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 081837b391e3..264095dd84bc 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -148,6 +148,22 @@
#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, o, n) this_cpu_cmpxchg_8(pcp, o, n)
+
+#define this_cpu_cmpxchg128(pcp, oval, nval) \
+({ \
+ typedef typeof(pcp) pcp_op_T__; \
+ u128 old__, new__, ret__; \
+ pcp_op_T__ *ptr__; \
+ old__ = oval; \
+ new__ = nval; \
+ preempt_disable_notrace(); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
+ ret__ = cmpxchg128((void *)ptr__, old__, new__); \
+ preempt_enable_notrace(); \
+ ret__; \
+})
+
#define arch_this_cpu_xchg(pcp, nval) \
({ \
typeof(pcp) *ptr__; \
@@ -164,24 +180,6 @@
#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
-#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- typeof(pcp1) *p1__; \
- typeof(pcp2) *p2__; \
- int ret__; \
- \
- preempt_disable_notrace(); \
- p1__ = raw_cpu_ptr(&(pcp1)); \
- p2__ = raw_cpu_ptr(&(pcp2)); \
- ret__ = __cmpxchg_double((unsigned long)p1__, (unsigned long)p2__, \
- (unsigned long)(o1), (unsigned long)(o2), \
- (unsigned long)(n1), (unsigned long)(n2)); \
- preempt_enable_notrace(); \
- ret__; \
-})
-
-#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
-
#include <asm-generic/percpu.h>
#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6822a11c2c8a..c55f3c3365af 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count)
atomic_long_add(count, &direct_pages_count[level]);
}
-struct seq_file;
-void arch_report_meminfo(struct seq_file *m);
-
/*
* The S390 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index 8e9c582592b3..9e41a74fce9a 100644
--- a/arch/s390/include/asm/physmem_info.h
+++ b/arch/s390/include/asm/physmem_info.h
@@ -3,6 +3,7 @@
#define _ASM_S390_MEM_DETECT_H
#include <linux/types.h>
+#include <asm/page.h>
enum physmem_info_source {
MEM_DETECT_NONE = 0,
@@ -133,7 +134,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t)
#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \
for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \
- range && range->end; range = range->chain, \
+ range && range->end; range = range->chain ? __va(range->chain) : NULL, \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
@@ -145,7 +146,7 @@ static inline struct reserved_range *__physmem_reserved_next(enum reserved_range
return range;
}
if (range->chain)
- return range->chain;
+ return __va(range->chain);
while (++*t < RR_MAX) {
range = &physmem_info.reserved[*t];
if (range->end)
diff --git a/arch/s390/include/asm/pkey.h b/arch/s390/include/asm/pkey.h
index dd3d20c332ac..47d80a7451a6 100644
--- a/arch/s390/include/asm/pkey.h
+++ b/arch/s390/include/asm/pkey.h
@@ -2,7 +2,7 @@
/*
* Kernelspace interface to the pkey device driver
*
- * Copyright IBM Corp. 2016,2019
+ * Copyright IBM Corp. 2016, 2023
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -23,6 +23,6 @@
* @return 0 on success, negative errno value on failure
*/
int pkey_keyblob2pkey(const u8 *key, u32 keylen,
- struct pkey_protkey *protkey);
+ u8 *protkey, u32 *protkeylen, u32 *protkeytype);
#endif /* _KAPI_PKEY_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c7c97921ed8d..a674c7d25da5 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -52,9 +52,6 @@ struct thread_info {
struct task_struct;
-void arch_release_task_struct(struct task_struct *tsk);
-int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-
void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index ce878e85b6e4..4d646659a5f5 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -63,7 +63,7 @@ static inline int store_tod_clock_ext_cc(union tod_clock *clk)
return cc;
}
-static inline void store_tod_clock_ext(union tod_clock *tod)
+static __always_inline void store_tod_clock_ext(union tod_clock *tod)
{
asm volatile("stcke %0" : "=Q" (*tod) : : "cc");
}
@@ -177,7 +177,7 @@ static inline void local_tick_enable(unsigned long comp)
typedef unsigned long cycles_t;
-static inline unsigned long get_tod_clock(void)
+static __always_inline unsigned long get_tod_clock(void)
{
union tod_clock clk;
@@ -204,6 +204,11 @@ void init_cpu_timer(void);
extern union tod_clock tod_clock_base;
+static __always_inline unsigned long __get_tod_clock_monotonic(void)
+{
+ return get_tod_clock() - tod_clock_base.tod;
+}
+
/**
* get_clock_monotonic - returns current time in clock rate units
*
@@ -216,7 +221,7 @@ static inline unsigned long get_tod_clock_monotonic(void)
unsigned long tod;
preempt_disable_notrace();
- tod = get_tod_clock() - tod_clock_base.tod;
+ tod = __get_tod_clock_monotonic();
preempt_enable_notrace();
return tod;
}
@@ -240,7 +245,7 @@ static inline unsigned long get_tod_clock_monotonic(void)
* -> ns = (th * 125) + ((tl * 125) >> 9);
*
*/
-static inline unsigned long tod_to_ns(unsigned long todval)
+static __always_inline unsigned long tod_to_ns(unsigned long todval)
{
return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index 924b876f992c..f7bae1c63bd6 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -2,7 +2,7 @@
/*
* Userspace interface to the pkey device driver
*
- * Copyright IBM Corp. 2017, 2019
+ * Copyright IBM Corp. 2017, 2023
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
@@ -32,10 +32,15 @@
#define MINKEYBLOBSIZE SECKEYBLOBSIZE
/* defines for the type field within the pkey_protkey struct */
-#define PKEY_KEYTYPE_AES_128 1
-#define PKEY_KEYTYPE_AES_192 2
-#define PKEY_KEYTYPE_AES_256 3
-#define PKEY_KEYTYPE_ECC 4
+#define PKEY_KEYTYPE_AES_128 1
+#define PKEY_KEYTYPE_AES_192 2
+#define PKEY_KEYTYPE_AES_256 3
+#define PKEY_KEYTYPE_ECC 4
+#define PKEY_KEYTYPE_ECC_P256 5
+#define PKEY_KEYTYPE_ECC_P384 6
+#define PKEY_KEYTYPE_ECC_P521 7
+#define PKEY_KEYTYPE_ECC_ED25519 8
+#define PKEY_KEYTYPE_ECC_ED448 9
/* the newer ioctls use a pkey_key_type enum for type information */
enum pkey_key_type {
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 8a617be28bb4..7af69948b290 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -568,9 +568,9 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads;
+ size_t alloc_size;
int mem_chunk_cnt;
void *ptr, *hdr;
- u32 alloc_size;
u64 hdr_off;
/* If we are not in kdump or zfcp/nvme dump mode return */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 34674e38826b..9f41853f36b9 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -34,14 +34,12 @@ void kernel_stack_overflow(struct pt_regs * regs);
void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
struct pt_regs *regs);
-void __init init_IRQ(void);
void do_io_irq(struct pt_regs *regs);
void do_ext_irq(struct pt_regs *regs);
void do_restart(void *arg);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
-void __init time_init(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f44f70de9661..85a00d97a314 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -176,6 +176,8 @@ static bool reipl_fcp_clear;
static bool reipl_ccw_clear;
static bool reipl_eckd_clear;
+static unsigned long os_info_flags;
+
static inline int __diag308(unsigned long subcode, unsigned long addr)
{
union register_pair r1;
@@ -1938,6 +1940,20 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
struct lowcore *abs_lc;
unsigned int csum;
+ /*
+ * Set REIPL_CLEAR flag in os_info flags entry indicating
+ * 'clear' sysfs attribute has been set on the panicked system
+ * for specified reipl type.
+ * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN.
+ */
+ if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) ||
+ (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) ||
+ (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) ||
+ (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) ||
+ reipl_type == IPL_TYPE_NSS ||
+ reipl_type == IPL_TYPE_UNKNOWN)
+ os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR;
+ os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags));
csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
abs_lc = get_abs_lowcore();
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index f1b35dcdf3eb..42215f9404af 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -352,7 +352,8 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
- val += (Elf_Addr) me->mem[MOD_TEXT].base - loc;
+ val += (Elf_Addr)me->mem[MOD_TEXT].base +
+ me->arch.got_offset - loc;
rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cf1b6e8a708d..90679143534b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -76,6 +76,7 @@ static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
}
struct cpu_cf_events {
+ refcount_t refcnt; /* Reference count */
atomic_t ctr_set[CPUMF_CTR_SET_MAX];
u64 state; /* For perf_event_open SVC */
u64 dev_state; /* For /dev/hwctr */
@@ -88,9 +89,6 @@ struct cpu_cf_events {
unsigned int sets; /* # Counter set saved in memory */
};
-/* Per-CPU event structure for the counter facility */
-static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
-
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -103,6 +101,221 @@ static debug_info_t *cf_dbg;
*/
static struct cpumf_ctr_info cpumf_ctr_info;
+struct cpu_cf_ptr {
+ struct cpu_cf_events *cpucf;
+};
+
+static struct cpu_cf_root { /* Anchor to per CPU data */
+ refcount_t refcnt; /* Overall active events */
+ struct cpu_cf_ptr __percpu *cfptr;
+} cpu_cf_root;
+
+/*
+ * Serialize event initialization and event removal. Both are called from
+ * user space in task context with perf_event_open() and close()
+ * system calls.
+ *
+ * This mutex serializes functions cpum_cf_alloc_cpu() called at event
+ * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu()
+ * called at event removal via call back function hw_perf_event_destroy()
+ * when the event is deleted. They are serialized to enforce correct
+ * bookkeeping of pointer and reference counts anchored by
+ * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the
+ * per CPU pointers stored in cpu_cf_root::cfptr.
+ */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Get pointer to per-cpu structure (NULL if none is installed).
+ *
+ * Function get_cpu_cfhw() is called from
+ * - cfset_all_copy(): This function is protected by cpus_read_lock(), so
+ * CPU hot plug remove can not happen. Event removal requires a close()
+ * first.
+ *
+ * Function this_cpu_cfhw() is called from perf common code functions:
+ * - pmu_{en|dis}able(), pmu_{add|del}() and pmu_{start|stop}():
+ * All functions execute with interrupts disabled on that particular CPU.
+ * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_all_copy().
+ *
+ * Therefore it is safe to access the CPU specific pointer to the event.
+ */
+static struct cpu_cf_events *get_cpu_cfhw(int cpu)
+{
+ struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr;
+
+ if (p) {
+ struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu);
+
+ return q->cpucf;
+ }
+ return NULL;
+}
+
+static struct cpu_cf_events *this_cpu_cfhw(void)
+{
+ return get_cpu_cfhw(smp_processor_id());
+}
+
+/* Disable counter sets on dedicated CPU */
+static void cpum_cf_reset_cpu(void *flags)
+{
+ lcctl(0);
+}
+
+/* Free per CPU data when the last event is removed. */
+static void cpum_cf_free_root(void)
+{
+ if (!refcount_dec_and_test(&cpu_cf_root.refcnt))
+ return;
+ free_percpu(cpu_cf_root.cfptr);
+ cpu_cf_root.cfptr = NULL;
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ debug_sprintf_event(cf_dbg, 4, "%s2 root.refcnt %u cfptr %px\n",
+ __func__, refcount_read(&cpu_cf_root.refcnt),
+ cpu_cf_root.cfptr);
+}
+
+/*
+ * On initialization of first event also allocate per CPU data dynamically.
+ * Start with an array of pointers, the array size is the maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int cpum_cf_alloc_root(void)
+{
+ int rc = 0;
+
+ if (refcount_inc_not_zero(&cpu_cf_root.refcnt))
+ return rc;
+
+ /* The memory is already zeroed. */
+ cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr);
+ if (cpu_cf_root.cfptr) {
+ refcount_set(&cpu_cf_root.refcnt, 1);
+ on_each_cpu(cpum_cf_reset_cpu, NULL, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ return rc;
+}
+
+/* Drop a reference on a CPU's counter data; free it on the last put. */
+static void cpum_cf_free_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+
+ mutex_lock(&pmc_reserve_mutex);
+ /*
+ * When invoked via CPU hotplug handler, there might be no events
+ * installed or that particular CPU might not have an
+ * event installed. This anchor pointer can be NULL!
+ */
+ if (!cpu_cf_root.cfptr)
+ goto out;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+ /*
+ * Might be NULL when called from CPU hotplug handler and no event
+ * installed on that CPU, but on different CPUs.
+ */
+ if (!cpuhw)
+ goto out;
+
+ if (refcount_dec_and_test(&cpuhw->refcnt)) {
+ kfree(cpuhw);
+ p->cpucf = NULL;
+ }
+ cpum_cf_free_root();
+out:
+ mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* Allocate CPU counter data structure for a PMU. Takes pmc_reserve_mutex. */
+static int cpum_cf_alloc_cpu(int cpu)
+{
+ struct cpu_cf_events *cpuhw;
+ struct cpu_cf_ptr *p;
+ int rc;
+
+ mutex_lock(&pmc_reserve_mutex);
+ rc = cpum_cf_alloc_root();
+ if (rc)
+ goto unlock;
+ p = per_cpu_ptr(cpu_cf_root.cfptr, cpu);
+ cpuhw = p->cpucf;
+
+ if (!cpuhw) {
+ cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL);
+ if (cpuhw) {
+ p->cpucf = cpuhw;
+ refcount_set(&cpuhw->refcnt, 1);
+ } else {
+ rc = -ENOMEM;
+ }
+ } else {
+ refcount_inc(&cpuhw->refcnt);
+ }
+ if (rc) {
+ /*
+ * Error in allocation of event, decrement anchor. Since
+ * cpu_cf_event is not created, its destroy() function is not
+ * invoked. Adjust the reference counter for the anchor.
+ */
+ cpum_cf_free_root();
+ }
+unlock:
+ mutex_unlock(&pmc_reserve_mutex);
+ return rc;
+}
+
+/*
+ * Create/delete per CPU data structures for /dev/hwctr interface and events
+ * created by perf_event_open().
+ * If cpu is -1, track task on all available CPUs. This requires
+ * allocation of hardware data structures for all online CPUs. This setup
+ * handles perf_event_open() with task context and /dev/hwctr interface.
+ * If cpu is not -1, install event on this CPU only. This setup handles
+ * perf_event_open() with CPU context.
+ */
+static int cpum_cf_alloc(int cpu)
+{
+ cpumask_var_t mask;
+ int rc;
+
+ if (cpu == -1) {
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ for_each_online_cpu(cpu) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (rc) {
+ for_each_cpu(cpu, mask)
+ cpum_cf_free_cpu(cpu);
+ break;
+ }
+ cpumask_set_cpu(cpu, mask);
+ }
+ free_cpumask_var(mask);
+ } else {
+ rc = cpum_cf_alloc_cpu(cpu);
+ }
+ return rc;
+}
+
+static void cpum_cf_free(int cpu)
+{
+ if (cpu == -1) {
+ for_each_online_cpu(cpu)
+ cpum_cf_free_cpu(cpu);
+ } else {
+ cpum_cf_free_cpu(cpu);
+ }
+}
+
#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
/* interval in seconds */
@@ -451,10 +664,10 @@ static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set)
*/
static void cpumf_pmu_enable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int err;
- if (cpuhw->flags & PMU_F_ENABLED)
+ if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED))
return;
err = lcctl(cpuhw->state | cpuhw->dev_state);
@@ -471,11 +684,11 @@ static void cpumf_pmu_enable(struct pmu *pmu)
*/
static void cpumf_pmu_disable(struct pmu *pmu)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
- int err;
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
u64 inactive;
+ int err;
- if (!(cpuhw->flags & PMU_F_ENABLED))
+ if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED))
return;
inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
@@ -487,58 +700,10 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
-#define PMC_INIT 0UL
-#define PMC_RELEASE 1UL
-
-static void cpum_cf_setup_cpu(void *flags)
-{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
-
- switch ((unsigned long)flags) {
- case PMC_INIT:
- cpuhw->flags |= PMU_F_RESERVED;
- break;
-
- case PMC_RELEASE:
- cpuhw->flags &= ~PMU_F_RESERVED;
- break;
- }
-
- /* Disable CPU counter sets */
- lcctl(0);
- debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
- __func__, *(int *)flags, cpuhw->flags,
- cpuhw->state);
-}
-
-/* Initialize the CPU-measurement counter facility */
-static int __kernel_cpumcf_begin(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
- irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-
- return 0;
-}
-
-/* Release the CPU-measurement counter facility */
-static void __kernel_cpumcf_end(void)
-{
- on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
- irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
-}
-
-/* Number of perf events counting hardware events */
-static atomic_t num_events = ATOMIC_INIT(0);
-/* Used to avoid races in calling reserve/release_cpumf_hardware */
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
+ cpum_cf_free(event->cpu);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -562,14 +727,6 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static void cpumf_hw_inuse(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_inc_return(&num_events) == 1)
- __kernel_cpumcf_begin();
- mutex_unlock(&pmc_reserve_mutex);
-}
-
static int is_userspace_event(u64 ev)
{
return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
@@ -653,7 +810,8 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
/*
@@ -756,7 +914,7 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -830,7 +988,7 @@ static int cfdiag_push_sample(struct perf_event *event,
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct hw_perf_event *hwc = &event->hw;
int i;
@@ -857,8 +1015,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
false);
if (cfdiag_diffctr(cpuhw, event->hw.config_base))
cfdiag_push_sample(event, cpuhw);
- } else if (cpuhw->flags & PMU_F_RESERVED) {
- /* Only update when PMU not hotplugged off */
+ } else {
hw_perf_event_update(event);
}
hwc->state |= PERF_HES_UPTODATE;
@@ -867,7 +1024,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
ctr_set_enable(&cpuhw->state, event->hw.config_base);
event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
@@ -880,7 +1037,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int i;
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -912,29 +1069,83 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
-static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
-{
- local_irq_disable();
- cpum_cf_setup_cpu((void *)flags);
- local_irq_enable();
- return 0;
-}
+static struct cfset_session { /* CPUs and counter set bit mask */
+ struct list_head head; /* Head of list of active processes */
+} cfset_session = {
+ .head = LIST_HEAD_INIT(cfset_session.head)
+};
+
+static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */
+/*
+ * Synchronize access to device /dev/hwctr. This mutex protects against
+ * concurrent access to functions cfset_open() and cfset_release().
+ * Same for CPU hotplug add and remove events triggering
+ * cpum_cf_online_cpu() and cpum_cf_offline_cpu().
+ * It also serializes concurrent device ioctl access from multiple
+ * processes accessing /dev/hwctr.
+ *
+ * The mutex protects concurrent access to the /dev/hwctr session management
+ * struct cfset_session and reference counting variable cfset_opencnt.
+ */
+static DEFINE_MUTEX(cfset_ctrset_mutex);
+/*
+ * CPU hotplug handles only /dev/hwctr device.
+ * For perf_event_open() the CPU hotplug handling is done in kernel common
+ * code:
+ * - CPU add: Nothing is done since a file descriptor can not be created
+ * and returned to the user.
+ * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and
+ * pmu_del(). The event itself is removed when the file descriptor is
+ * closed.
+ */
static int cfset_online_cpu(unsigned int cpu);
+
static int cpum_cf_online_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
- cpu, in_interrupt());
- cpum_cf_setup(cpu, PMC_INIT);
- return cfset_online_cpu(cpu);
+ int rc = 0;
+
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d "
+ "opencnt %d\n", __func__, cpu,
+ refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * Ignore notification for perf_event_open().
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ rc = cpum_cf_alloc_cpu(cpu);
+ if (!rc)
+ cfset_online_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return rc;
}
static int cfset_offline_cpu(unsigned int cpu);
+
static int cpum_cf_offline_cpu(unsigned int cpu)
{
- debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
- cfset_offline_cpu(cpu);
- return cpum_cf_setup(cpu, PMC_RELEASE);
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d root.refcnt %d opencnt %d\n",
+ __func__, cpu, refcount_read(&cpu_cf_root.refcnt),
+ refcount_read(&cfset_opencnt));
+ /*
+ * During task exit processing of grouped perf events triggered by CPU
+ * hotplug processing, pmu_disable() is called as part of perf context
+ * removal process. Therefore do not trigger event removal now for
+ * perf_event_open() created events. Perf common code triggers event
+ * destruction when the event file descriptor is closed.
+ *
+ * Handle only /dev/hwctr device sessions.
+ */
+ mutex_lock(&cfset_ctrset_mutex);
+ if (refcount_read(&cfset_opencnt)) {
+ cfset_offline_cpu(cpu);
+ cpum_cf_free_cpu(cpu);
+ }
+ mutex_unlock(&cfset_ctrset_mutex);
+ return 0;
}
/* Return true if store counter set multiple instruction is available */
@@ -953,13 +1164,13 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_CMC);
- cpuhw = this_cpu_ptr(&cpu_cf_events);
/*
* Measurement alerts are shared and might happen when the PMU
* is not reserved. Ignore these alerts in this case.
*/
- if (!(cpuhw->flags & PMU_F_RESERVED))
+ cpuhw = this_cpu_cfhw();
+ if (!cpuhw)
return;
/* counter authorization change alert */
@@ -1039,19 +1250,11 @@ out1:
* counter set via normal file operations.
*/
-static atomic_t cfset_opencnt = ATOMIC_INIT(0); /* Access count */
-static DEFINE_MUTEX(cfset_ctrset_mutex);/* Synchronize access to hardware */
struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
unsigned int sets; /* Counter set bit mask */
atomic_t cpus_ack; /* # CPUs successfully executed func */
};
-static struct cfset_session { /* CPUs and counter set bit mask */
- struct list_head head; /* Head of list of active processes */
-} cfset_session = {
- .head = LIST_HEAD_INIT(cfset_session.head)
-};
-
struct cfset_request { /* CPUs and counter set bit mask */
unsigned long ctrset; /* Bit mask of counter set to read */
cpumask_t mask; /* CPU mask to read from */
@@ -1113,11 +1316,11 @@ static void cfset_session_add(struct cfset_request *p)
/* Stop all counter sets via ioctl interface */
static void cfset_ioctl_off(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
- /* Check if any counter set used by /dev/hwc */
+ /* Check if any counter set used by /dev/hwctr */
for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc)
if ((p->sets & cpumf_ctr_ctl[rc])) {
if (!atomic_dec_return(&cpuhw->ctr_set[rc])) {
@@ -1141,7 +1344,7 @@ static void cfset_ioctl_off(void *parm)
/* Start counter sets on particular CPU */
static void cfset_ioctl_on(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int rc;
@@ -1163,7 +1366,7 @@ static void cfset_ioctl_on(void *parm)
static void cfset_release_cpu(void *p)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
int rc;
debug_sprintf_event(cf_dbg, 4, "%s state %#llx dev_state %#llx\n",
@@ -1203,27 +1406,41 @@ static int cfset_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
}
- if (!atomic_dec_return(&cfset_opencnt))
+ if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */
on_each_cpu(cfset_release_cpu, NULL, 1);
+ cpum_cf_free(-1);
+ }
mutex_unlock(&cfset_ctrset_mutex);
-
- hw_perf_event_destroy(NULL);
return 0;
}
+/*
+ * Open via /dev/hwctr device. Allocate all per CPU resources on the first
+ * open of the device. The last close releases all per CPU resources.
+ * Parallel perf_event_open system calls also use per CPU resources.
+ * These invocations are handled via reference counting on the per CPU data
+ * structures.
+ */
static int cfset_open(struct inode *inode, struct file *file)
{
- if (!capable(CAP_SYS_ADMIN))
+ int rc = 0;
+
+ if (!perfmon_capable())
return -EPERM;
+ file->private_data = NULL;
+
mutex_lock(&cfset_ctrset_mutex);
- if (atomic_inc_return(&cfset_opencnt) == 1)
- cfset_session_init();
+ if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */
+ rc = cpum_cf_alloc(-1);
+ if (!rc) {
+ cfset_session_init();
+ refcount_set(&cfset_opencnt, 1);
+ }
+ }
mutex_unlock(&cfset_ctrset_mutex);
- cpumf_hw_inuse();
- file->private_data = NULL;
/* nonseekable_open() never fails */
- return nonseekable_open(inode, file);
+ return rc ?: nonseekable_open(inode, file);
}
static int cfset_all_start(struct cfset_request *req)
@@ -1280,7 +1497,7 @@ static int cfset_all_copy(unsigned long arg, cpumask_t *mask)
ctrset_read = (struct s390_ctrset_read __user *)arg;
uptr = ctrset_read->data;
for_each_cpu(cpu, mask) {
- struct cpu_cf_events *cpuhw = per_cpu_ptr(&cpu_cf_events, cpu);
+ struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu);
struct s390_ctrset_cpudata __user *ctrset_cpudata;
ctrset_cpudata = uptr;
@@ -1324,7 +1541,7 @@ static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
/* Read all counter sets. */
static void cfset_cpu_read(void *parm)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = this_cpu_cfhw();
struct cfset_call_on_cpu_parm *p = parm;
int set, set_size;
size_t space;
@@ -1348,9 +1565,9 @@ static void cfset_cpu_read(void *parm)
cpuhw->used += space;
cpuhw->sets += 1;
}
+ debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
+ cpuhw->sets, cpuhw->used);
}
- debug_sprintf_event(cf_dbg, 4, "%s sets %d used %zd\n", __func__,
- cpuhw->sets, cpuhw->used);
}
static int cfset_all_read(unsigned long arg, struct cfset_request *req)
@@ -1502,6 +1719,7 @@ static struct miscdevice cfset_dev = {
.name = S390_HWCTR_DEVICE,
.minor = MISC_DYNAMIC_MINOR,
.fops = &cfset_fops,
+ .mode = 0666,
};
/* Hotplug add of a CPU. Scan through all active processes and add
@@ -1512,7 +1730,6 @@ static int cfset_online_cpu(unsigned int cpu)
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1520,19 +1737,18 @@ static int cfset_online_cpu(unsigned int cpu)
cpumask_set_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
+ * Adjust reference counts.
*/
static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
- mutex_lock(&cfset_ctrset_mutex);
if (!list_empty(&cfset_session.head)) {
list_for_each_entry(rp, &cfset_session.head, node) {
p.sets = rp->ctrset;
@@ -1540,7 +1756,6 @@ static int cfset_offline_cpu(unsigned int cpu)
cpumask_clear_cpu(cpu, &rp->mask);
}
}
- mutex_unlock(&cfset_ctrset_mutex);
return 0;
}
@@ -1618,7 +1833,8 @@ static int cfdiag_event_init(struct perf_event *event)
}
/* Initialize for using the CPU-measurement counter facility */
- cpumf_hw_inuse();
+ if (cpum_cf_alloc(event->cpu))
+ return -ENOMEM;
event->destroy = hw_perf_event_destroy;
err = cfdiag_event_init2(event);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7ef72f5ff52e..8ecfbce4ac92 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1271,16 +1271,6 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
}
}
-static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
-{
- asm volatile(
- " cdsg %[old],%[new],%[ptr]\n"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
- : [new] "d" (new)
- : "memory", "cc");
- return old;
-}
-
/* hw_perf_event_update() - Process sampling buffer
* @event: The perf event
* @flush_all: Flag to also flush partially filled sample-data-blocks
@@ -1352,7 +1342,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
new.f = 0;
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
/* Advance to next sample-data-block */
@@ -1562,7 +1552,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
}
new.a = 1;
new.overflow = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
return true;
}
@@ -1636,7 +1626,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
new.a = 1;
else
new.a = 0;
- prev.val = __cdsg(&te->header.val, old.val, new.val);
+ prev.val = cmpxchg128(&te->header.val, old.val, new.val);
} while (prev.val != old.val);
*overflow += orig_overflow;
}
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index a7b339c4fd7c..fe7d1774ded1 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,7 +36,7 @@ struct paicrypt_map {
unsigned long *page; /* Page for CPU to store counters */
struct pai_userdata *save; /* Page to store no-zero counters */
unsigned int active_events; /* # of PAI crypto users */
- unsigned int refcnt; /* Reference count mapped buffers */
+ refcount_t refcnt; /* Reference count mapped buffers */
enum paievt_mode mode; /* Type of event */
struct perf_event *event; /* Perf event for sampling */
};
@@ -57,10 +57,11 @@ static void paicrypt_event_destroy(struct perf_event *event)
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %d\n", __func__,
+ " mode %d refcnt %u\n", __func__,
event->attr.config, event->cpu,
- cpump->active_events, cpump->mode, cpump->refcnt);
- if (!--cpump->refcnt) {
+ cpump->active_events, cpump->mode,
+ refcount_read(&cpump->refcnt));
+ if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
cpump->save);
@@ -149,8 +150,10 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
- if (cpump->page)
+ if (cpump->page) {
+ refcount_inc(&cpump->refcnt);
goto unlock;
+ }
rc = -ENOMEM;
cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL);
@@ -164,18 +167,18 @@ static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump)
goto unlock;
}
rc = 0;
+ refcount_set(&cpump->refcnt, 1);
unlock:
/* If rc is non-zero, do not set mode and reference count */
if (!rc) {
- cpump->refcnt++;
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
}
debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %d page %#lx save %p rc %d\n",
+ " mode %d refcnt %u page %#lx save %p rc %d\n",
__func__, a->sample_period, cpump->active_events,
- cpump->mode, cpump->refcnt,
+ cpump->mode, refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
mutex_unlock(&pai_reserve_mutex);
return rc;
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index fcea307d7529..3b4f384f77f7 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -50,7 +50,7 @@ struct paiext_map {
struct pai_userdata *save; /* Area to store non-zero counters */
enum paievt_mode mode; /* Type of event */
unsigned int active_events; /* # of PAI Extension users */
- unsigned int refcnt;
+ refcount_t refcnt;
struct perf_event *event; /* Perf event for sampling */
struct paiext_cb *paiext_cb; /* PAI extension control block area */
};
@@ -60,14 +60,14 @@ struct paiext_mapptr {
};
static struct paiext_root { /* Anchor to per CPU data */
- int refcnt; /* Overall active events */
+ refcount_t refcnt; /* Overall active events */
struct paiext_mapptr __percpu *mapptr;
} paiext_root;
/* Free per CPU data when the last event is removed. */
static void paiext_root_free(void)
{
- if (!--paiext_root.refcnt) {
+ if (refcount_dec_and_test(&paiext_root.refcnt)) {
free_percpu(paiext_root.mapptr);
paiext_root.mapptr = NULL;
}
@@ -80,7 +80,7 @@ static void paiext_root_free(void)
*/
static int paiext_root_alloc(void)
{
- if (++paiext_root.refcnt == 1) {
+ if (!refcount_inc_not_zero(&paiext_root.refcnt)) {
/* The memory is already zeroed. */
paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
if (!paiext_root.mapptr) {
@@ -91,6 +91,7 @@ static int paiext_root_alloc(void)
*/
return -ENOMEM;
}
+ refcount_set(&paiext_root.refcnt, 1);
}
return 0;
}
@@ -122,7 +123,7 @@ static void paiext_event_destroy(struct perf_event *event)
mutex_lock(&paiext_reserve_mutex);
cpump->event = NULL;
- if (!--cpump->refcnt) /* Last reference gone */
+ if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
mutex_unlock(&paiext_reserve_mutex);
@@ -163,7 +164,7 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
rc = -ENOMEM;
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
if (!cpump)
- goto unlock;
+ goto undo;
/* Allocate memory for counter area and counter extraction.
* These are
@@ -183,8 +184,9 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
GFP_KERNEL);
if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
paiext_free(mp);
- goto unlock;
+ goto undo;
}
+ refcount_set(&cpump->refcnt, 1);
cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
: PAI_MODE_COUNTING;
} else {
@@ -195,15 +197,15 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
if (cpump->mode == PAI_MODE_SAMPLING ||
(cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
rc = -EBUSY;
- goto unlock;
+ goto undo;
}
+ refcount_inc(&cpump->refcnt);
}
rc = 0;
cpump->event = event;
- ++cpump->refcnt;
-unlock:
+undo:
if (rc) {
/* Error in allocation of event, decrement anchor. Since
* the event in not created, its destroy() function is never
@@ -211,6 +213,7 @@ unlock:
*/
paiext_root_free();
}
+unlock:
mutex_unlock(&paiext_reserve_mutex);
/* If rc is non-zero, no increment of counter/sampler was done. */
return rc;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..276278199c44 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -102,6 +102,11 @@ void __init time_early_init(void)
((long) qui.old_leap * 4096000000L);
}
+unsigned long long noinstr sched_clock_noinstr(void)
+{
+ return tod_to_ns(__get_tod_clock_monotonic());
+}
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index cb2ee06df286..3c62d1b218b1 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -294,6 +294,8 @@ again:
rc = -ENXIO;
ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ if (!ptep)
+ goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
page = pte_page(*ptep);
rc = -EAGAIN;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index da6dac36e959..9bd0a873f3b1 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2777,7 +2777,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
mmap_read_lock(kvm->mm);
get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
- &page, NULL, NULL);
+ &page, NULL);
mmap_read_unlock(kvm->mm);
return page;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 580d2e3265cb..7c50eca85ca4 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,7 @@
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess.o find.o spinlock.o
+lib-y += delay.o string.o uaccess.o find.o spinlock.o tishift.o
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S
new file mode 100644
index 000000000000..de33cf02cfd2
--- /dev/null
+++ b/arch/s390/lib/tishift.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
+#include <asm/export.h>
+
+ .section .noinstr.text, "ax"
+
+ GEN_BR_THUNK %r14
+
+SYM_FUNC_START(__ashlti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ srlg %r3,%r1,0(%r3)
+ sllg %r0,%r0,0(%r4)
+ sllg %r1,%r1,0(%r4)
+ ogr %r0,%r3
+ j 1f
+0: sllg %r0,%r1,-64(%r4)
+ lghi %r1,0
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ sllg %r3,%r0,0(%r3)
+ srlg %r1,%r1,0(%r4)
+ srag %r0,%r0,0(%r4)
+ ogr %r1,%r3
+ j 1f
+0: srag %r1,%r0,-64(%r4)
+ srag %r0,%r0,63
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)
+ lmg %r0,%r1,0(%r3)
+ cije %r4,0,1f
+ lhi %r3,64
+ sr %r3,%r4
+ jnh 0f
+ sllg %r3,%r0,0(%r3)
+ srlg %r1,%r1,0(%r4)
+ srlg %r0,%r0,0(%r4)
+ ogr %r1,%r3
+ j 1f
+0: srlg %r1,%r0,-64(%r4)
+ lghi %r0,0
+1: stmg %r0,%r1,0(%r2)
+ BR_EX %r14
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index dc90d1eb0d55..f4b6fc746fce 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -895,12 +895,12 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
/**
* gmap_pte_op_end - release the page table lock
- * @ptl: pointer to the spinlock pointer
+ * @ptep: pointer to the locked pte
+ * @ptl: pointer to the page table spinlock
*/
-static void gmap_pte_op_end(spinlock_t *ptl)
+static void gmap_pte_op_end(pte_t *ptep, spinlock_t *ptl)
{
- if (ptl)
- spin_unlock(ptl);
+ pte_unmap_unlock(ptep, ptl);
}
/**
@@ -1011,7 +1011,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
{
int rc;
pte_t *ptep;
- spinlock_t *ptl = NULL;
+ spinlock_t *ptl;
unsigned long pbits = 0;
if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
@@ -1025,7 +1025,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
/* Protect and unlock. */
rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
return rc;
}
@@ -1154,7 +1154,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
/* Do *NOT* clear the _PAGE_INVALID bit! */
rc = 0;
}
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
}
if (!rc)
break;
@@ -1248,7 +1248,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
if (!rc)
gmap_insert_rmap(sg, vmaddr, rmap);
spin_unlock(&sg->guest_table_lock);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(ptep, ptl);
}
radix_tree_preload_end();
if (rc) {
@@ -2156,7 +2156,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
tptep = (pte_t *) gmap_table_walk(sg, saddr, 0);
if (!tptep) {
spin_unlock(&sg->guest_table_lock);
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(sptep, ptl);
radix_tree_preload_end();
break;
}
@@ -2167,7 +2167,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
rmap = NULL;
rc = 0;
}
- gmap_pte_op_end(ptl);
+ gmap_pte_op_end(sptep, ptl);
spin_unlock(&sg->guest_table_lock);
}
radix_tree_preload_end();
@@ -2495,7 +2495,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
continue;
if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
set_bit(i, bitmap);
- spin_unlock(ptl);
+ pte_unmap_unlock(ptep, ptl);
}
}
gmap_pmd_op_end(gmap, pmdp);
@@ -2537,7 +2537,12 @@ static inline void thp_split_mm(struct mm_struct *mm)
* Remove all empty zero pages from the mapping for lazy refaulting
* - This must be called after mm->context.has_pgste is set, to avoid
* future creation of zero pages
- * - This must be called after THP was enabled
+ * - This must be called after THP was disabled.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * racing with the loop below and so causing pte_offset_map_lock() to fail,
+ * it will never insert a page table containing empty zero pages once
+ * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
*/
static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
unsigned long end, struct mm_walk *walk)
@@ -2549,6 +2554,8 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
spinlock_t *ptl;
ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!ptep)
+ break;
if (is_zero_pfn(pte_pfn(*ptep)))
ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 5ba3bd8a7b12..ca5a418c58a8 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -4,6 +4,7 @@
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
+#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6effb24de6d9..3bd2ab2a9a34 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -829,7 +829,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -850,6 +850,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
@@ -938,7 +940,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -955,6 +957,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
pgste_val(new) &= ~PGSTE_GR_BIT;
@@ -1000,7 +1004,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
default:
return -EFAULT;
}
-
+again:
ptl = pmd_lock(mm, pmdp);
if (!pmd_present(*pmdp)) {
spin_unlock(ptl);
@@ -1017,6 +1021,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
spin_unlock(ptl);
ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!ptep)
+ goto again;
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
paddr = pte_val(*ptep) & PAGE_MASK;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 5b22c6e24528..b9dcb4ae6c59 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -667,7 +667,15 @@ static void __init memblock_region_swap(void *a, void *b, int size)
#ifdef CONFIG_KASAN
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static inline int set_memory_kasan(unsigned long start, unsigned long end)
+{
+ start = PAGE_ALIGN_DOWN(__sha(start));
+ end = PAGE_ALIGN(__sha(end));
+ return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
+}
#endif
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
@@ -737,10 +745,8 @@ void __init vmem_map_init(void)
}
#ifdef CONFIG_KASAN
- for_each_mem_range(i, &base, &end) {
- set_memory_rwnx(__sha(base),
- (__sha(end) - __sha(base)) >> PAGE_SHIFT);
- }
+ for_each_mem_range(i, &base, &end)
+ set_memory_kasan(base, end);
#endif
set_memory_rox((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT);
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index cc8cf5abea15..4e930f566878 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -10,7 +10,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 393023d09245..2b3ce4fd3956 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -6,6 +6,7 @@ config SUPERH
select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_BINFMT_FLAT if !MMU
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c
index ab9170494dcc..89cd4a3b4cca 100644
--- a/arch/sh/drivers/dma/dma-api.c
+++ b/arch/sh/drivers/dma/dma-api.c
@@ -198,7 +198,7 @@ int request_dma(unsigned int chan, const char *dev_id)
if (atomic_xchg(&channel->busy, 1))
return -EBUSY;
- strlcpy(channel->dev_id, dev_id, sizeof(channel->dev_id));
+ strscpy(channel->dev_id, dev_id, sizeof(channel->dev_id));
if (info->ops->request) {
result = info->ops->request(channel);
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index 059791fd394f..cf1c10f15528 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -71,6 +71,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
@@ -78,6 +83,10 @@ ATOMIC_OPS(and)
ATOMIC_OPS(or)
ATOMIC_OPS(xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index 7665de9d00d0..b4090cc35493 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -55,6 +55,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op) \
ATOMIC_OP(op, c_op) \
@@ -64,6 +69,10 @@ ATOMIC_OPS(and, &=)
ATOMIC_OPS(or, |=)
ATOMIC_OPS(xor, ^=)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index b63dcfbfa14e..9ef1fb1dd12e 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -73,6 +73,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
@@ -80,6 +85,10 @@ ATOMIC_OPS(and)
ATOMIC_OPS(or)
ATOMIC_OPS(xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 528bfeda78f5..7a18cb2a1c1a 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -30,9 +30,6 @@
#include <asm/atomic-irq.h>
#endif
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n)))
-
#endif /* CONFIG_CPU_J2 */
#endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h
deleted file mode 100644
index fe52abb69cea..000000000000
--- a/arch/sh/include/asm/bugs.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SH_BUGS_H
-#define __ASM_SH_BUGS_H
-
-/*
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Needs:
- * void check_bugs(void);
- */
-
-/*
- * I don't know of any Super-H bugs yet.
- */
-
-#include <asm/processor.h>
-
-extern void select_idle_routine(void);
-
-static void __init check_bugs(void)
-{
- extern unsigned long loops_per_jiffy;
- char *p = &init_utsname()->machine[2]; /* "sh" */
-
- select_idle_routine();
-
- current_cpu_data.loops_per_jiffy = loops_per_jiffy;
-
- switch (current_cpu_data.family) {
- case CPU_FAMILY_SH2:
- *p++ = '2';
- break;
- case CPU_FAMILY_SH2A:
- *p++ = '2';
- *p++ = 'a';
- break;
- case CPU_FAMILY_SH3:
- *p++ = '3';
- break;
- case CPU_FAMILY_SH4:
- *p++ = '4';
- break;
- case CPU_FAMILY_SH4A:
- *p++ = '4';
- *p++ = 'a';
- break;
- case CPU_FAMILY_SH4AL_DSP:
- *p++ = '4';
- *p++ = 'a';
- *p++ = 'l';
- *p++ = '-';
- *p++ = 'd';
- *p++ = 's';
- *p++ = 'p';
- break;
- case CPU_FAMILY_UNKNOWN:
- /*
- * Specifically use CPU_FAMILY_UNKNOWN rather than
- * default:, so we're able to have the compiler whine
- * about unhandled enumerations.
- */
- break;
- }
-
- printk("CPU: %s\n", get_cpu_subtype(&current_cpu_data));
-
-#ifndef __LITTLE_ENDIAN__
- /* 'eb' means 'Endian Big' */
- *p++ = 'e';
- *p++ = 'b';
-#endif
- *p = '\0';
-}
-#endif /* __ASM_SH_BUGS_H */
diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h
index 32dfa6b82ec6..b38dbc975581 100644
--- a/arch/sh/include/asm/cache.h
+++ b/arch/sh/include/asm/cache.h
@@ -14,6 +14,12 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+/*
+ * Some drivers need to perform DMA into kmalloc'ed buffers
+ * and so we have to increase the kmalloc minalign for this.
+ */
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+
#define __read_mostly __section(".data..read_mostly")
#ifndef __ASSEMBLY__
diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index 1c4923502fd4..0f384b1f45ca 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -22,7 +22,6 @@ extern unsigned short *irq_mask_register;
/*
* PINT IRQs
*/
-void init_IRQ_pint(void);
void make_imask_irq(unsigned int irq);
static inline int generic_irq_demux(int irq)
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 09ac6c7faee0..62f4b9edcb98 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -174,10 +174,4 @@ typedef struct page *pgtable_t;
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-/*
- * Some drivers need to perform DMA into kmalloc'ed buffers
- * and so we have to increase the kmalloc minalign for this.
- */
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-
#endif /* __ASM_SH_PAGE_H */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 85a6c1c3c16e..73fba7c922f9 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -166,6 +166,8 @@ extern unsigned int instruction_size(unsigned int insn);
#define instruction_size(insn) (2)
#endif
+void select_idle_routine(void);
+
#endif /* __ASSEMBLY__ */
#include <asm/processor_32.h>
diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h
index 69dbae2949b0..7fe7002d1d50 100644
--- a/arch/sh/include/asm/rtc.h
+++ b/arch/sh/include/asm/rtc.h
@@ -2,8 +2,6 @@
#ifndef _ASM_RTC_H
#define _ASM_RTC_H
-void time_init(void);
-
#define RTC_CAP_4_DIGIT_YEAR (1 << 0)
struct sh_rtc_platform_info {
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 1400fbb8b423..9f19a682d315 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -84,9 +84,6 @@ static inline struct thread_info *current_thread_info(void)
#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
-extern void arch_task_cache_init(void);
-extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-extern void arch_release_task_struct(struct task_struct *tsk);
extern void init_thread_xstate(void);
#endif /* __ASSEMBLY__ */
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index d662503b0665..045d93f151fd 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -15,6 +15,7 @@
#include <linux/irqflags.h>
#include <linux/smp.h>
#include <linux/atomic.h>
+#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/bl_bit.h>
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index af977ec4ca5e..b3da2757faaf 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -43,6 +43,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/mmzone.h>
+#include <asm/processor.h>
#include <asm/sparsemem.h>
#include <asm/platform_early.h>
@@ -304,9 +305,9 @@ void __init setup_arch(char **cmdline_p)
bss_resource.end = virt_to_phys(__bss_stop)-1;
#ifdef CONFIG_CMDLINE_OVERWRITE
- strlcpy(command_line, CONFIG_CMDLINE, sizeof(command_line));
+ strscpy(command_line, CONFIG_CMDLINE, sizeof(command_line));
#else
- strlcpy(command_line, COMMAND_LINE, sizeof(command_line));
+ strscpy(command_line, COMMAND_LINE, sizeof(command_line));
#ifdef CONFIG_CMDLINE_EXTEND
strlcat(command_line, " ", sizeof(command_line));
strlcat(command_line, CONFIG_CMDLINE, sizeof(command_line));
@@ -354,3 +355,57 @@ int test_mode_pin(int pin)
{
return sh_mv.mv_mode_pins() & pin;
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ char *p = &init_utsname()->machine[2]; /* "sh" */
+
+ select_idle_routine();
+
+ current_cpu_data.loops_per_jiffy = loops_per_jiffy;
+
+ switch (current_cpu_data.family) {
+ case CPU_FAMILY_SH2:
+ *p++ = '2';
+ break;
+ case CPU_FAMILY_SH2A:
+ *p++ = '2';
+ *p++ = 'a';
+ break;
+ case CPU_FAMILY_SH3:
+ *p++ = '3';
+ break;
+ case CPU_FAMILY_SH4:
+ *p++ = '4';
+ break;
+ case CPU_FAMILY_SH4A:
+ *p++ = '4';
+ *p++ = 'a';
+ break;
+ case CPU_FAMILY_SH4AL_DSP:
+ *p++ = '4';
+ *p++ = 'a';
+ *p++ = 'l';
+ *p++ = '-';
+ *p++ = 'd';
+ *p++ = 's';
+ *p++ = 'p';
+ break;
+ case CPU_FAMILY_UNKNOWN:
+ /*
+ * Specifically use CPU_FAMILY_UNKNOWN rather than
+ * default:, so we're able to have the compiler whine
+ * about unhandled enumerations.
+ */
+ break;
+ }
+
+ pr_info("CPU: %s\n", get_cpu_subtype(&current_cpu_data));
+
+#ifndef __LITTLE_ENDIAN__
+ /* 'eb' means 'Endian Big' */
+ *p++ = 'e';
+ *p++ = 'b';
+#endif
+ *p = '\0';
+}
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 2de85c977f54..97377e8c5025 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 999ab5916e69..6cb0ad73dbb9 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -38,7 +38,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
if (pud) {
pmd = pmd_alloc(mm, pud, addr);
if (pmd)
- pte = pte_alloc_map(mm, pmd, addr);
+ pte = pte_alloc_huge(mm, pmd, addr);
}
}
}
@@ -63,7 +63,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (pud) {
pmd = pmd_offset(pud, addr);
if (pmd)
- pte = pte_offset_map(pmd, addr);
+ pte = pte_offset_huge(pmd, addr);
}
}
}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 8c196990558b..49849790e66d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -33,7 +33,7 @@ config SPARC
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
select HAS_IOPORT
- select HAVE_NMI_WATCHDOG if SPARC64
+ select HAVE_HARDLOCKUP_DETECTOR_SPARC64 if SPARC64
select HAVE_CBPF_JIT if SPARC32
select HAVE_EBPF_JIT if SPARC64
select HAVE_DEBUG_BUGVERBOSE
@@ -52,6 +52,7 @@ config SPARC
config SPARC32
def_bool !64BIT
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_CPU_FINALIZE_INIT if !SMP
select ARCH_HAS_SYNC_DMA_FOR_CPU
select CLZ_TAB
select DMA_DIRECT_REMAP
diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug
index 6b2bec1888b3..37e003665de6 100644
--- a/arch/sparc/Kconfig.debug
+++ b/arch/sparc/Kconfig.debug
@@ -14,3 +14,17 @@ config FRAME_POINTER
bool
depends on MCOUNT
default y
+
+config HAVE_HARDLOCKUP_DETECTOR_SPARC64
+ bool
+ depends on HAVE_NMI
+ select HARDLOCKUP_DETECTOR_SPARC64
+ help
+ Sparc64 hardlockup detector is the last one developed before adding
+ the common infrastructure for handling hardlockup detectors. It is
+ always built. It does _not_ use the common command line parameters
+ and sysctl interface, except for /proc/sys/kernel/nmi_watchdog.
+
+config HARDLOCKUP_DETECTOR_SPARC64
+ bool
+ depends on HAVE_HARDLOCKUP_DETECTOR_SPARC64
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index d775daa83d12..60ce2fe57fcd 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -19,17 +19,31 @@
#include <asm-generic/atomic64.h>
int arch_atomic_add_return(int, atomic_t *);
+#define arch_atomic_add_return arch_atomic_add_return
+
int arch_atomic_fetch_add(int, atomic_t *);
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+
int arch_atomic_fetch_and(int, atomic_t *);
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+
int arch_atomic_fetch_or(int, atomic_t *);
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+
int arch_atomic_fetch_xor(int, atomic_t *);
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
int arch_atomic_cmpxchg(atomic_t *, int, int);
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
int arch_atomic_xchg(atomic_t *, int);
-int arch_atomic_fetch_add_unless(atomic_t *, int, int);
-void arch_atomic_set(atomic_t *, int);
+#define arch_atomic_xchg arch_atomic_xchg
+int arch_atomic_fetch_add_unless(atomic_t *, int, int);
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
+void arch_atomic_set(atomic_t *, int);
+
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
#define arch_atomic_read(v) READ_ONCE((v)->counter)
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 077891686715..a5e9c37605a7 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -37,6 +37,16 @@ s64 arch_atomic64_fetch_##op(s64, atomic64_t *);
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
@@ -44,22 +54,19 @@ ATOMIC_OPS(and)
ATOMIC_OPS(or)
ATOMIC_OPS(xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n)))
-
-static inline int arch_atomic_xchg(atomic_t *v, int new)
-{
- return arch_xchg(&v->counter, new);
-}
-
-#define arch_atomic64_cmpxchg(v, o, n) \
- ((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
s64 arch_atomic64_dec_if_positive(atomic64_t *v);
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h
deleted file mode 100644
index 02fa369b9c21..000000000000
--- a/arch/sparc/include/asm/bugs.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* include/asm/bugs.h: Sparc probes for various bugs.
- *
- * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
- */
-
-#ifdef CONFIG_SPARC32
-#include <asm/cpudata.h>
-#endif
-
-extern unsigned long loops_per_jiffy;
-
-static void __init check_bugs(void)
-{
-#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP)
- cpu_data(0).udelay_val = loops_per_jiffy;
-#endif
-}
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
index 43ec2609b811..6ee48321cbc2 100644
--- a/arch/sparc/include/asm/irq_32.h
+++ b/arch/sparc/include/asm/irq_32.h
@@ -17,7 +17,6 @@
#define irq_canonicalize(irq) (irq)
-void __init init_IRQ(void);
void __init sun4d_init_sbi_irq(void);
#define NO_IRQ 0xffffffff
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 154df2cf19f4..b436029f1ced 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -61,7 +61,6 @@ void sun4u_destroy_msi(unsigned int irq);
unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
void irq_free(unsigned int irq);
-void __init init_IRQ(void);
void fixup_irqs(void);
static inline void set_softint(unsigned long bits)
diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
index 90ee7863d9fe..920dc23f443f 100644
--- a/arch/sparc/include/asm/nmi.h
+++ b/arch/sparc/include/asm/nmi.h
@@ -8,7 +8,6 @@ void nmi_adjust_hz(unsigned int new_hz);
extern atomic_t nmi_active;
-void arch_touch_nmi_watchdog(void);
void start_nmi_watchdog(void *unused);
void stop_nmi_watchdog(void *unused);
diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h
index dcfad4613e18..ffff52c8b760 100644
--- a/arch/sparc/include/asm/timer_64.h
+++ b/arch/sparc/include/asm/timer_64.h
@@ -34,7 +34,6 @@ extern struct sparc64_tick_ops *tick_ops;
unsigned long sparc64_get_clock_tick(unsigned int cpu);
void setup_sparc64_timer(void);
-void __init time_init(void);
#define TICK_PRIV_BIT BIT(63)
#define TICKCMP_IRQ_BIT BIT(63)
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 2fda57a3ea86..682da3714686 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -130,6 +130,9 @@
#define SO_RCVMARK 0x0054
+#define SO_PASSPIDFD 0x0055
+#define SO_PEERPIDFD 0x0056
+
#if !defined(__KERNEL__)
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 4e4f3d3263e4..a8cbe403301f 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -191,7 +191,7 @@ static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys,
tack += sizeof (struct resource);
}
- strlcpy(tack, name, XNMLN+1);
+ strscpy(tack, name, XNMLN+1);
res->name = tack;
va = _sparc_ioremap(res, busno, phys, size);
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index 9cd09a3ef35f..15da3c0597a5 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -91,7 +91,6 @@ extern int static_irq_count;
extern spinlock_t irq_action_lock;
void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs);
-void init_IRQ(void);
/* sun4m_irq.c */
void sun4m_init_IRQ(void);
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 060fff95a305..17cdfdbf1f3b 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -65,6 +65,11 @@ void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+int __init watchdog_hardlockup_probe(void)
+{
+ return 0;
+}
+
static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
{
int this_cpu = smp_processor_id();
@@ -282,11 +287,11 @@ __setup("nmi_watchdog=", setup_nmi_watchdog);
* sparc specific NMI watchdog enable function.
* Enables watchdog if it is not enabled already.
*/
-int watchdog_nmi_enable(unsigned int cpu)
+void watchdog_hardlockup_enable(unsigned int cpu)
{
if (atomic_read(&nmi_active) == -1) {
pr_warn("NMI watchdog cannot be enabled or disabled\n");
- return -1;
+ return;
}
/*
@@ -295,17 +300,15 @@ int watchdog_nmi_enable(unsigned int cpu)
* process first.
*/
if (!nmi_init_done)
- return 0;
+ return;
smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);
-
- return 0;
}
/*
* sparc specific NMI watchdog disable function.
* Disables watchdog if it is not disabled already.
*/
-void watchdog_nmi_disable(unsigned int cpu)
+void watchdog_hardlockup_disable(unsigned int cpu)
{
if (atomic_read(&nmi_active) == -1)
pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index c8e0dd99f370..1adf5c1c16b8 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -302,7 +302,7 @@ void __init setup_arch(char **cmdline_p)
/* Initialize PROM console and command line. */
*cmdline_p = prom_getbootargs();
- strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
parse_early_param();
boot_flags_init(*cmdline_p);
@@ -412,3 +412,10 @@ static int __init topology_init(void)
}
subsys_initcall(topology_init);
+
+#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP)
+void __init arch_cpu_finalize_init(void)
+{
+ cpu_data(0).udelay_val = loops_per_jiffy;
+}
+#endif
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 48abee4eee29..6546ca9d4d3f 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -636,7 +636,7 @@ void __init setup_arch(char **cmdline_p)
{
/* Initialize PROM console and command line. */
*cmdline_p = prom_getbootargs();
- strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+ strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
parse_early_param();
boot_flags_init(*cmdline_p);
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index dad38960d1a8..ca450c7bc53f 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -328,6 +328,8 @@ static void flush_signal_insns(unsigned long address)
goto out_irqs_on;
ptep = pte_offset_map(pmdp, address);
+ if (!ptep)
+ goto out_irqs_on;
pte = *ptep;
if (!pte_present(pte))
goto out_unmap;
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 4398cc6fb68d..faa835f3c54a 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -496,3 +496,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 69ff07bc6c07..e326caf708c6 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -99,6 +99,7 @@ static unsigned int get_user_insn(unsigned long tpc)
local_irq_disable();
pmdp = pmd_offset(pudp, tpc);
+again:
if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
goto out_irq_enable;
@@ -115,6 +116,8 @@ static unsigned int get_user_insn(unsigned long tpc)
#endif
{
ptep = pte_offset_map(pmdp, tpc);
+ if (!ptep)
+ goto again;
pte = *ptep;
if (pte_present(pte)) {
pa = (pte_pfn(pte) << PAGE_SHIFT);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index d8e0e3c7038d..d7018823206c 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -298,7 +298,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
return NULL;
if (sz >= PMD_SIZE)
return (pte_t *)pmd;
- return pte_alloc_map(mm, pmd, addr);
+ return pte_alloc_huge(mm, pmd, addr);
}
pte_t *huge_pte_offset(struct mm_struct *mm,
@@ -325,7 +325,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return NULL;
if (is_hugetlb_pmd(*pmd))
return (pte_t *)pmd;
- return pte_offset_map(pmd, addr);
+ return pte_offset_huge(pmd, addr);
}
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index bf3e6d2fe5d9..133dd42570d6 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -244,7 +244,7 @@ static void *iounit_alloc(struct device *dev, size_t len,
long i;
pmdp = pmd_off_k(addr);
- ptep = pte_offset_map(pmdp, addr);
+ ptep = pte_offset_kernel(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 9e3f6933ca13..3a6caef68348 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -358,7 +358,7 @@ static void *sbus_iommu_alloc(struct device *dev, size_t len,
__flush_page_to_ram(page);
pmdp = pmd_off_k(addr);
- ptep = pte_offset_map(pmdp, addr);
+ ptep = pte_offset_kernel(pmdp, addr);
set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
}
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 9a725547578e..7ecf8556947a 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -149,6 +149,8 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
pte_t *pte;
pte = pte_offset_map(&pmd, vaddr);
+ if (!pte)
+ return;
end = vaddr + HPAGE_SIZE;
while (vaddr < end) {
if (pte_val(*pte) & _PAGE_VALID) {
diff --git a/arch/sparc/prom/bootstr_32.c b/arch/sparc/prom/bootstr_32.c
index e3b731ff00f0..1c7cd258b0dc 100644
--- a/arch/sparc/prom/bootstr_32.c
+++ b/arch/sparc/prom/bootstr_32.c
@@ -52,7 +52,7 @@ prom_getbootargs(void)
* V3 PROM cannot supply as with more than 128 bytes
* of an argument. But a smart bootstrap loader can.
*/
- strlcpy(barg_buf, *romvec->pv_v2bootargs.bootargs, sizeof(barg_buf));
+ strscpy(barg_buf, *romvec->pv_v2bootargs.bootargs, sizeof(barg_buf));
break;
default:
break;
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 541a9b18e343..b5e179360534 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -5,7 +5,7 @@ menu "UML-specific options"
config UML
bool
default y
- select ARCH_EPHEMERAL_INODES
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 8186d4761bda..da4d5256af2f 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -149,7 +149,7 @@ export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) $(CC_FLAGS_
# When cleaning we don't include .config, so we don't include
# TT or skas makefiles and don't clean skas_ptregs.h.
CLEAN_FILES += linux x.i gmon.out
-MRPROPER_FILES += arch/$(SUBARCH)/include/generated
+MRPROPER_FILES += $(HOST_DIR)/include/generated
archclean:
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f4c1e6e97ad5..50206feac577 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -108,9 +108,9 @@ static inline void ubd_set_bit(__u64 bit, unsigned char *data)
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
-static int ubd_open(struct block_device *bdev, fmode_t mode);
-static void ubd_release(struct gendisk *disk, fmode_t mode);
-static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
+static int ubd_open(struct gendisk *disk, blk_mode_t mode);
+static void ubd_release(struct gendisk *disk);
+static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -1154,9 +1154,8 @@ static int __init ubd_driver_init(void){
device_initcall(ubd_driver_init);
-static int ubd_open(struct block_device *bdev, fmode_t mode)
+static int ubd_open(struct gendisk *disk, blk_mode_t mode)
{
- struct gendisk *disk = bdev->bd_disk;
struct ubd *ubd_dev = disk->private_data;
int err = 0;
@@ -1171,19 +1170,12 @@ static int ubd_open(struct block_device *bdev, fmode_t mode)
}
ubd_dev->count++;
set_disk_ro(disk, !ubd_dev->openflags.w);
-
- /* This should no more be needed. And it didn't work anyway to exclude
- * read-write remounting of filesystems.*/
- /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
- if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
- err = -EROFS;
- }*/
out:
mutex_unlock(&ubd_mutex);
return err;
}
-static void ubd_release(struct gendisk *disk, fmode_t mode)
+static void ubd_release(struct gendisk *disk)
{
struct ubd *ubd_dev = disk->private_data;
@@ -1397,7 +1389,7 @@ static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
+static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct ubd *ubd_dev = bdev->bd_disk->private_data;
diff --git a/arch/um/include/asm/bugs.h b/arch/um/include/asm/bugs.h
deleted file mode 100644
index 4473942a0839..000000000000
--- a/arch/um/include/asm/bugs.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __UM_BUGS_H
-#define __UM_BUGS_H
-
-void check_bugs(void);
-
-#endif
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index bda66e5a9d4e..0347a190429c 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -52,6 +52,7 @@ static inline int printk(const char *fmt, ...)
extern int in_aton(char *str);
extern size_t strlcpy(char *, const char *, size_t);
extern size_t strlcat(char *, const char *, size_t);
+extern size_t strscpy(char *, const char *, size_t);
/* Copied from linux/compiler-gcc.h since we can't include it directly */
#define barrier() __asm__ __volatile__("": : :"memory")
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 0a23a98d4ca0..918fed7ad4d8 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -3,6 +3,7 @@
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
+#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/mm.h>
@@ -430,7 +431,7 @@ void __init setup_arch(char **cmdline_p)
}
}
-void __init check_bugs(void)
+void __init arch_cpu_finalize_init(void)
{
arch_check_bugs();
os_check_bugs();
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index 53eb3d508645..2284e9c1cbbb 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -146,7 +146,7 @@ static int tuntap_open(void *data)
}
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
+ strscpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
err = -errno;
printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb1031018afa..b3ebf58cf77e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -71,6 +71,7 @@ config X86
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
+ select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
@@ -274,7 +275,9 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select HAVE_GENERIC_VDSO
+ select HOTPLUG_PARALLEL if SMP && X86_64
select HOTPLUG_SMT if SMP
+ select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
select IRQ_FORCED_THREADING
select LOCK_MM_AND_FIND_VMA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
@@ -292,7 +295,6 @@ config X86
select TRACE_IRQFLAGS_NMI_SUPPORT
select USER_STACKTRACE_SUPPORT
select HAVE_ARCH_KCSAN if X86_64
- select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX
select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16
@@ -442,17 +444,6 @@ config SMP
If you don't know what to do here, say N.
-config X86_FEATURE_NAMES
- bool "Processor feature human-readable names" if EMBEDDED
- default y
- help
- This option compiles in a table of x86 feature bits and corresponding
- names. This is required to support /proc/cpuinfo and a few kernel
- messages. You can disable this to save space, at the expense of
- making those few kernel messages show numeric feature bits instead.
-
- If in doubt, say Y.
-
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
@@ -885,9 +876,11 @@ config INTEL_TDX_GUEST
bool "Intel TDX (Trust Domain Extensions) - Guest Support"
depends on X86_64 && CPU_SUP_INTEL
depends on X86_X2APIC
+ depends on EFI_STUB
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
+ select UNACCEPTED_MEMORY
help
Support running as a guest under Intel TDX. Without this support,
the guest kernel can not boot or run under TDX.
@@ -1542,11 +1535,13 @@ config X86_MEM_ENCRYPT
config AMD_MEM_ENCRYPT
bool "AMD Secure Memory Encryption (SME) support"
depends on X86_64 && CPU_SUP_AMD
+ depends on EFI_STUB
select DMA_COHERENT_POOL
select ARCH_USE_MEMREMAP_PROT
select INSTRUCTION_DECODER
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
+ select UNACCEPTED_MEMORY
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -2306,49 +2301,6 @@ config HOTPLUG_CPU
def_bool y
depends on SMP
-config BOOTPARAM_HOTPLUG_CPU0
- bool "Set default setting of cpu0_hotpluggable"
- depends on HOTPLUG_CPU
- help
- Set whether default state of cpu0_hotpluggable is on or off.
-
- Say Y here to enable CPU0 hotplug by default. If this switch
- is turned on, there is no need to give cpu0_hotplug kernel
- parameter and the CPU0 hotplug feature is enabled by default.
-
- Please note: there are two known CPU0 dependencies if you want
- to enable the CPU0 hotplug feature either by this switch or by
- cpu0_hotplug kernel parameter.
-
- First, resume from hibernate or suspend always starts from CPU0.
- So hibernate and suspend are prevented if CPU0 is offline.
-
- Second dependency is PIC interrupts always go to CPU0. CPU0 can not
- offline if any interrupt can not migrate out of CPU0. There may
- be other CPU0 dependencies.
-
- Please make sure the dependencies are under your control before
- you enable this feature.
-
- Say N if you don't want to enable CPU0 hotplug feature by default.
- You still can enable the CPU0 hotplug feature at boot by kernel
- parameter cpu0_hotplug.
-
-config DEBUG_HOTPLUG_CPU0
- def_bool n
- prompt "Debug CPU0 hotplug"
- depends on HOTPLUG_CPU
- help
- Enabling this option offlines CPU0 (if CPU0 can be offlined) as
- soon as possible and boots up userspace with CPU0 offlined. User
- can online CPU0 back after boot time.
-
- To debug CPU0 hotplug, you need to enable CPU0 offline/online
- feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during
- compilation or giving cpu0_hotplug kernel parameter at boot.
-
- If unsure, say N.
-
config COMPAT_VDSO
def_bool n
prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 542377cd419d..00468adf180f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -389,7 +389,7 @@ config IA32_FEAT_CTL
config X86_VMX_FEATURE_NAMES
def_bool y
- depends on IA32_FEAT_CTL && X86_FEATURE_NAMES
+ depends on IA32_FEAT_CTL
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b39975977c03..fdc2e3abd615 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),)
endif
endif
+ifdef CONFIG_UNWINDER_ORC
+orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
+orc_hash_sh := $(srctree)/scripts/orc_hash.sh
+targets += $(orc_hash_h)
+quiet_cmd_orc_hash = GEN $@
+ cmd_orc_hash = mkdir -p $(dir $@); \
+ $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
+$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE
+ $(call if_changed,orc_hash)
+archprepare: $(orc_hash_h)
+endif
+
archclean:
$(Q)rm -rf $(objtree)/arch/i386
$(Q)rm -rf $(objtree)/arch/x86_64
diff --git a/arch/x86/Makefile.postlink b/arch/x86/Makefile.postlink
new file mode 100644
index 000000000000..936093d29160
--- /dev/null
+++ b/arch/x86/Makefile.postlink
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link x86 pass
+# ===========================================================================
+#
+# 1. Separate relocations from vmlinux into vmlinux.relocs.
+# 2. Strip relocations from vmlinux.
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+CMD_RELOCS = arch/x86/tools/relocs
+OUT_RELOCS = arch/x86/boot/compressed
+quiet_cmd_relocs = RELOCS $(OUT_RELOCS)/$@.relocs
+ cmd_relocs = \
+ mkdir -p $(OUT_RELOCS); \
+ $(CMD_RELOCS) $@ > $(OUT_RELOCS)/$@.relocs; \
+ $(CMD_RELOCS) --abs-relocs $@
+
+quiet_cmd_strip_relocs = RSTRIP $@
+ cmd_strip_relocs = \
+ $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \
+ --remove-section='.rela.*' --remove-section='.rela__*' $@
+
+# `@true` prevents complaint when there is nothing to be done
+
+vmlinux: FORCE
+ @true
+ifeq ($(CONFIG_X86_NEED_RELOCS),y)
+ $(call cmd,relocs)
+ $(call cmd,strip_relocs)
+endif
+
+%.ko: FORCE
+ @true
+
+clean:
+ @rm -f $(OUT_RELOCS)/vmlinux.relocs
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 9e38ffaadb5d..f33e45ed1437 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -55,14 +55,12 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
-include include/generated/autoconf.h \
-D__EXPORTED_HEADERS__
-ifdef CONFIG_X86_FEATURE_NAMES
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
-endif
targets += cpustr.h
# ---------------------------------------------------------------------------
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6b6cfe607bdb..40d2ff503079 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,7 +106,8 @@ ifdef CONFIG_X86_64
endif
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
-vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
+vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o $(obj)/tdx-shared.o
+vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
@@ -121,11 +122,9 @@ $(obj)/vmlinux.bin: vmlinux FORCE
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
-CMD_RELOCS = arch/x86/tools/relocs
-quiet_cmd_relocs = RELOCS $@
- cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
-$(obj)/vmlinux.relocs: vmlinux FORCE
- $(call if_changed,relocs)
+# vmlinux.relocs is created by the vmlinux postlink step.
+$(obj)/vmlinux.relocs: vmlinux
+ @true
vmlinux.bin.all-y := $(obj)/vmlinux.bin
vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
index 7db2f41b54cd..866c0af8b5b9 100644
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -16,6 +16,7 @@ typedef guid_t efi_guid_t __aligned(__alignof__(u32));
#define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
#define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81)
#define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42)
+#define LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID EFI_GUID(0xd5d1de3c, 0x105c, 0x44f9, 0x9e, 0xa9, 0xbc, 0xef, 0x98, 0x12, 0x00, 0x31)
#define EFI32_LOADER_SIGNATURE "EL32"
#define EFI64_LOADER_SIGNATURE "EL64"
@@ -32,6 +33,7 @@ typedef struct {
} efi_table_hdr_t;
#define EFI_CONVENTIONAL_MEMORY 7
+#define EFI_UNACCEPTED_MEMORY 15
#define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL) /* higher reliability */
@@ -104,6 +106,14 @@ struct efi_setup_data {
u64 reserved[8];
};
+struct efi_unaccepted_memory {
+ u32 version;
+ u32 unit_size;
+ u64 phys_base;
+ u64 size;
+ unsigned long bitmap[];
+};
+
static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right)
{
return memcmp(&left, &right, sizeof (efi_guid_t));
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
index c881878e56d3..5313c5cb2b80 100644
--- a/arch/x86/boot/compressed/error.c
+++ b/arch/x86/boot/compressed/error.c
@@ -22,3 +22,22 @@ void error(char *m)
while (1)
asm("hlt");
}
+
+/* EFI libstub provides vsnprintf() */
+#ifdef CONFIG_EFI_STUB
+void panic(const char *fmt, ...)
+{
+ static char buf[1024];
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if (len && buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+
+ error(buf);
+}
+#endif
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
index 1de5821184f1..86fe33b93715 100644
--- a/arch/x86/boot/compressed/error.h
+++ b/arch/x86/boot/compressed/error.h
@@ -6,5 +6,6 @@
void warn(char *m);
void error(char *m) __noreturn;
+void panic(const char *fmt, ...) __noreturn __cold;
#endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 454757fbdfe5..9193acf0e9cd 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -672,6 +672,33 @@ static bool process_mem_region(struct mem_vector *region,
}
#ifdef CONFIG_EFI
+
+/*
+ * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are
+ * guaranteed to be free.
+ *
+ * Pick free memory more conservatively than the EFI spec allows: according to
+ * the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also free memory and thus
+ * available to place the kernel image into, but in practice there's firmware
+ * where using that memory leads to crashes. Buggy vendor EFI code registers
+ * for an event that triggers on SetVirtualAddressMap(). The handler assumes
+ * that EFI_BOOT_SERVICES_DATA memory has not been touched by loader yet, which
+ * is probably true for Windows.
+ *
+ * Preserve EFI_BOOT_SERVICES_* regions until after SetVirtualAddressMap().
+ */
+static inline bool memory_type_is_free(efi_memory_desc_t *md)
+{
+ if (md->type == EFI_CONVENTIONAL_MEMORY)
+ return true;
+
+ if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
+ md->type == EFI_UNACCEPTED_MEMORY)
+ return true;
+
+ return false;
+}
+
/*
* Returns true if we processed the EFI memmap, which we prefer over the E820
* table if it is available.
@@ -716,18 +743,7 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
for (i = 0; i < nr_desc; i++) {
md = efi_early_memdesc_ptr(pmap, e->efi_memdesc_size, i);
- /*
- * Here we are more conservative in picking free memory than
- * the EFI spec allows:
- *
- * According to the spec, EFI_BOOT_SERVICES_{CODE|DATA} are also
- * free memory and thus available to place the kernel image into,
- * but in practice there's firmware where using that memory leads
- * to crashes.
- *
- * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
- */
- if (md->type != EFI_CONVENTIONAL_MEMORY)
+ if (!memory_type_is_free(md))
continue;
if (efi_soft_reserve_enabled() &&
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
new file mode 100644
index 000000000000..3c1609245f2a
--- /dev/null
+++ b/arch/x86/boot/compressed/mem.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "error.h"
+#include "misc.h"
+#include "tdx.h"
+#include "sev.h"
+#include <asm/shared/tdx.h>
+
+/*
+ * accept_memory() and process_unaccepted_memory() called from EFI stub which
+ * runs before decompresser and its early_tdx_detect().
+ *
+ * Enumerate TDX directly from the early users.
+ */
+static bool early_is_tdx_guest(void)
+{
+ static bool once;
+ static bool is_tdx;
+
+ if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST))
+ return false;
+
+ if (!once) {
+ u32 eax, sig[3];
+
+ cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax,
+ &sig[0], &sig[2], &sig[1]);
+ is_tdx = !memcmp(TDX_IDENT, sig, sizeof(sig));
+ once = true;
+ }
+
+ return is_tdx;
+}
+
+void arch_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ /* Platform-specific memory-acceptance call goes here */
+ if (early_is_tdx_guest()) {
+ if (!tdx_accept_memory(start, end))
+ panic("TDX: Failed to accept memory\n");
+ } else if (sev_snp_enabled()) {
+ snp_accept_memory(start, end);
+ } else {
+ error("Cannot accept memory: unknown platform\n");
+ }
+}
+
+bool init_unaccepted_memory(void)
+{
+ guid_t guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+ struct efi_unaccepted_memory *table;
+ unsigned long cfg_table_pa;
+ unsigned int cfg_table_len;
+ enum efi_type et;
+ int ret;
+
+ et = efi_get_type(boot_params);
+ if (et == EFI_TYPE_NONE)
+ return false;
+
+ ret = efi_get_conf_table(boot_params, &cfg_table_pa, &cfg_table_len);
+ if (ret) {
+ warn("EFI config table not found.");
+ return false;
+ }
+
+ table = (void *)efi_find_vendor_table(boot_params, cfg_table_pa,
+ cfg_table_len, guid);
+ if (!table)
+ return false;
+
+ if (table->version != 1)
+ error("Unknown version of unaccepted memory table\n");
+
+ /*
+ * In many cases unaccepted_table is already set by EFI stub, but it
+ * has to be initialized again to cover cases when the table is not
+ * allocated by EFI stub or EFI stub copied the kernel image with
+ * efi_relocate_kernel() before the variable is set.
+ *
+ * It must be initialized before the first usage of accept_memory().
+ */
+ unaccepted_table = table;
+
+ return true;
+}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 014ff222bf4b..94b7abcf624b 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -455,6 +455,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#endif
debug_putstr("\nDecompressing Linux... ");
+
+ if (init_unaccepted_memory()) {
+ debug_putstr("Accepting memory... ");
+ accept_memory(__pa(output), __pa(output) + needed_size);
+ }
+
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
entry_offset = parse_elf(output);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 2f155a0e3041..964fe903a1cd 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -247,4 +247,14 @@ static inline unsigned long efi_find_vendor_table(struct boot_params *bp,
}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_UNACCEPTED_MEMORY
+bool init_unaccepted_memory(void);
+#else
+static inline bool init_unaccepted_memory(void) { return false; }
+#endif
+
+/* Defined in EFI stub */
+extern struct efi_unaccepted_memory *unaccepted_table;
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 014b89c89088..09dc8c187b3c 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -115,7 +115,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
/* Include code for early handlers */
#include "../../kernel/sev-shared.c"
-static inline bool sev_snp_enabled(void)
+bool sev_snp_enabled(void)
{
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
}
@@ -181,6 +181,58 @@ static bool early_setup_ghcb(void)
return true;
}
+static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc,
+ phys_addr_t pa, phys_addr_t pa_end)
+{
+ struct psc_hdr *hdr;
+ struct psc_entry *e;
+ unsigned int i;
+
+ hdr = &desc->hdr;
+ memset(hdr, 0, sizeof(*hdr));
+
+ e = desc->entries;
+
+ i = 0;
+ while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) {
+ hdr->end_entry = i;
+
+ e->gfn = pa >> PAGE_SHIFT;
+ e->operation = SNP_PAGE_STATE_PRIVATE;
+ if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) {
+ e->pagesize = RMP_PG_SIZE_2M;
+ pa += PMD_SIZE;
+ } else {
+ e->pagesize = RMP_PG_SIZE_4K;
+ pa += PAGE_SIZE;
+ }
+
+ e++;
+ i++;
+ }
+
+ if (vmgexit_psc(boot_ghcb, desc))
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ pvalidate_pages(desc);
+
+ return pa;
+}
+
+void snp_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ struct snp_psc_desc desc = {};
+ unsigned int i;
+ phys_addr_t pa;
+
+ if (!boot_ghcb && !early_setup_ghcb())
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ pa = start;
+ while (pa < end)
+ pa = __snp_accept_memory(&desc, pa, end);
+}
+
void sev_es_shutdown_ghcb(void)
{
if (!boot_ghcb)
diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h
new file mode 100644
index 000000000000..fc725a981b09
--- /dev/null
+++ b/arch/x86/boot/compressed/sev.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header for early boot related functions.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#ifndef BOOT_COMPRESSED_SEV_H
+#define BOOT_COMPRESSED_SEV_H
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+bool sev_snp_enabled(void);
+void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+
+#else
+
+static inline bool sev_snp_enabled(void) { return false; }
+static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+
+#endif
+
+#endif
diff --git a/arch/x86/boot/compressed/tdx-shared.c b/arch/x86/boot/compressed/tdx-shared.c
new file mode 100644
index 000000000000..5ac43762fe13
--- /dev/null
+++ b/arch/x86/boot/compressed/tdx-shared.c
@@ -0,0 +1,2 @@
+#include "error.h"
+#include "../../coco/tdx/tdx-shared.c"
diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c
index 2d81d3cc72a1..8841b945a1e2 100644
--- a/arch/x86/boot/compressed/tdx.c
+++ b/arch/x86/boot/compressed/tdx.c
@@ -20,7 +20,7 @@ static inline unsigned int tdx_io_in(int size, u16 port)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
- .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
.r12 = size,
.r13 = 0,
.r14 = port,
@@ -36,7 +36,7 @@ static inline void tdx_io_out(int size, u16 port, u32 value)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
- .r11 = EXIT_REASON_IO_INSTRUCTION,
+ .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
.r12 = size,
.r13 = 1,
.r14 = port,
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 0bbf4f3707d2..feb6dbd7ca86 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -14,9 +14,7 @@
*/
#include "boot.h"
-#ifdef CONFIG_X86_FEATURE_NAMES
#include "cpustr.h"
-#endif
static char *cpu_name(int level)
{
@@ -35,7 +33,6 @@ static char *cpu_name(int level)
static void show_cap_strs(u32 *err_flags)
{
int i, j;
-#ifdef CONFIG_X86_FEATURE_NAMES
const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
for (i = 0; i < NCAPINTS; i++) {
u32 e = err_flags[i];
@@ -58,16 +55,6 @@ static void show_cap_strs(u32 *err_flags)
e >>= 1;
}
}
-#else
- for (i = 0; i < NCAPINTS; i++) {
- u32 e = err_flags[i];
- for (j = 0; j < 32; j++) {
- if (e & 1)
- printf("%d:%d ", i, j);
- e >>= 1;
- }
- }
-#endif
}
int validate_cpu(void)
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index 73f83233d25d..eeec9986570e 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -13,10 +13,10 @@
#include <asm/coco.h>
#include <asm/processor.h>
-enum cc_vendor cc_vendor __ro_after_init;
+enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
static u64 cc_mask __ro_after_init;
-static bool intel_cc_platform_has(enum cc_attr attr)
+static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
case CC_ATTR_GUEST_UNROLL_STRING_IO:
@@ -34,7 +34,7 @@ static bool intel_cc_platform_has(enum cc_attr attr)
* the other levels of SME/SEV functionality, including C-bit
* based SEV-SNP, are not enabled.
*/
-static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr)
+static __maybe_unused __always_inline bool amd_cc_platform_vtom(enum cc_attr attr)
{
switch (attr) {
case CC_ATTR_GUEST_MEM_ENCRYPT:
@@ -58,7 +58,7 @@ static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr)
* the trampoline area must be encrypted.
*/
-static bool amd_cc_platform_has(enum cc_attr attr)
+static bool noinstr amd_cc_platform_has(enum cc_attr attr)
{
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -97,7 +97,7 @@ static bool amd_cc_platform_has(enum cc_attr attr)
#endif
}
-bool cc_platform_has(enum cc_attr attr)
+bool noinstr cc_platform_has(enum cc_attr attr)
{
switch (cc_vendor) {
case CC_VENDOR_AMD:
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
index 46c55998557d..2c7dcbf1458b 100644
--- a/arch/x86/coco/tdx/Makefile
+++ b/arch/x86/coco/tdx/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += tdx.o tdcall.o
+obj-y += tdx.o tdx-shared.o tdcall.o
diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c
new file mode 100644
index 000000000000..ef20ddc37b58
--- /dev/null
+++ b/arch/x86/coco/tdx/tdx-shared.c
@@ -0,0 +1,71 @@
+#include <asm/tdx.h>
+#include <asm/pgtable.h>
+
+static unsigned long try_accept_one(phys_addr_t start, unsigned long len,
+ enum pg_level pg_level)
+{
+ unsigned long accept_size = page_level_size(pg_level);
+ u64 tdcall_rcx;
+ u8 page_size;
+
+ if (!IS_ALIGNED(start, accept_size))
+ return 0;
+
+ if (len < accept_size)
+ return 0;
+
+ /*
+ * Pass the page physical address to the TDX module to accept the
+ * pending, private page.
+ *
+ * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
+ */
+ switch (pg_level) {
+ case PG_LEVEL_4K:
+ page_size = 0;
+ break;
+ case PG_LEVEL_2M:
+ page_size = 1;
+ break;
+ case PG_LEVEL_1G:
+ page_size = 2;
+ break;
+ default:
+ return 0;
+ }
+
+ tdcall_rcx = start | page_size;
+ if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
+ return 0;
+
+ return accept_size;
+}
+
+bool tdx_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ /*
+ * For shared->private conversion, accept the page using
+ * TDX_ACCEPT_PAGE TDX module call.
+ */
+ while (start < end) {
+ unsigned long len = end - start;
+ unsigned long accept_size;
+
+ /*
+ * Try larger accepts first. This gives the VMM a chance to keep
+ * 1G/2M Secure EPT entries where possible and speeds up the
+ * process by cutting the number of hypercalls (if successful).
+ */
+
+ accept_size = try_accept_one(start, len, PG_LEVEL_1G);
+ if (!accept_size)
+ accept_size = try_accept_one(start, len, PG_LEVEL_2M);
+ if (!accept_size)
+ accept_size = try_accept_one(start, len, PG_LEVEL_4K);
+ if (!accept_size)
+ return false;
+ start += accept_size;
+ }
+
+ return true;
+}
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index e146b599260f..1d6b863c42b0 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,20 +14,6 @@
#include <asm/insn-eval.h>
#include <asm/pgtable.h>
-/* TDX module Call Leaf IDs */
-#define TDX_GET_INFO 1
-#define TDX_GET_VEINFO 3
-#define TDX_GET_REPORT 4
-#define TDX_ACCEPT_PAGE 6
-#define TDX_WR 8
-
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
-#define TDCS_NOTIFY_ENABLES 0x9100000000000010
-
-/* TDX hypercall Leaf IDs */
-#define TDVMCALL_MAP_GPA 0x10001
-#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
-
/* MMIO direction */
#define EPT_READ 0
#define EPT_WRITE 1
@@ -51,24 +37,6 @@
#define TDREPORT_SUBTYPE_0 0
-/*
- * Wrapper for standard use of __tdx_hypercall with no output aside from
- * return code.
- */
-static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
-{
- struct tdx_hypercall_args args = {
- .r10 = TDX_HYPERCALL_STANDARD,
- .r11 = fn,
- .r12 = r12,
- .r13 = r13,
- .r14 = r14,
- .r15 = r15,
- };
-
- return __tdx_hypercall(&args);
-}
-
/* Called from __tdx_hypercall() for unrecoverable failure */
noinstr void __tdx_hypercall_failed(void)
{
@@ -76,17 +44,6 @@ noinstr void __tdx_hypercall_failed(void)
panic("TDVMCALL failed. TDX module bug?");
}
-/*
- * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
- * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
- * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
- * guest sides of these calls.
- */
-static __always_inline u64 hcall_func(u64 exit_reason)
-{
- return exit_reason;
-}
-
#ifdef CONFIG_KVM_GUEST
long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
unsigned long p3, unsigned long p4)
@@ -745,47 +702,6 @@ static bool tdx_cache_flush_required(void)
return true;
}
-static bool try_accept_one(phys_addr_t *start, unsigned long len,
- enum pg_level pg_level)
-{
- unsigned long accept_size = page_level_size(pg_level);
- u64 tdcall_rcx;
- u8 page_size;
-
- if (!IS_ALIGNED(*start, accept_size))
- return false;
-
- if (len < accept_size)
- return false;
-
- /*
- * Pass the page physical address to the TDX module to accept the
- * pending, private page.
- *
- * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
- */
- switch (pg_level) {
- case PG_LEVEL_4K:
- page_size = 0;
- break;
- case PG_LEVEL_2M:
- page_size = 1;
- break;
- case PG_LEVEL_1G:
- page_size = 2;
- break;
- default:
- return false;
- }
-
- tdcall_rcx = *start | page_size;
- if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
- return false;
-
- *start += accept_size;
- return true;
-}
-
/*
* Inform the VMM of the guest's intent for this physical page: shared with
* the VMM or private to the guest. The VMM is expected to change its mapping
@@ -810,33 +726,34 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
return false;
- /* private->shared conversion requires only MapGPA call */
- if (!enc)
- return true;
+ /* shared->private conversion requires memory to be accepted before use */
+ if (enc)
+ return tdx_accept_memory(start, end);
+
+ return true;
+}
+static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
+ bool enc)
+{
/*
- * For shared->private conversion, accept the page using
- * TDX_ACCEPT_PAGE TDX module call.
+ * Only handle shared->private conversion here.
+ * See the comment in tdx_early_init().
*/
- while (start < end) {
- unsigned long len = end - start;
-
- /*
- * Try larger accepts first. It gives chance to VMM to keep
- * 1G/2M SEPT entries where possible and speeds up process by
- * cutting number of hypercalls (if successful).
- */
-
- if (try_accept_one(&start, len, PG_LEVEL_1G))
- continue;
-
- if (try_accept_one(&start, len, PG_LEVEL_2M))
- continue;
-
- if (!try_accept_one(&start, len, PG_LEVEL_4K))
- return false;
- }
+ if (enc)
+ return tdx_enc_status_changed(vaddr, numpages, enc);
+ return true;
+}
+static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
+ bool enc)
+{
+ /*
+ * Only handle private->shared conversion here.
+ * See the comment in tdx_early_init().
+ */
+ if (!enc)
+ return tdx_enc_status_changed(vaddr, numpages, enc);
return true;
}
@@ -852,7 +769,7 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);
- cc_set_vendor(CC_VENDOR_INTEL);
+ cc_vendor = CC_VENDOR_INTEL;
tdx_parse_tdinfo(&cc_mask);
cc_set_mask(cc_mask);
@@ -867,9 +784,41 @@ void __init tdx_early_init(void)
*/
physical_mask &= cc_mask - 1;
- x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
- x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
- x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
+ /*
+ * The kernel mapping should match the TDX metadata for the page.
+ * load_unaligned_zeropad() can touch memory *adjacent* to that which is
+ * owned by the caller and can catch even _momentary_ mismatches. Bad
+ * things happen on mismatch:
+ *
+ * - Private mapping => Shared Page == Guest shutdown
+ * - Shared mapping => Private Page == Recoverable #VE
+ *
+ * guest.enc_status_change_prepare() converts the page from
+ * shared=>private before the mapping becomes private.
+ *
+ * guest.enc_status_change_finish() converts the page from
+ * private=>shared after the mapping becomes shared.
+ *
+ * In both cases there is a temporary shared mapping to a private page,
+ * which can result in a #VE. But, there is never a private mapping to
+ * a shared page.
+ */
+ x86_platform.guest.enc_status_change_prepare = tdx_enc_status_change_prepare;
+ x86_platform.guest.enc_status_change_finish = tdx_enc_status_change_finish;
+
+ x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
+ x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
+
+ /*
+ * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+ * bringup low level code. That raises #VE which cannot be handled
+ * there.
+ *
+ * Intel-TDX has a secure RDMSR hypercall, but that needs to be
+ * implemented separately in the low level startup ASM code.
+ * Until that is in place, disable parallel bringup for TDX.
+ */
+ x86_cpuinit.parallel_bringup = false;
pr_info("Guest detected\n");
}
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 320480a8db4f..bc0a3c941b35 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -455,3 +455,4 @@
448 i386 process_mrelease sys_process_mrelease
449 i386 futex_waitv sys_futex_waitv
450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
+451 i386 cachestat sys_cachestat
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index c84d12608cd2..227538b0ce80 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -372,6 +372,7 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 5e37f41e5f14..27b5da2111ac 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -26,17 +26,7 @@ SYM_FUNC_START(\name)
pushq %r11
call \func
- jmp __thunk_restore
-SYM_FUNC_END(\name)
- _ASM_NOKPROBE(\name)
- .endm
-
- THUNK preempt_schedule_thunk, preempt_schedule
- THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
- EXPORT_SYMBOL(preempt_schedule_thunk)
- EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
-SYM_CODE_START_LOCAL(__thunk_restore)
popq %r11
popq %r10
popq %r9
@@ -48,5 +38,11 @@ SYM_CODE_START_LOCAL(__thunk_restore)
popq %rdi
popq %rbp
RET
- _ASM_NOKPROBE(__thunk_restore)
-SYM_CODE_END(__thunk_restore)
+SYM_FUNC_END(\name)
+ _ASM_NOKPROBE(\name)
+ .endm
+
+THUNK preempt_schedule_thunk, preempt_schedule
+THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
+EXPORT_SYMBOL(preempt_schedule_thunk)
+EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
diff --git a/arch/x86/entry/vdso/vgetcpu.c b/arch/x86/entry/vdso/vgetcpu.c
index 0a9007c24056..e4640306b2e3 100644
--- a/arch/x86/entry/vdso/vgetcpu.c
+++ b/arch/x86/entry/vdso/vgetcpu.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <asm/segment.h>
+#include <vdso/processor.h>
notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index bccea57dee81..abadd5f23425 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -374,7 +374,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
/* pass precise event sampling to ibs: */
if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
+ return forward_event_to_ibs(event);
if (has_branch_stack(event) && !x86_pmu.lbr_nr)
return -EOPNOTSUPP;
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 64582954b5f6..371014802191 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -190,7 +190,7 @@ static struct perf_ibs *get_ibs_pmu(int type)
}
/*
- * Use IBS for precise event sampling:
+ * core pmu config -> IBS config
*
* perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
* perf record -a -e r076:p ... # same as -e cpu-cycles:p
@@ -199,25 +199,9 @@ static struct perf_ibs *get_ibs_pmu(int type)
* IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
* MSRC001_1033) is used to select either cycle or micro-ops counting
* mode.
- *
- * The rip of IBS samples has skid 0. Thus, IBS supports precise
- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
- * rip is invalid when IBS was not able to record the rip correctly.
- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
- *
*/
-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
{
- switch (event->attr.precise_ip) {
- case 0:
- return -ENOENT;
- case 1:
- case 2:
- break;
- default:
- return -EOPNOTSUPP;
- }
-
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
switch (event->attr.config) {
@@ -243,22 +227,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
return -EOPNOTSUPP;
}
+/*
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ */
+int forward_event_to_ibs(struct perf_event *event)
+{
+ u64 config = 0;
+
+ if (!event->attr.precise_ip || event->attr.precise_ip > 2)
+ return -EOPNOTSUPP;
+
+ if (!core_pmu_ibs_config(event, &config)) {
+ event->attr.type = perf_ibs_op.pmu.type;
+ event->attr.config = config;
+ }
+ return -ENOENT;
+}
+
static int perf_ibs_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct perf_ibs *perf_ibs;
u64 max_cnt, config;
- int ret;
perf_ibs = get_ibs_pmu(event->attr.type);
- if (perf_ibs) {
- config = event->attr.config;
- } else {
- perf_ibs = &perf_ibs_op;
- ret = perf_ibs_precise_event(event, &config);
- if (ret)
- return ret;
- }
+ if (!perf_ibs)
+ return -ENOENT;
+
+ config = event->attr.config;
if (event->pmu != &perf_ibs->pmu)
return -ENOENT;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 89b9c1cebb61..a149fafad813 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_gnr_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+ INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ EVENT_EXTRA_END
+};
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
@@ -2451,7 +2461,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
intel_clear_masks(event, idx);
- mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
+ mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK);
cpuc->fixed_ctrl_val &= ~mask;
}
@@ -2750,25 +2760,25 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
* if requested:
*/
if (!event->attr.precise_ip)
- bits |= 0x8;
+ bits |= INTEL_FIXED_0_ENABLE_PMI;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
- bits |= 0x2;
+ bits |= INTEL_FIXED_0_USER;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
- bits |= 0x1;
+ bits |= INTEL_FIXED_0_KERNEL;
/*
* ANY bit is supported in v3 and up
*/
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
- bits |= 0x4;
+ bits |= INTEL_FIXED_0_ANYTHREAD;
idx -= INTEL_PMC_IDX_FIXED;
- bits <<= (idx * 4);
- mask = 0xfULL << (idx * 4);
+ bits = intel_fixed_bits_by_idx(idx, bits);
+ mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
- bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
- mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+ bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+ mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
}
cpuc->fixed_ctrl_val &= ~mask;
@@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+ x86_pmu.extra_regs = intel_spr_extra_regs;
fallthrough;
case INTEL_FAM6_GRANITERAPIDS_X:
case INTEL_FAM6_GRANITERAPIDS_D:
@@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_spr_event_constraints;
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
- x86_pmu.extra_regs = intel_spr_extra_regs;
+ if (!x86_pmu.extra_regs)
+ x86_pmu.extra_regs = intel_gnr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.pebs_ept = 1;
x86_pmu.pebs_aliases = NULL;
@@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void)
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
if (is_mtl(boot_cpu_data.x86_model)) {
+ x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs;
x86_pmu.pebs_latency_data = mtl_latency_data_small;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a5f9474f08e1..6c04b52f139b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -416,7 +416,7 @@ void __init hyperv_init(void)
goto free_vp_assist_page;
}
- cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
hv_cpu_init, hv_cpu_die);
if (cpuhp < 0)
goto free_ghcb_page;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 1ba5d3b99b16..85d38b9f3586 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
+ x86_platform.realmode_reserve = x86_init_noop;
+ x86_platform.realmode_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index cc92388b7a99..14f46ad2ca64 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -17,6 +17,7 @@
#include <asm/mem_encrypt.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
+#include <asm/mtrr.h>
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -364,7 +365,7 @@ void __init hv_vtom_init(void)
* Set it here to indicate a vTOM VM.
*/
sev_status = MSR_AMD64_SNP_VTOM;
- cc_set_vendor(CC_VENDOR_AMD);
+ cc_vendor = CC_VENDOR_AMD;
cc_set_mask(ms_hyperv.shared_gpa_boundary);
physical_mask &= ms_hyperv.shared_gpa_boundary - 1;
@@ -372,6 +373,9 @@ void __init hv_vtom_init(void)
x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
+
+ /* Set WB as the default cache mode. */
+ mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
#endif /* CONFIG_AMD_MEM_ENCRYPT */
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 1e51650b79d7..4f1ce5fc4e19 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
generated-y += syscalls_32.h
generated-y += syscalls_64.h
generated-y += syscalls_x32.h
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d7da28fada87..6c15a622ad60 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -113,7 +113,6 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern bool is_callthunk(void *addr);
extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
@@ -124,10 +123,6 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
{
return dest;
}
-static __always_inline bool is_callthunk(void *addr)
-{
- return false;
-}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
void *func)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3216da7074ba..98c32aa5963a 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -55,6 +55,8 @@ extern int local_apic_timer_c2_ok;
extern int disable_apic;
extern unsigned int lapic_timer_period;
+extern int cpuid_to_apicid[];
+
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
APIC_PIC,
@@ -377,7 +379,6 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
* APIC functionality to boot other CPUs - only used on SMP:
*/
#ifdef CONFIG_SMP
-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
extern int lapic_can_unplug_cpu(void);
#endif
@@ -507,10 +508,8 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_SMP
-bool apic_id_is_primary_thread(unsigned int id);
void apic_smt_update(void);
#else
-static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
static inline void apic_smt_update(void) { }
#endif
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 68d213e83fcc..4b125e5b3187 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_APICDEF_H
#define _ASM_X86_APICDEF_H
+#include <linux/bits.h>
+
/*
* Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
*
@@ -138,9 +140,10 @@
#define APIC_EILVT_MASKED (1 << 16)
#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
-#define APIC_BASE_MSR 0x800
-#define XAPIC_ENABLE (1UL << 11)
-#define X2APIC_ENABLE (1UL << 10)
+#define APIC_BASE_MSR 0x800
+#define APIC_X2APIC_ID_MSR 0x802
+#define XAPIC_ENABLE BIT(11)
+#define X2APIC_ENABLE BIT(10)
#ifdef CONFIG_X86_32
# define MAX_IO_APICS 64
@@ -162,6 +165,7 @@
#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK)
#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT)
+#ifndef __ASSEMBLY__
/*
* the local APIC register structure, memory mapped. Not terribly well
* tested, but we might eventually use this one in the future - the
@@ -435,4 +439,5 @@ enum apic_delivery_modes {
APIC_DELIVERY_MODE_EXTINT = 7,
};
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 5e754e895767..55a55ec04350 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -14,12 +14,6 @@
* resource counting etc..
*/
-/**
- * arch_atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
static __always_inline int arch_atomic_read(const atomic_t *v)
{
/*
@@ -29,25 +23,11 @@ static __always_inline int arch_atomic_read(const atomic_t *v)
return __READ_ONCE((v)->counter);
}
-/**
- * arch_atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__WRITE_ONCE(v->counter, i);
}
-/**
- * arch_atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
@@ -55,13 +35,6 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v)
: "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub - subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
@@ -69,27 +42,12 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
: "ir" (i) : "memory");
}
-/**
- * arch_atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, e, "er", i);
}
#define arch_atomic_sub_and_test arch_atomic_sub_and_test
-/**
- * arch_atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
@@ -97,12 +55,6 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
}
#define arch_atomic_inc arch_atomic_inc
-/**
- * arch_atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
@@ -110,69 +62,30 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
}
#define arch_atomic_dec arch_atomic_dec
-/**
- * arch_atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, e);
}
#define arch_atomic_dec_and_test arch_atomic_dec_and_test
-/**
- * arch_atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, e);
}
#define arch_atomic_inc_and_test arch_atomic_inc_and_test
-/**
- * arch_atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, s, "er", i);
}
#define arch_atomic_add_negative arch_atomic_add_negative
-/**
- * arch_atomic_add_return - add integer and return
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
#define arch_atomic_add_return arch_atomic_add_return
-/**
- * arch_atomic_sub_return - subtract integer and return
- * @v: pointer of type atomic_t
- * @i: integer value to subtract
- *
- * Atomically subtracts @i from @v and returns @v - @i
- */
static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
{
return arch_atomic_add_return(-i, v);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 808b4eece251..3486d91b8595 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -61,30 +61,12 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-/**
- * arch_atomic64_cmpxchg - cmpxchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @o: expected value
- * @n: new value
- *
- * Atomically sets @v to @n if it was equal to @o and returns
- * the old value.
- */
-
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
{
return arch_cmpxchg64(&v->counter, o, n);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-/**
- * arch_atomic64_xchg - xchg atomic64 variable
- * @v: pointer to type atomic64_t
- * @n: value to assign
- *
- * Atomically xchgs the value of @v to @n and returns
- * the old value.
- */
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
@@ -97,13 +79,6 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
}
#define arch_atomic64_xchg arch_atomic64_xchg
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: value to assign
- *
- * Atomically sets the value of @v to @n.
- */
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
@@ -113,12 +88,6 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
: "eax", "edx", "memory");
}
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically reads the value of @v and returns it.
- */
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 r;
@@ -126,13 +95,6 @@ static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
return r;
}
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + *@v
- */
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
@@ -142,9 +104,6 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-/*
- * Other variants with different arithmetic operators:
- */
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
@@ -172,13 +131,6 @@ static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
@@ -187,13 +139,6 @@ static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
return i;
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
@@ -202,12 +147,6 @@ static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
return i;
}
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
@@ -215,12 +154,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
@@ -228,15 +161,6 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if the add was done, zero otherwise.
- */
static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
{
unsigned low = (unsigned)u;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index c496595bf601..3165c0feedf7 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -10,37 +10,16 @@
#define ATOMIC64_INIT(i) { (i) }
-/**
- * arch_atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
- *
- * Atomically reads the value of @v.
- * Doesn't imply a read memory barrier.
- */
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __READ_ONCE((v)->counter);
}
-/**
- * arch_atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__WRITE_ONCE(v->counter, i);
}
-/**
- * arch_atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
@@ -48,13 +27,6 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
: "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
@@ -62,27 +34,12 @@ static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
: "er" (i), "m" (v->counter) : "memory");
}
-/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
-/**
- * arch_atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
@@ -91,12 +48,6 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
}
#define arch_atomic64_inc arch_atomic64_inc
-/**
- * arch_atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
@@ -105,56 +56,24 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
}
#define arch_atomic64_dec arch_atomic64_dec
-/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
-/**
- * arch_atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
#define arch_atomic64_add_negative arch_atomic64_add_negative
-/**
- * arch_atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 92ae28389940..f25ca2d709d4 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -4,8 +4,6 @@
#include <asm/processor.h>
-extern void check_bugs(void);
-
#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
int ppro_with_ram_bug(void);
#else
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 540573f515b7..d53636506134 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -239,29 +239,4 @@ extern void __add_wrong_size(void)
#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
#define xadd(ptr, inc) __xadd((ptr), (inc), LOCK_PREFIX)
-#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- __typeof__(*(p1)) __old1 = (o1), __new1 = (n1); \
- __typeof__(*(p2)) __old2 = (o2), __new2 = (n2); \
- BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \
- BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \
- VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \
- VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \
- asm volatile(pfx "cmpxchg%c5b %1" \
- CC_SET(e) \
- : CC_OUT(e) (__ret), \
- "+m" (*(p1)), "+m" (*(p2)), \
- "+a" (__old1), "+d" (__old2) \
- : "i" (2 * sizeof(long)), \
- "b" (__new1), "c" (__new2)); \
- __ret; \
-})
-
-#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
-
-#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
- __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
-
#endif /* ASM_X86_CMPXCHG_H */
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 6ba80ce9438d..b5731c51f0f4 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -103,6 +103,6 @@ static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
#endif
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX8)
+#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8)
#endif /* _ASM_X86_CMPXCHG_32_H */
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 0d3beb27b7fe..44b08b53ab32 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,71 @@
arch_try_cmpxchg((ptr), (po), (n)); \
})
-#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
+union __u128_halves {
+ u128 full;
+ struct {
+ u64 low, high;
+ };
+};
+
+#define __arch_cmpxchg128(_ptr, _old, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm volatile(_lock "cmpxchg16b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new, LOCK_PREFIX);
+}
+#define arch_cmpxchg128 arch_cmpxchg128
+
+static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new)
+{
+ return __arch_cmpxchg128(ptr, old, new,);
+}
+#define arch_cmpxchg128_local arch_cmpxchg128_local
+
+#define __arch_try_cmpxchg128(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u128_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm volatile(_lock "cmpxchg16b %[ptr]" \
+ CC_SET(e) \
+ : CC_OUT(e) (ret), \
+ [ptr] "+m" (*ptr), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new, LOCK_PREFIX);
+}
+#define arch_try_cmpxchg128 arch_try_cmpxchg128
+
+static __always_inline bool arch_try_cmpxchg128_local(volatile u128 *ptr, u128 *oldp, u128 new)
+{
+ return __arch_try_cmpxchg128(ptr, oldp, new,);
+}
+#define arch_try_cmpxchg128_local arch_try_cmpxchg128_local
+
+#define system_has_cmpxchg128() boot_cpu_has(X86_FEATURE_CX16)
#endif /* _ASM_X86_CMPXCHG_64_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index eb08796002f3..6ae2d16a7613 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -10,30 +10,13 @@ enum cc_vendor {
CC_VENDOR_INTEL,
};
-#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
extern enum cc_vendor cc_vendor;
-static inline enum cc_vendor cc_get_vendor(void)
-{
- return cc_vendor;
-}
-
-static inline void cc_set_vendor(enum cc_vendor vendor)
-{
- cc_vendor = vendor;
-}
-
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
void cc_set_mask(u64 mask);
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
-static inline enum cc_vendor cc_get_vendor(void)
-{
- return CC_VENDOR_NONE;
-}
-
-static inline void cc_set_vendor(enum cc_vendor vendor) { }
-
static inline u64 cc_mkenc(u64 val)
{
return val;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 78796b98a544..3a233ebff712 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -30,10 +30,7 @@ struct x86_cpu {
#ifdef CONFIG_HOTPLUG_CPU
extern int arch_register_cpu(int num);
extern void arch_unregister_cpu(int);
-extern void start_cpu0(void);
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-extern int _debug_hotplug_cpu(int cpu, int action);
-#endif
+extern void soft_restart_cpu(void);
#endif
extern void ap_init_aperfmperf(void);
@@ -98,4 +95,6 @@ extern u64 x86_read_arch_cap_msr(void);
int intel_find_matching_signature(void *mc, unsigned int csig, int cpf);
int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type);
+extern struct cpumask cpus_stop_mask;
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ce0c8f7d3218..a26bebbdff87 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -38,15 +38,10 @@ enum cpuid_leafs
#define X86_CAP_FMT_NUM "%d:%d"
#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
-#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
#define X86_CAP_FMT "%s"
#define x86_cap_flag(flag) x86_cap_flags[flag]
-#else
-#define X86_CAP_FMT X86_CAP_FMT_NUM
-#define x86_cap_flag x86_cap_flag_num
-#endif
/*
* In order to save room, we index into this array by doing
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index c5aed9e9226c..4acfd57de8f1 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -4,11 +4,6 @@
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
-extern cpumask_var_t cpu_callin_mask;
-extern cpumask_var_t cpu_callout_mask;
-extern cpumask_var_t cpu_initialized_mask;
-extern cpumask_var_t cpu_sibling_setup_mask;
-
extern void setup_cpu_local_masks(void);
/*
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
index 54a6e4a2e132..de0e88b32207 100644
--- a/arch/x86/include/asm/doublefault.h
+++ b/arch/x86/include/asm/doublefault.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_DOUBLEFAULT_H
#define _ASM_X86_DOUBLEFAULT_H
+#include <linux/linkage.h>
+
#ifdef CONFIG_X86_32
extern void doublefault_init_cpu_tss(void);
#else
@@ -10,4 +12,6 @@ static inline void doublefault_init_cpu_tss(void)
}
#endif
+asmlinkage void __noreturn doublefault_shim(void);
+
#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 419280d263d2..8b4be7cecdb8 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -31,6 +31,8 @@ extern unsigned long efi_mixed_mode_stack_pa;
#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
+#define EFI_UNACCEPTED_UNIT_SIZE PMD_SIZE
+
/*
* The EFI services are called through variadic functions in many cases. These
* functions are implemented in assembler and support only a fixed number of
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 503a577814b2..b475d9a582b8 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -109,7 +109,7 @@ extern void fpu_reset_from_exception_fixup(void);
/* Boot, hotplug and resume */
extern void fpu__init_cpu(void);
-extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_system(void);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 5061ac98ffa1..b8d4a07f9595 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -106,6 +106,9 @@ struct dyn_arch_ftrace {
#ifndef __ASSEMBLY__
+void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
+ unsigned long frame_pointer);
+
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
extern void set_ftrace_ops_ro(void);
#else
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 768aa234cbb4..29e083b92813 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -40,8 +40,6 @@ extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern void init_ISA_irqs(void);
-extern void __init init_IRQ(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
bool exclude_self);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9646ed6e8c0b..180b1cbfcc4e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -350,4 +350,7 @@ static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
+
+unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len);
+
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index b7126701574c..7f97a8a97e24 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -17,6 +17,12 @@
#include <asm/bootparam.h>
+#ifdef CONFIG_X86_MEM_ENCRYPT
+void __init mem_encrypt_init(void);
+#else
+static inline void mem_encrypt_init(void) { }
+#endif
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask;
@@ -87,9 +93,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
#endif /* CONFIG_AMD_MEM_ENCRYPT */
-/* Architecture __weak replacement functions */
-void __init mem_encrypt_init(void);
-
void add_encrypt_protection_map(void);
/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 49bb4f2bd300..88d9ef98e087 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -257,6 +257,11 @@ void hv_set_register(unsigned int reg, u64 value);
u64 hv_get_non_nested_register(unsigned int reg);
void hv_set_non_nested_register(unsigned int reg, u64 value);
+static __always_inline u64 hv_raw_get_register(unsigned int reg)
+{
+ return __rdmsr(reg);
+}
+
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index f0eeaf6e5f5f..090d658a85a6 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -23,14 +23,43 @@
#ifndef _ASM_X86_MTRR_H
#define _ASM_X86_MTRR_H
+#include <linux/bits.h>
#include <uapi/asm/mtrr.h>
+/* Defines for hardware MTRR registers. */
+#define MTRR_CAP_VCNT GENMASK(7, 0)
+#define MTRR_CAP_FIX BIT_MASK(8)
+#define MTRR_CAP_WC BIT_MASK(10)
+
+#define MTRR_DEF_TYPE_TYPE GENMASK(7, 0)
+#define MTRR_DEF_TYPE_FE BIT_MASK(10)
+#define MTRR_DEF_TYPE_E BIT_MASK(11)
+
+#define MTRR_DEF_TYPE_ENABLE (MTRR_DEF_TYPE_FE | MTRR_DEF_TYPE_E)
+#define MTRR_DEF_TYPE_DISABLE ~(MTRR_DEF_TYPE_TYPE | MTRR_DEF_TYPE_ENABLE)
+
+#define MTRR_PHYSBASE_TYPE GENMASK(7, 0)
+#define MTRR_PHYSBASE_RSVD GENMASK(11, 8)
+
+#define MTRR_PHYSMASK_RSVD GENMASK(10, 0)
+#define MTRR_PHYSMASK_V BIT_MASK(11)
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+ unsigned char enabled;
+ bool have_fixed;
+ mtrr_type def_type;
+};
+
/*
* The following functions are for use by other drivers that cannot use
* arch_phys_wc_add and arch_phys_wc_del.
*/
# ifdef CONFIG_MTRR
void mtrr_bp_init(void);
+void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
+ mtrr_type def_type);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
@@ -40,7 +69,6 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment);
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
-extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
@@ -48,12 +76,21 @@ void mtrr_disable(void);
void mtrr_enable(void);
void mtrr_generic_set_state(void);
# else
+static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
+ unsigned int num_var,
+ mtrr_type def_type)
+{
+}
+
static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
{
/*
- * Return no-MTRRs:
+ * Return the default MTRR type, without any known other types in
+ * that range.
*/
- return MTRR_TYPE_INVALID;
+ *uniform = 1;
+
+ return MTRR_TYPE_UNCACHABLE;
}
#define mtrr_save_fixed_ranges(arg) do {} while (0)
#define mtrr_save_state() do {} while (0)
@@ -79,9 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
{
return 0;
}
-static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
-{
-}
#define mtrr_bp_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
#define mtrr_disable() do {} while (0)
@@ -121,7 +155,8 @@ struct mtrr_gentry32 {
#endif /* CONFIG_COMPAT */
/* Bit fields for enabled in struct mtrr_state_type */
-#define MTRR_STATE_MTRR_FIXED_ENABLED 0x01
-#define MTRR_STATE_MTRR_ENABLED 0x02
+#define MTRR_STATE_SHIFT 10
+#define MTRR_STATE_MTRR_FIXED_ENABLED (MTRR_DEF_TYPE_FE >> MTRR_STATE_SHIFT)
+#define MTRR_STATE_MTRR_ENABLED (MTRR_DEF_TYPE_E >> MTRR_STATE_SHIFT)
#endif /* _ASM_X86_MTRR_H */
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index c5573eaa5bb9..1c1b7550fa55 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -34,6 +34,8 @@
#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
#define BYTES_NOP8 0x3e,BYTES_NOP7
+#define ASM_NOP_MAX 8
+
#else
/*
@@ -47,6 +49,9 @@
* 6: osp nopl 0x00(%eax,%eax,1)
* 7: nopl 0x00000000(%eax)
* 8: nopl 0x00000000(%eax,%eax,1)
+ * 9: cs nopl 0x00000000(%eax,%eax,1)
+ * 10: osp cs nopl 0x00000000(%eax,%eax,1)
+ * 11: osp osp cs nopl 0x00000000(%eax,%eax,1)
*/
#define BYTES_NOP1 0x90
#define BYTES_NOP2 0x66,BYTES_NOP1
@@ -56,6 +61,15 @@
#define BYTES_NOP6 0x66,BYTES_NOP5
#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+#define BYTES_NOP9 0x2e,BYTES_NOP8
+#define BYTES_NOP10 0x66,BYTES_NOP9
+#define BYTES_NOP11 0x66,BYTES_NOP10
+
+#define ASM_NOP9 _ASM_BYTES(BYTES_NOP9)
+#define ASM_NOP10 _ASM_BYTES(BYTES_NOP10)
+#define ASM_NOP11 _ASM_BYTES(BYTES_NOP11)
+
+#define ASM_NOP_MAX 11
#endif /* CONFIG_64BIT */
@@ -68,8 +82,6 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#define ASM_NOP_MAX 8
-
#ifndef __ASSEMBLY__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index edb2b0cb8efe..55388c9f7601 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -84,12 +84,12 @@
movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
#define RESET_CALL_DEPTH \
- mov $0x80, %rax; \
- shl $56, %rax; \
+ xor %eax, %eax; \
+ bts $63, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
#define RESET_CALL_DEPTH_FROM_CALL \
- mov $0xfc, %rax; \
+ movb $0xfc, %al; \
shl $56, %rax; \
movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h
new file mode 100644
index 000000000000..07bacf3e160e
--- /dev/null
+++ b/arch/x86/include/asm/orc_header.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER \
+ __used __section(".orc_header") __aligned(4) \
+ static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 13c0d63ed55e..34734d730463 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,6 +210,67 @@ do { \
(typeof(_var))(unsigned long) pco_old__; \
})
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
+({ \
+ union { \
+ u64 var; \
+ struct { \
+ u32 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "esi"); \
+ \
+ old__.var; \
+})
+
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#endif
+
+#ifdef CONFIG_X86_64
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+
+#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
+({ \
+ union { \
+ u128 var; \
+ struct { \
+ u64 low, high; \
+ }; \
+ } old__, new__; \
+ \
+ old__.var = _oval; \
+ new__.var = _nval; \
+ \
+ asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : [var] "+m" (_var), \
+ "+a" (old__.low), \
+ "+d" (old__.high) \
+ : "b" (new__.low), \
+ "c" (new__.high) \
+ : "memory", "rsi"); \
+ \
+ old__.var; \
+})
+
+#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#endif
+
/*
* this_cpu_read() makes gcc load the percpu variable every time it is
* accessed while this_cpu_read_stable() allows the value to be cached.
@@ -290,23 +351,6 @@ do { \
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
-#ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- asm volatile("cmpxchg8b "__percpu_arg(1) \
- CC_SET(z) \
- : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
- : "b" (__n1), "c" (__n2)); \
- __ret; \
-})
-
-#define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double
-#endif /* CONFIG_X86_CMPXCHG64 */
-
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -329,30 +373,6 @@ do { \
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
-
-/*
- * Pretty complex macro to generate cmpxchg16 instruction. The instruction
- * is not supported on early AMD64 processors so we must be able to emulate
- * it in software. The address used in the cmpxchg16 instruction must be
- * aligned to a 16 byte boundary.
- */
-#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2) \
-({ \
- bool __ret; \
- typeof(pcp1) __o1 = (o1), __n1 = (n1); \
- typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
- "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
- X86_FEATURE_CX16, \
- ASM_OUTPUT2("=a" (__ret), "+m" (pcp1), \
- "+m" (pcp2), "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
- __ret; \
-})
-
-#define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-#define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double
-
#endif
static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index abf09882f58b..85a9fd5a3ec3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -32,11 +32,21 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
+#define INTEL_FIXED_BITS_MASK 0xFULL
+#define INTEL_FIXED_BITS_STRIDE 4
+#define INTEL_FIXED_0_KERNEL (1ULL << 0)
+#define INTEL_FIXED_0_USER (1ULL << 1)
+#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
+#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
+#define intel_fixed_bits_by_idx(_idx, _bits) \
+ ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
+
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
@@ -478,8 +488,10 @@ struct pebs_xmm {
#ifdef CONFIG_X86_LOCAL_APIC
extern u32 get_ibs_caps(void);
+extern int forward_event_to_ibs(struct perf_event *event);
#else
static inline u32 get_ibs_caps(void) { return 0; }
+static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; }
#endif
#ifdef CONFIG_PERF_EVENTS
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 15ae4d6ba476..5700bb337987 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -27,6 +27,7 @@
extern pgd_t early_top_pgt[PTRS_PER_PGD];
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
+struct seq_file;
void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
bool user);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7929327abe00..a629b1b9f65a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -237,8 +237,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) })
-#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val })
-#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val })
+#define __swp_entry_to_pte(x) (__pte((x).val))
+#define __swp_entry_to_pmd(x) (__pmd((x).val))
extern void cleanup_highmap(void);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 447d4bee25c4..ba3e2554799a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -513,9 +513,6 @@ extern void native_pagetable_init(void);
#define native_pagetable_init paging_init
#endif
-struct seq_file;
-extern void arch_report_meminfo(struct seq_file *m);
-
enum pg_level {
PG_LEVEL_NONE,
PG_LEVEL_4K,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a1e4fa58b357..d46300e94f85 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -551,7 +551,6 @@ extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
-extern void cpu_init_secondary(void);
extern void cpu_init_exception_handling(void);
extern void cr4_init(void);
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index f6a1737c77be..87e5482acd0d 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -52,6 +52,7 @@ struct trampoline_header {
u64 efer;
u32 cr4;
u32 flags;
+ u32 lock;
#endif
};
@@ -64,6 +65,8 @@ extern unsigned long initial_stack;
extern unsigned long initial_vc_handler;
#endif
+extern u32 *trampoline_lock;
+
extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 0759af9b1acf..b463fcbd4b90 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -106,8 +106,13 @@ enum psc_op {
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
-/* SNP Page State Change NAE event */
-#define VMGEXIT_PSC_MAX_ENTRY 253
+/*
+ * SNP Page State Change NAE event
+ * The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
+ * is a local stack variable in set_pages_state(). Do not increase this value
+ * without evaluating the impact to stack usage.
+ */
+#define VMGEXIT_PSC_MAX_ENTRY 64
struct psc_hdr {
u16 cur_entry;
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 13dc2a9d23c1..66c806784c52 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -14,6 +14,7 @@
#include <asm/insn.h>
#include <asm/sev-common.h>
#include <asm/bootparam.h>
+#include <asm/coco.h>
#define GHCB_PROTOCOL_MIN 1ULL
#define GHCB_PROTOCOL_MAX 2ULL
@@ -80,11 +81,15 @@ extern void vc_no_ghcb(void);
extern void vc_boot_ghcb(void);
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+/* PVALIDATE return codes */
+#define PVALIDATE_FAIL_SIZEMISMATCH 6
+
/* Software defined (when rFlags.CF = 1) */
#define PVALIDATE_FAIL_NOUPDATE 255
/* RMP page size */
#define RMP_PG_SIZE_4K 0
+#define RMP_PG_SIZE_2M 1
#define RMPADJUST_VMSA_PAGE_BIT BIT(16)
@@ -136,24 +141,26 @@ struct snp_secrets_page_layout {
} __packed;
#ifdef CONFIG_AMD_MEM_ENCRYPT
-extern struct static_key_false sev_es_enable_key;
extern void __sev_es_ist_enter(struct pt_regs *regs);
extern void __sev_es_ist_exit(void);
static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
{
- if (static_branch_unlikely(&sev_es_enable_key))
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
__sev_es_ist_enter(regs);
}
static __always_inline void sev_es_ist_exit(void)
{
- if (static_branch_unlikely(&sev_es_enable_key))
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
__sev_es_ist_exit();
}
extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
extern void __sev_es_nmi_complete(void);
static __always_inline void sev_es_nmi_complete(void)
{
- if (static_branch_unlikely(&sev_es_enable_key))
+ if (cc_vendor == CC_VENDOR_AMD &&
+ cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
@@ -192,16 +199,17 @@ struct snp_guest_request_ioctl;
void setup_ghcb(void);
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned int npages);
+ unsigned long npages);
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned int npages);
+ unsigned long npages);
void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
-void snp_set_memory_shared(unsigned long vaddr, unsigned int npages);
-void snp_set_memory_private(unsigned long vaddr, unsigned int npages);
+void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
+void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __init __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+void snp_accept_memory(phys_addr_t start, phys_addr_t end);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -212,12 +220,12 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
static inline void setup_ghcb(void) { }
static inline void __init
-early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
-early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned int npages) { }
+early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
-static inline void snp_set_memory_shared(unsigned long vaddr, unsigned int npages) { }
-static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npages) { }
+static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
+static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
@@ -225,6 +233,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
{
return -ENOTTY;
}
+
+static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
#endif
#endif
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 2631e01f6e0f..7513b3bb69b7 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -10,6 +10,20 @@
#define TDX_CPUID_LEAF_ID 0x21
#define TDX_IDENT "IntelTDX "
+/* TDX module Call Leaf IDs */
+#define TDX_GET_INFO 1
+#define TDX_GET_VEINFO 3
+#define TDX_GET_REPORT 4
+#define TDX_ACCEPT_PAGE 6
+#define TDX_WR 8
+
+/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+
+/* TDX hypercall Leaf IDs */
+#define TDVMCALL_MAP_GPA 0x10001
+#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+
#ifndef __ASSEMBLY__
/*
@@ -37,8 +51,58 @@ struct tdx_hypercall_args {
u64 __tdx_hypercall(struct tdx_hypercall_args *args);
u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args);
+/*
+ * Wrapper for standard use of __tdx_hypercall with no output aside from
+ * return code.
+ */
+static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
+{
+ struct tdx_hypercall_args args = {
+ .r10 = TDX_HYPERCALL_STANDARD,
+ .r11 = fn,
+ .r12 = r12,
+ .r13 = r13,
+ .r14 = r14,
+ .r15 = r15,
+ };
+
+ return __tdx_hypercall(&args);
+}
+
+
/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void);
+/*
+ * Used in __tdx_module_call() to gather the output registers' values of the
+ * TDCALL instruction when requesting services from the TDX module. This is a
+ * software only structure and not part of the TDX module/VMM ABI
+ */
+struct tdx_module_output {
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 r10;
+ u64 r11;
+};
+
+/* Used to communicate with the TDX module */
+u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
+ struct tdx_module_output *out);
+
+bool tdx_accept_memory(phys_addr_t start, phys_addr_t end);
+
+/*
+ * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
+ * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
+ * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
+ * guest sides of these calls.
+ */
+static __always_inline u64 hcall_func(u64 exit_reason)
+{
+ return exit_reason;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 5b1ed650b124..84eab2724875 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -85,6 +85,4 @@ struct rt_sigframe_x32 {
#endif /* CONFIG_X86_64 */
-void __init init_sigframe_size(void);
-
#endif /* _ASM_X86_SIGFRAME_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4e91054c84be..600cf25dbfc6 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,7 +38,9 @@ struct smp_ops {
void (*crash_stop_other_cpus)(void);
void (*smp_send_reschedule)(int cpu);
- int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
+ void (*cleanup_dead_cpu)(unsigned cpu);
+ void (*poll_sync_state)(void);
+ int (*kick_ap_alive)(unsigned cpu, struct task_struct *tidle);
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
@@ -78,11 +80,6 @@ static inline void smp_cpus_done(unsigned int max_cpus)
smp_ops.smp_cpus_done(max_cpus);
}
-static inline int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
- return smp_ops.cpu_up(cpu, tidle);
-}
-
static inline int __cpu_disable(void)
{
return smp_ops.cpu_disable();
@@ -90,7 +87,8 @@ static inline int __cpu_disable(void)
static inline void __cpu_die(unsigned int cpu)
{
- smp_ops.cpu_die(cpu);
+ if (smp_ops.cpu_die)
+ smp_ops.cpu_die(cpu);
}
static inline void __noreturn play_dead(void)
@@ -121,22 +119,23 @@ void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
-int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle);
int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
void __noreturn hlt_play_dead(void);
void native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
-void cond_wakeup_cpu0(void);
+
+void smp_kick_mwait_play_dead(void);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+bool smp_park_other_cpus_in_init(void);
+
void smp_store_boot_cpu_info(void);
void smp_store_cpu_info(int id);
@@ -201,7 +200,14 @@ extern void nmi_selftest(void);
#endif
extern unsigned int smpboot_control;
+extern unsigned long apic_mmio_base;
#endif /* !__ASSEMBLY__ */
+/* Control bits for startup_64 */
+#define STARTUP_READ_APICID 0x80000000
+
+/* Top 8 bits are reserved for control */
+#define STARTUP_PARALLEL_MASK 0xFF000000
+
#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 5b85987a5e97..4fb36fba4b5a 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -127,9 +127,11 @@ static inline int syscall_get_arch(struct task_struct *task)
}
void do_syscall_64(struct pt_regs *regs, int nr);
-void do_int80_syscall_32(struct pt_regs *regs);
-long do_fast_syscall_32(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
+void do_int80_syscall_32(struct pt_regs *regs);
+long do_fast_syscall_32(struct pt_regs *regs);
+long do_SYSENTER_32(struct pt_regs *regs);
+
#endif /* _ASM_X86_SYSCALL_H */
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 28d889c9aa16..603e6d1e9d4a 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -5,6 +5,8 @@
#include <linux/init.h>
#include <linux/bits.h>
+
+#include <asm/errno.h>
#include <asm/ptrace.h>
#include <asm/shared/tdx.h>
@@ -21,21 +23,6 @@
#ifndef __ASSEMBLY__
/*
- * Used to gather the output registers values of the TDCALL and SEAMCALL
- * instructions when requesting services from the TDX module.
- *
- * This is a software only structure and not part of the TDX module/VMM ABI.
- */
-struct tdx_module_output {
- u64 rcx;
- u64 rdx;
- u64 r8;
- u64 r9;
- u64 r10;
- u64 r11;
-};
-
-/*
* Used by the #VE exception handler to gather the #VE exception
* info from the TDX module. This is a software only structure
* and not part of the TDX module/VMM ABI.
@@ -55,10 +42,6 @@ struct ve_info {
void __init tdx_early_init(void);
-/* Used to communicate with the TDX module */
-u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
- struct tdx_module_output *out);
-
void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f1cccba52eb9..d63b02940747 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -232,9 +232,6 @@ static inline int arch_within_stack_frames(const void * const stack,
current_thread_info()->status & TS_COMPAT)
#endif
-extern void arch_task_cache_init(void);
-extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-extern void arch_release_task_struct(struct task_struct *tsk);
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index a53961c64a56..f360104ed172 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -6,7 +6,6 @@
#include <asm/mc146818rtc.h>
extern void hpet_time_init(void);
-extern void time_init(void);
extern bool pit_timer_init(void);
extern bool tsc_clocksource_watchdog_disabled(void);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 75bfaa421030..80450e1d5385 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -14,6 +14,8 @@
#include <asm/processor-flags.h>
#include <asm/pgtable.h>
+DECLARE_PER_CPU(u64, tlbstate_untag_mask);
+
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
@@ -54,15 +56,6 @@ static inline void cr4_clear_bits(unsigned long mask)
local_irq_restore(flags);
}
-#ifdef CONFIG_ADDRESS_MASKING
-DECLARE_PER_CPU(u64, tlbstate_untag_mask);
-
-static inline u64 current_untag_mask(void)
-{
- return this_cpu_read(tlbstate_untag_mask);
-}
-#endif
-
#ifndef MODULE
/*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 458c891a8273..caf41c4869a0 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -31,9 +31,9 @@
* CONFIG_NUMA.
*/
#include <linux/numa.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_NUMA
-#include <linux/cpumask.h>
#include <asm/mpspec.h>
#include <asm/percpu.h>
@@ -139,23 +139,31 @@ static inline int topology_max_smt_threads(void)
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
-int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
-bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
-#else
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
+
+/**
+ * topology_is_primary_thread - Check whether CPU is the primary SMT thread
+ * @cpu: CPU to check
+ */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
+}
+#else /* CONFIG_SMP */
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
static inline int
topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
-static inline int topology_phys_to_logical_die(unsigned int die,
- unsigned int cpu) { return 0; }
static inline int topology_max_die_per_package(void) { return 1; }
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
-#endif
+#endif /* !CONFIG_SMP */
static inline void arch_fix_phys_package_id(int num, u32 slot)
{
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index fbdc3d951494..594fce0ca744 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -32,7 +32,6 @@ extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
extern void tsc_early_init(void);
extern void tsc_init(void);
-extern unsigned long calibrate_delay_is_known(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
@@ -55,12 +54,10 @@ extern bool tsc_async_resets;
#ifdef CONFIG_X86_TSC
extern bool tsc_store_and_check_tsc_adjust(bool bootcpu);
extern void tsc_verify_tsc_adjust(bool resume);
-extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
#else
static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; }
static inline void tsc_verify_tsc_adjust(bool resume) { }
-static inline void check_tsc_sync_source(int cpu) { }
static inline void check_tsc_sync_target(void) { }
#endif
diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h
new file mode 100644
index 000000000000..f5937e9866ac
--- /dev/null
+++ b/arch/x86/include/asm/unaccepted_memory.h
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_UNACCEPTED_MEMORY_H
+#define _ASM_X86_UNACCEPTED_MEMORY_H
+
+#include <linux/efi.h>
+#include <asm/tdx.h>
+#include <asm/sev.h>
+
+static inline void arch_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ /* Platform-specific memory-acceptance call goes here */
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
+ if (!tdx_accept_memory(start, end))
+ panic("TDX: Failed to accept memory\n");
+ } else if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
+ snp_accept_memory(start, end);
+ } else {
+ panic("Cannot accept memory: unknown platform\n");
+ }
+}
+
+static inline struct efi_unaccepted_memory *efi_get_unaccepted_table(void)
+{
+ if (efi.unaccepted == EFI_INVALID_TABLE_ADDR)
+ return NULL;
+ return __va(efi.unaccepted);
+}
+#endif
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 01cb9692b160..85cc57cb6539 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -76,9 +76,18 @@
#else
+#define UNWIND_HINT_UNDEFINED \
+ UNWIND_HINT(UNWIND_HINT_TYPE_UNDEFINED, 0, 0, 0)
+
#define UNWIND_HINT_FUNC \
UNWIND_HINT(UNWIND_HINT_TYPE_FUNC, ORC_REG_SP, 8, 0)
+#define UNWIND_HINT_SAVE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_SAVE, 0, 0, 0)
+
+#define UNWIND_HINT_RESTORE \
+ UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3e3197917be..5fa76c2ced51 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -177,6 +177,7 @@ struct uv_hub_info_s {
unsigned short nr_possible_cpus;
unsigned short nr_online_cpus;
short memory_nid;
+ unsigned short *node_to_socket;
};
/* CPU specific info with a pointer to the hub common info struct */
@@ -519,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
}
+static inline int uv_pnode_to_socket(int pnode)
+{
+ unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+ return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
unsigned int m_val = uv_hub_info->m_val;
unsigned long base;
- unsigned short sockid, node, *p2s;
+ unsigned short sockid;
if (m_val)
return __va(((unsigned long)pnode << m_val) | offset);
- p2s = uv_hub_info->pnode_to_socket;
- sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
- node = uv_socket_to_node(sockid);
+ sockid = uv_pnode_to_socket(pnode);
/* limit address of previous socket is our base, except node 0 is 0 */
- if (!node)
+ if (sockid == 0)
return __va((unsigned long)offset);
- base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+ base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
return __va(base << UV_GAM_RANGE_SHFT | offset);
}
@@ -644,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu)
/* Blade number to Node number (UV2..UV4 is 1:1) */
static inline int uv_blade_to_node(int blade)
{
- return blade;
+ return uv_socket_to_node(blade);
}
/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
@@ -656,23 +662,27 @@ static inline int uv_numa_blade_id(void)
/*
* Convert linux node number to the UV blade number.
* .. Currently for UV2 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
*/
static inline int uv_node_to_blade_id(int nid)
{
- return nid;
+ unsigned short *n2s = uv_hub_info->node_to_socket;
+
+ return n2s ? n2s[nid] : nid;
}
/* Convert a CPU number to the UV blade number */
static inline int uv_cpu_to_blade_id(int cpu)
{
- return uv_node_to_blade_id(cpu_to_node(cpu));
+ return uv_cpu_hub_info(cpu)->numa_blade_id;
}
/* Convert a blade id to the PNODE of the blade */
static inline int uv_blade_to_pnode(int bid)
{
- return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
+ unsigned short *s2p = uv_hub_info->socket_to_pnode;
+
+ return s2p ? s2p[bid] : bid;
}
/* Nid of memory node on blade. -1 if no blade-local memory */
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 57fa67373262..bb45812889dd 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -4199,6 +4199,13 @@ union uvh_rh_gam_mmioh_overlay_config1_u {
#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000007fffUL
+/* UVH common defines */
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK ( \
+ is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK : \
+ 0)
+
union uvh_rh_gam_mmioh_redirect_config0_u {
unsigned long v;
@@ -4247,8 +4254,8 @@ union uvh_rh_gam_mmioh_redirect_config0_u {
0)
/* UV4A unique defines */
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT 0
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK 0x0000000000000fffUL
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000000fffUL
/* UV4 unique defines */
#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
@@ -4258,6 +4265,13 @@ union uvh_rh_gam_mmioh_redirect_config0_u {
#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT 0
#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK 0x0000000000007fffUL
+/* UVH common defines */
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK ( \
+ is_uv(UV4A) ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ is_uv(UV4) ? UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ is_uv(UV3) ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK : \
+ 0)
+
union uvh_rh_gam_mmioh_redirect_config1_u {
unsigned long v;
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 4cf6794f9d68..c81858d903dc 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -231,14 +231,19 @@ static u64 vread_pvclock(void)
ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
} while (pvclock_read_retry(pvti, version));
- return ret;
+ return ret & S64_MAX;
}
#endif
#ifdef CONFIG_HYPERV_TIMER
static u64 vread_hvclock(void)
{
- return hv_read_tsc_page(&hvclock_page);
+ u64 tsc, time;
+
+ if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time))
+ return time & S64_MAX;
+
+ return U64_MAX;
}
#endif
@@ -246,7 +251,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode,
const struct vdso_data *vd)
{
if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
- return (u64)rdtsc_ordered();
+ return (u64)rdtsc_ordered() & S64_MAX;
/*
* For any memory-mapped vclock type, we need to make sure that gcc
* doesn't cleverly hoist a load before the mode check. Otherwise we
@@ -284,6 +289,9 @@ static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
* which can be invalidated asynchronously and indicate invalidation by
* returning U64_MAX, which can be effectively tested by checking for a
* negative value after casting it to s64.
+ *
+ * This effectively forces a S64_MAX mask on the calculations, unlike the
+ * U64_MAX mask normally used by x86 clocksources.
*/
static inline bool arch_vdso_cycles_ok(u64 cycles)
{
@@ -303,18 +311,29 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* @last. If not then use @last, which is the base time of the current
* conversion period.
*
- * This variant also removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
+ * This variant also uses a custom mask because while the clocksource mask of
+ * all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
+ * U64_MAX as an exception value, additionally arch_vdso_cycles_ok() above
+ * declares everything with the MSB/Sign-bit set as invalid. Therefore the
+ * effective mask is S64_MAX.
*/
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
- if (cycles > last)
- return (cycles - last) * mult;
- return 0;
+ /*
+ * Due to the MSB/Sign-bit being used as invalid marker (see
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX.
+ */
+ u64 delta = (cycles - last) & S64_MAX;
+
+ /*
+ * Due to the above mentioned TSC wobbles, filter out negative motion.
+ * Per the above masking, the effective sign bit is now bit 62.
+ */
+ if (unlikely(delta & (1ULL << 62)))
+ return 0;
+
+ return delta * mult;
}
#define vdso_calc_delta vdso_calc_delta
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 88085f369ff6..5240d88db52a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -150,7 +150,7 @@ struct x86_init_acpi {
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
*/
struct x86_guest {
- void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
@@ -177,11 +177,14 @@ struct x86_init_ops {
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
+ * @parallel_bringup: Parallel bringup control
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
};
struct timespec64;
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
index 376563f2bac1..3a8a8eb8ac3a 100644
--- a/arch/x86/include/uapi/asm/mtrr.h
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -81,14 +81,6 @@ typedef __u8 mtrr_type;
#define MTRR_NUM_FIXED_RANGES 88
#define MTRR_MAX_VAR_RANGES 256
-struct mtrr_state_type {
- struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
- mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
@@ -115,9 +107,9 @@ struct mtrr_state_type {
#define MTRR_NUM_TYPES 7
/*
- * Invalid MTRR memory type. mtrr_type_lookup() returns this value when
- * MTRRs are disabled. Note, this value is allocated from the reserved
- * values (0x7-0xff) of the MTRR memory types.
+ * Invalid MTRR memory type. No longer used outside of MTRR code.
+ * Note, this value is allocated from the reserved values (0x7-0xff) of
+ * the MTRR memory types.
*/
#define MTRR_TYPE_INVALID 0xff
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 1328c221af30..6dfecb27b846 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
#include <asm/cacheflush.h>
#include <asm/realmode.h>
#include <asm/hypervisor.h>
+#include <asm/smp.h>
#include <linux/ftrace.h>
#include "../../realmode/rm/wakeup.h"
@@ -127,7 +128,13 @@ int x86_acpi_suspend_lowlevel(void)
* value is in the actual %rsp register.
*/
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
- smpboot_control = smp_processor_id();
+ /*
+ * Ensure the CPU knows which one it is when it comes back, if
+ * it isn't in parallel mode and expected to work that out for
+ * itself.
+ */
+ if (!(smpboot_control & STARTUP_PARALLEL_MASK))
+ smpboot_control = smp_processor_id();
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0L;
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 171a40c74db6..054c15a2f860 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -12,7 +12,6 @@ extern int wakeup_pmode_return;
extern u8 wake_sleep_flags;
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
extern void do_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f615e0cb6d93..72646d75b6ff 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -37,11 +37,23 @@ EXPORT_SYMBOL_GPL(alternatives_patched);
#define MAX_PATCH_LEN (255-1)
-static int __initdata_or_module debug_alternative;
+#define DA_ALL (~0)
+#define DA_ALT 0x01
+#define DA_RET 0x02
+#define DA_RETPOLINE 0x04
+#define DA_ENDBR 0x08
+#define DA_SMP 0x10
+
+static unsigned int __initdata_or_module debug_alternative;
static int __init debug_alt(char *str)
{
- debug_alternative = 1;
+ if (str && *str == '=')
+ str++;
+
+ if (!str || kstrtouint(str, 0, &debug_alternative))
+ debug_alternative = DA_ALL;
+
return 1;
}
__setup("debug-alternative", debug_alt);
@@ -55,15 +67,15 @@ static int __init setup_noreplace_smp(char *str)
}
__setup("noreplace-smp", setup_noreplace_smp);
-#define DPRINTK(fmt, args...) \
+#define DPRINTK(type, fmt, args...) \
do { \
- if (debug_alternative) \
+ if (debug_alternative & DA_##type) \
printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args); \
} while (0)
-#define DUMP_BYTES(buf, len, fmt, args...) \
+#define DUMP_BYTES(type, buf, len, fmt, args...) \
do { \
- if (unlikely(debug_alternative)) { \
+ if (unlikely(debug_alternative & DA_##type)) { \
int j; \
\
if (!(len)) \
@@ -86,6 +98,11 @@ static const unsigned char x86nops[] =
BYTES_NOP6,
BYTES_NOP7,
BYTES_NOP8,
+#ifdef CONFIG_64BIT
+ BYTES_NOP9,
+ BYTES_NOP10,
+ BYTES_NOP11,
+#endif
};
const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
@@ -99,19 +116,44 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
x86nops + 1 + 2 + 3 + 4 + 5,
x86nops + 1 + 2 + 3 + 4 + 5 + 6,
x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
+#ifdef CONFIG_64BIT
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9,
+ x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10,
+#endif
};
-/* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void __init_or_module add_nops(void *insns, unsigned int len)
+/*
+ * Fill the buffer with a single effective instruction of size @len.
+ *
+ * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
+ * for every single-byte NOP, try to generate the maximally available NOP of
+ * size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
+ * each single-byte NOP). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
+ * *jump* over instead of executing long and daft NOPs.
+ */
+static void __init_or_module add_nop(u8 *instr, unsigned int len)
{
- while (len > 0) {
- unsigned int noplen = len;
- if (noplen > ASM_NOP_MAX)
- noplen = ASM_NOP_MAX;
- memcpy(insns, x86_nops[noplen], noplen);
- insns += noplen;
- len -= noplen;
+ u8 *target = instr + len;
+
+ if (!len)
+ return;
+
+ if (len <= ASM_NOP_MAX) {
+ memcpy(instr, x86_nops[len], len);
+ return;
}
+
+ if (len < 128) {
+ __text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
+ instr += JMP8_INSN_SIZE;
+ } else {
+ __text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
+ instr += JMP32_INSN_SIZE;
+ }
+
+ for (;instr < target; instr++)
+ *instr = INT3_INSN_OPCODE;
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
@@ -123,133 +165,223 @@ extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
/*
- * Are we looking at a near JMP with a 1 or 4-byte displacement.
+ * Matches NOP and NOPL, not any of the other possible NOPs.
*/
-static inline bool is_jmp(const u8 opcode)
+static bool insn_is_nop(struct insn *insn)
{
- return opcode == 0xeb || opcode == 0xe9;
+ /* Anything NOP, but no REP NOP */
+ if (insn->opcode.bytes[0] == 0x90 &&
+ (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
+ return true;
+
+ /* NOPL */
+ if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
+ return true;
+
+ /* TODO: more nops */
+
+ return false;
}
-static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
+/*
+ * Find the offset of the first non-NOP instruction starting at @offset
+ * but no further than @len.
+ */
+static int skip_nops(u8 *instr, int offset, int len)
{
- u8 *next_rip, *tgt_rip;
- s32 n_dspl, o_dspl;
- int repl_len;
+ struct insn insn;
- if (a->replacementlen != 5)
- return;
+ for (; offset < len; offset += insn.length) {
+ if (insn_decode_kernel(&insn, &instr[offset]))
+ break;
- o_dspl = *(s32 *)(insn_buff + 1);
+ if (!insn_is_nop(&insn))
+ break;
+ }
- /* next_rip of the replacement JMP */
- next_rip = repl_insn + a->replacementlen;
- /* target rip of the replacement JMP */
- tgt_rip = next_rip + o_dspl;
- n_dspl = tgt_rip - orig_insn;
+ return offset;
+}
- DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
+/*
+ * Optimize a sequence of NOPs, possibly preceded by an unconditional jump
+ * to the end of the NOP sequence, into a single NOP.
+ */
+static bool __init_or_module
+__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
+{
+ int i = *next - insn->length;
- if (tgt_rip - orig_insn >= 0) {
- if (n_dspl - 2 <= 127)
- goto two_byte_jmp;
- else
- goto five_byte_jmp;
- /* negative offset */
- } else {
- if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
- goto two_byte_jmp;
- else
- goto five_byte_jmp;
+ switch (insn->opcode.bytes[0]) {
+ case JMP8_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ *prev = i;
+ *target = *next + insn->immediate.value;
+ return false;
}
-two_byte_jmp:
- n_dspl -= 2;
+ if (insn_is_nop(insn)) {
+ int nop = i;
- insn_buff[0] = 0xeb;
- insn_buff[1] = (s8)n_dspl;
- add_nops(insn_buff + 2, 3);
+ *next = skip_nops(instr, *next, len);
+ if (*target && *next == *target)
+ nop = *prev;
- repl_len = 2;
- goto done;
+ add_nop(instr + nop, *next - nop);
+ DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
+ return true;
+ }
+
+ *target = 0;
+ return false;
+}
-five_byte_jmp:
- n_dspl -= 5;
+/*
+ * "noinline" to cause control flow change and thus invalidate I$ and
+ * cause refetch after modification.
+ */
+static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+{
+ int prev, target = 0;
- insn_buff[0] = 0xe9;
- *(s32 *)&insn_buff[1] = n_dspl;
+ for (int next, i = 0; i < len; i = next) {
+ struct insn insn;
- repl_len = 5;
+ if (insn_decode_kernel(&insn, &instr[i]))
+ return;
-done:
+ next = i + insn.length;
- DPRINTK("final displ: 0x%08x, JMP 0x%lx",
- n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+ __optimize_nops(instr, len, &insn, &next, &prev, &target);
+ }
}
/*
- * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ * In this context, "source" is where the instructions are placed in the
+ * section .altinstr_replacement, for example during kernel build by the
+ * toolchain.
+ * "Destination" is where the instructions are being patched in by this
+ * machinery.
*
- * @instr: instruction byte stream
- * @instrlen: length of the above
- * @off: offset within @instr where the first NOP has been detected
+ * The source offset is:
*
- * Return: number of NOPs found (and replaced).
+ * src_imm = target - src_next_ip (1)
+ *
+ * and the target offset is:
+ *
+ * dst_imm = target - dst_next_ip (2)
+ *
+ * so rework (1) as an expression for target like:
+ *
+ * target = src_imm + src_next_ip (1a)
+ *
+ * and substitute in (2) to get:
+ *
+ * dst_imm = (src_imm + src_next_ip) - dst_next_ip (3)
+ *
+ * Now, since the instruction stream is 'identical' at src and dst (it
+ * is being copied after all) it can be stated that:
+ *
+ * src_next_ip = src + ip_offset
+ * dst_next_ip = dst + ip_offset (4)
+ *
+ * Substitute (4) in (3) and observe ip_offset being cancelled out to
+ * obtain:
+ *
+ * dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset)
+ * = src_imm + src - dst + ip_offset - ip_offset
+ * = src_imm + src - dst (5)
+ *
+ * IOW, only the relative displacement of the code block matters.
*/
-static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
-{
- unsigned long flags;
- int i = off, nnops;
- while (i < instrlen) {
- if (instr[i] != 0x90)
- break;
+#define apply_reloc_n(n_, p_, d_) \
+ do { \
+ s32 v = *(s##n_ *)(p_); \
+ v += (d_); \
+ BUG_ON((v >> 31) != (v >> (n_-1))); \
+ *(s##n_ *)(p_) = (s##n_)v; \
+ } while (0)
+
- i++;
+static __always_inline
+void apply_reloc(int n, void *ptr, uintptr_t diff)
+{
+ switch (n) {
+ case 1: apply_reloc_n(8, ptr, diff); break;
+ case 2: apply_reloc_n(16, ptr, diff); break;
+ case 4: apply_reloc_n(32, ptr, diff); break;
+ default: BUG();
}
+}
- nnops = i - off;
+static __always_inline
+bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
+{
+ u8 *target = src + offset;
+ /*
+ * If the target is inside the patched block, it's relative to the
+ * block itself and does not need relocation.
+ */
+ return (target < src || target > src + src_len);
+}
- if (nnops <= 1)
- return nnops;
+static void __init_or_module noinline
+apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+{
+ int prev, target = 0;
- local_irq_save(flags);
- add_nops(instr + off, nnops);
- local_irq_restore(flags);
+ for (int next, i = 0; i < len; i = next) {
+ struct insn insn;
- DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+ if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
+ return;
- return nnops;
-}
+ next = i + insn.length;
-/*
- * "noinline" to cause control flow change and thus invalidate I$ and
- * cause refetch after modification.
- */
-static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
-{
- struct insn insn;
- int i = 0;
+ if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
+ continue;
- /*
- * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
- * ones.
- */
- for (;;) {
- if (insn_decode_kernel(&insn, &instr[i]))
- return;
+ switch (insn.opcode.bytes[0]) {
+ case 0x0f:
+ if (insn.opcode.bytes[1] < 0x80 ||
+ insn.opcode.bytes[1] > 0x8f)
+ break;
- /*
- * See if this and any potentially following NOPs can be
- * optimized.
- */
- if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
- i += optimize_nops_range(instr, len, i);
- else
- i += insn.length;
+ fallthrough; /* Jcc.d32 */
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ case JMP8_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case CALL_INSN_OPCODE:
+ if (need_reloc(next + insn.immediate.value, src, src_len)) {
+ apply_reloc(insn.immediate.nbytes,
+ buf + i + insn_offset_immediate(&insn),
+ src - dest);
+ }
- if (i >= len)
- return;
+ /*
+ * Where possible, convert JMP.d32 into JMP.d8.
+ */
+ if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) {
+ s32 imm = insn.immediate.value;
+ imm += src - dest;
+ imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE;
+ if ((imm >> 31) == (imm >> 7)) {
+ buf[i+0] = JMP8_INSN_OPCODE;
+ buf[i+1] = (s8)imm;
+
+ memset(&buf[i+2], INT3_INSN_OPCODE, insn.length - 2);
+ }
+ }
+ break;
+ }
+
+ if (insn_rip_relative(&insn)) {
+ if (need_reloc(next + insn.displacement.value, src, src_len)) {
+ apply_reloc(insn.displacement.nbytes,
+ buf + i + insn_offset_displacement(&insn),
+ src - dest);
+ }
+ }
}
}
@@ -270,7 +402,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
u8 *instr, *replacement;
u8 insn_buff[MAX_PATCH_LEN];
- DPRINTK("alt table %px, -> %px", start, end);
+ DPRINTK(ALT, "alt table %px, -> %px", start, end);
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
@@ -294,47 +426,31 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* - feature not present but ALT_FLAG_NOT is set to mean,
* patch if feature is *NOT* present.
*/
- if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT))
- goto next;
+ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
+ optimize_nops(instr, a->instrlen);
+ continue;
+ }
- DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
+ DPRINTK(ALT, "feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
(a->flags & ALT_FLAG_NOT) ? "!" : "",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, instr, a->instrlen,
replacement, a->replacementlen);
- DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
- DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
-
memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
- /*
- * 0xe8 is a relative jump; fix the offset.
- *
- * Instruction length is checked before the opcode to avoid
- * accessing uninitialized bytes for zero-length replacements.
- */
- if (a->replacementlen == 5 && *insn_buff == 0xe8) {
- *(s32 *)(insn_buff + 1) += replacement - instr;
- DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
- *(s32 *)(insn_buff + 1),
- (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
- }
-
- if (a->replacementlen && is_jmp(replacement[0]))
- recompute_jump(a, instr, replacement, insn_buff);
-
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;
- DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
+ apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
- text_poke_early(instr, insn_buff, insn_buff_sz);
+ DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+ DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
-next:
- optimize_nops(instr, a->instrlen);
+ text_poke_early(instr, insn_buff, insn_buff_sz);
}
}
@@ -555,15 +671,15 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
continue;
}
- DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
+ DPRINTK(RETPOLINE, "retpoline at: %pS (%px) len: %d to: %pS",
addr, addr, insn.length,
addr + insn.length + insn.immediate.value);
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(bytes, len);
- DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(addr, bytes, len);
}
}
@@ -590,13 +706,12 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
int i = 0;
+ /* Patch the custom return thunks... */
if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
- if (x86_return_thunk == __x86_return_thunk)
- return -1;
-
i = JMP32_INSN_SIZE;
__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
} else {
+ /* ... or patch them out if not needed. */
bytes[i++] = RET_INSN_OPCODE;
}
@@ -609,6 +724,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
s32 *s;
+ /*
+ * Do not patch out the default return thunks if those needed are the
+ * ones generated by the compiler.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK) &&
+ (x86_return_thunk == __x86_return_thunk))
+ return;
+
for (s = start; s < end; s++) {
void *dest = NULL, *addr = (void *)s + *s;
struct insn insn;
@@ -630,14 +753,14 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
addr, dest, 5, addr))
continue;
- DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+ DPRINTK(RET, "return thunk at: %pS (%px) len: %d to: %pS",
addr, addr, insn.length,
addr + insn.length + insn.immediate.value);
len = patch_return(addr, &insn, bytes);
if (len == insn.length) {
- DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(addr, bytes, len);
}
}
@@ -655,7 +778,7 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_endbr(void *addr, bool warn)
+static void __init_or_module poison_endbr(void *addr, bool warn)
{
u32 endbr, poison = gen_endbr_poison();
@@ -667,13 +790,13 @@ static void poison_endbr(void *addr, bool warn)
return;
}
- DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+ DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr);
/*
* When we have IBT, the lack of ENDBR will trigger #CP
*/
- DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
+ DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr);
+ DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr);
text_poke_early(addr, &poison, 4);
}
@@ -1148,7 +1271,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
smp->locks_end = locks_end;
smp->text = text;
smp->text_end = text_end;
- DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
+ DPRINTK(SMP, "locks %p -> %p, text %p -> %p, name %s\n",
smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
@@ -1225,6 +1348,20 @@ int alternatives_text_reserved(void *start, void *end)
#endif /* CONFIG_SMP */
#ifdef CONFIG_PARAVIRT
+
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void __init_or_module add_nops(void *insns, unsigned int len)
+{
+ while (len > 0) {
+ unsigned int noplen = len;
+ if (noplen > ASM_NOP_MAX)
+ noplen = ASM_NOP_MAX;
+ memcpy(insns, x86_nops[noplen], noplen);
+ insns += noplen;
+ len -= noplen;
+ }
+}
+
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
@@ -1332,6 +1469,35 @@ static noinline void __init int3_selftest(void)
unregister_die_notifier(&int3_exception_nb);
}
+static __initdata int __alt_reloc_selftest_addr;
+
+__visible noinline void __init __alt_reloc_selftest(void *arg)
+{
+ WARN_ON(arg != &__alt_reloc_selftest_addr);
+}
+
+static noinline void __init alt_reloc_selftest(void)
+{
+ /*
+ * Tests apply_relocation().
+ *
+ * This has a relative immediate (CALL) in a place other than the first
+ * instruction and additionally on x86_64 we get a RIP-relative LEA:
+ *
+ * lea 0x0(%rip),%rdi # 5d0: R_X86_64_PC32 .init.data+0x5566c
+ * call +0 # 5d5: R_X86_64_PLT32 __alt_reloc_selftest-0x4
+ *
+ * Getting this wrong will either crash and burn or tickle the WARN
+ * above.
+ */
+ asm_inline volatile (
+ ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
+ : /* output */
+ : [mem] "m" (__alt_reloc_selftest_addr)
+ : _ASM_ARG1
+ );
+}
+
void __init alternative_instructions(void)
{
int3_selftest();
@@ -1419,6 +1585,8 @@ void __init alternative_instructions(void)
restart_nmi();
alternatives_patched = 1;
+
+ alt_reloc_selftest();
}
/**
@@ -1799,7 +1967,7 @@ struct bp_patching_desc *try_get_desc(void)
{
struct bp_patching_desc *desc = &bp_desc;
- if (!arch_atomic_inc_not_zero(&desc->refs))
+ if (!raw_atomic_inc_not_zero(&desc->refs))
return NULL;
return desc;
@@ -1810,7 +1978,7 @@ static __always_inline void put_desc(void)
struct bp_patching_desc *desc = &bp_desc;
smp_mb__before_atomic();
- arch_atomic_dec(&desc->refs);
+ raw_atomic_dec(&desc->refs);
}
static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
@@ -1954,6 +2122,16 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
atomic_set_release(&bp_desc.refs, 1);
/*
+ * Function tracing can enable thousands of places that need to be
+ * updated. This can take quite some time, and with full kernel debugging
+ * enabled, this could cause the softlockup watchdog to trigger.
+ * This function gets called every 256 entries added to be patched.
+ * Call cond_resched() here to make sure that other tasks can get scheduled
+ * while processing all the functions being patched.
+ */
+ cond_resched();
+
+ /*
* Corresponding read barrier in int3 notifier for making sure the
* nr_entries and handler are correctly ordered wrt. patching.
*/
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 7e331e8f3692..035a3db5330b 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -15,28 +15,31 @@
#include <linux/pci_ids.h>
#include <asm/amd_nb.h>
-#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
-#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
-#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
-#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
-#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
-#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
-#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
-#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
-#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
-#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
-#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
-#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
-#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
-#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
-#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
-#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
-#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
-#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
-#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
-#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
-#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
+#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
+#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
+#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
+#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
+#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
+#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
+#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
+#define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb
+
+#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
+#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
+#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
+#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
+#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
+#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
+#define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -53,6 +56,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
{}
};
@@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) },
{}
};
@@ -101,6 +106,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 770557110051..af49e24b46a4 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -101,6 +101,9 @@ static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
*/
static bool virt_ext_dest_id __ro_after_init;
+/* For parallel bootup. */
+unsigned long apic_mmio_base __ro_after_init;
+
/*
* Map cpu index to physical APIC ID
*/
@@ -2163,6 +2166,7 @@ void __init register_lapic_address(unsigned long address)
if (!x2apic_mode) {
set_fixmap_nocache(FIX_APIC_BASE, address);
+ apic_mmio_base = APIC_BASE;
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
APIC_BASE, address);
}
@@ -2376,7 +2380,7 @@ static int nr_logical_cpuids = 1;
/*
* Used to store mapping between logical CPU IDs and APIC IDs.
*/
-static int cpuid_to_apicid[] = {
+int cpuid_to_apicid[] = {
[0 ... NR_CPUS - 1] = -1,
};
@@ -2386,20 +2390,31 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
}
#ifdef CONFIG_SMP
-/**
- * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @apicid: APIC ID to check
+static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
+{
+ /* Isolate the SMT bit(s) in the APICID and check for 0 */
+ u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
+
+ if (smp_num_siblings == 1 || !(apicid & mask))
+ cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
+}
+
+/*
+ * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid
+ * during early boot. Initialize the primary thread mask before SMP
+ * bringup.
*/
-bool apic_id_is_primary_thread(unsigned int apicid)
+static int __init smp_init_primary_thread_mask(void)
{
- u32 mask;
+ unsigned int cpu;
- if (smp_num_siblings == 1)
- return true;
- /* Isolate the SMT bit(s) in the APICID and check for 0 */
- mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
- return !(apicid & mask);
+ for (cpu = 0; cpu < nr_logical_cpuids; cpu++)
+ cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]);
+ return 0;
}
+early_initcall(smp_init_primary_thread_mask);
+#else
+static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
/*
@@ -2544,6 +2559,9 @@ int generic_processor_info(int apicid, int version)
set_cpu_present(cpu, true);
num_processors++;
+ if (system_state != SYSTEM_BOOTING)
+ cpu_mark_primary_thread(cpu, apicid);
+
return cpu;
}
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 6bde05a86b4e..896bc41cb2ba 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -97,7 +97,10 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+ if (!x2apic_mode)
+ return 0;
+
+ if (x2apic_phys || x2apic_fadt_phys())
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 482855227964..d9384d5b4b8e 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
/* The following values are used for the per node hub info struct */
-static __initdata unsigned short *_node_to_pnode;
static __initdata unsigned short _min_socket, _max_socket;
static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
static __initdata struct uv_gam_range_entry *uv_gre_table;
@@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table;
static __initdata unsigned short *_socket_to_node;
static __initdata unsigned short *_socket_to_pnode;
static __initdata unsigned short *_pnode_to_socket;
+static __initdata unsigned short *_node_to_socket;
static __initdata struct uv_gam_range_s *_gr_table;
@@ -617,7 +617,8 @@ static __init void build_uv_gr_table(void)
bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
grt = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!grt);
+ if (WARN_ON_ONCE(!grt))
+ return;
_gr_table = grt;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
@@ -1022,7 +1023,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
switch (index) {
case UVY_MMIOH0:
mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0;
- nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+ nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
min_nasid = min_pnode;
max_nasid = max_pnode;
@@ -1030,7 +1031,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
break;
case UVY_MMIOH1:
mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1;
- nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+ nasid_mask = UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
min_nasid = min_pnode;
max_nasid = max_pnode;
@@ -1038,7 +1039,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
break;
case UVX_MMIOH0:
mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
- nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
min_nasid = min_pnode * 2;
max_nasid = max_pnode * 2;
@@ -1046,7 +1047,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
break;
case UVX_MMIOH1:
mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1;
- nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+ nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
min_nasid = min_pnode * 2;
max_nasid = max_pnode * 2;
@@ -1072,8 +1073,9 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
/* Invalid NASID check */
if (nasid < min_nasid || max_nasid < nasid) {
- pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n",
- __func__, index, min_nasid, max_nasid);
+ /* Not an error: unused table entries get "poison" values */
+ pr_debug("UV:%s:Invalid NASID(%x):%x (range:%x..%x)\n",
+ __func__, index, nasid, min_nasid, max_nasid);
nasid = -1;
}
@@ -1292,6 +1294,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
hi->nasid_shift = uv_cpuid.nasid_shift;
hi->min_pnode = _min_pnode;
hi->min_socket = _min_socket;
+ hi->node_to_socket = _node_to_socket;
hi->pnode_to_socket = _pnode_to_socket;
hi->socket_to_node = _socket_to_node;
hi->socket_to_pnode = _socket_to_pnode;
@@ -1348,7 +1351,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
unsigned long lgre = 0, gend = 0;
int index = 0;
- int sock_min = 999999, pnode_min = 99999;
+ int sock_min = INT_MAX, pnode_min = INT_MAX;
int sock_max = -1, pnode_max = -1;
uv_gre_table = gre;
@@ -1459,11 +1462,37 @@ static int __init decode_uv_systab(void)
return 0;
}
+/*
+ * Given a bitmask 'bits' representing present blades, numbered
+ * starting at 'base', masking off unused high bits of blade number
+ * with 'mask', update the minimum and maximum blade numbers that we
+ * have found. (Masking with 'mask' necessary because of BIOS
+ * treatment of system partitioning when creating this table we are
+ * interpreting.)
+ */
+static inline void blade_update_min_max(unsigned long bits, int base, int mask, int *min, int *max)
+{
+ int first, last;
+
+ if (!bits)
+ return;
+ first = (base + __ffs(bits)) & mask;
+ last = (base + __fls(bits)) & mask;
+
+ if (*min > first)
+ *min = first;
+ if (*max < last)
+ *max = last;
+}
+
/* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
{
unsigned long np;
int i, uv_pb = 0;
+ int sock_min = INT_MAX, sock_max = -1, s_mask;
+
+ s_mask = (1 << uv_cpuid.n_skt) - 1;
if (UVH_NODE_PRESENT_TABLE) {
pr_info("UV: NODE_PRESENT_DEPTH = %d\n",
@@ -1471,35 +1500,82 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
- uv_pb += hweight64(np);
+ blade_update_min_max(np, i * 64, s_mask, &sock_min, &sock_max);
}
}
if (UVH_NODE_PRESENT_0) {
np = uv_read_local_mmr(UVH_NODE_PRESENT_0);
pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np);
- uv_pb += hweight64(np);
+ blade_update_min_max(np, 0, s_mask, &sock_min, &sock_max);
}
if (UVH_NODE_PRESENT_1) {
np = uv_read_local_mmr(UVH_NODE_PRESENT_1);
pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np);
- uv_pb += hweight64(np);
+ blade_update_min_max(np, 64, s_mask, &sock_min, &sock_max);
+ }
+
+ /* Only update if we actually found some bits indicating blades present */
+ if (sock_max >= sock_min) {
+ _min_socket = sock_min;
+ _max_socket = sock_max;
+ uv_pb = sock_max - sock_min + 1;
}
if (uv_possible_blades != uv_pb)
uv_possible_blades = uv_pb;
- pr_info("UV: number nodes/possible blades %d\n", uv_pb);
+ pr_info("UV: number nodes/possible blades %d (%d - %d)\n",
+ uv_pb, sock_min, sock_max);
+}
+
+static int __init alloc_conv_table(int num_elem, unsigned short **table)
+{
+ int i;
+ size_t bytes;
+
+ bytes = num_elem * sizeof(*table[0]);
+ *table = kmalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!*table))
+ return -ENOMEM;
+ for (i = 0; i < num_elem; i++)
+ ((unsigned short *)*table)[i] = SOCK_EMPTY;
+ return 0;
}
+/* Remove conversion table if it's 1:1 */
+#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2)
+
+static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2)
+{
+ int i;
+ unsigned short *table = *tp;
+
+ if (table == NULL)
+ return;
+ if (max != max2)
+ return;
+ for (i = 0; i < max; i++) {
+ if (i != table[i])
+ return;
+ }
+ kfree(table);
+ *tp = NULL;
+ pr_info("UV: %s is 1:1, conversion table removed\n", tname);
+}
+
+/*
+ * Build Socket Tables
+ * If the number of nodes is >1 per socket, socket to node table will
+ * contain lowest node number on that socket.
+ */
static void __init build_socket_tables(void)
{
struct uv_gam_range_entry *gre = uv_gre_table;
- int num, nump;
+ int nums, numn, nump;
int cpu, i, lnid;
int minsock = _min_socket;
int maxsock = _max_socket;
int minpnode = _min_pnode;
int maxpnode = _max_pnode;
- size_t bytes;
if (!gre) {
if (is_uv2_hub() || is_uv3_hub()) {
@@ -1507,39 +1583,36 @@ static void __init build_socket_tables(void)
return;
}
pr_err("UV: Error: UVsystab address translations not available!\n");
- BUG();
+ WARN_ON_ONCE(!gre);
+ return;
}
- /* Build socket id -> node id, pnode */
- num = maxsock - minsock + 1;
- bytes = num * sizeof(_socket_to_node[0]);
- _socket_to_node = kmalloc(bytes, GFP_KERNEL);
- _socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
-
+ numn = num_possible_nodes();
nump = maxpnode - minpnode + 1;
- bytes = nump * sizeof(_pnode_to_socket[0]);
- _pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
-
- for (i = 0; i < num; i++)
- _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
-
- for (i = 0; i < nump; i++)
- _pnode_to_socket[i] = SOCK_EMPTY;
+ nums = maxsock - minsock + 1;
+
+ /* Allocate and clear tables */
+ if ((alloc_conv_table(nump, &_pnode_to_socket) < 0)
+ || (alloc_conv_table(nums, &_socket_to_pnode) < 0)
+ || (alloc_conv_table(numn, &_node_to_socket) < 0)
+ || (alloc_conv_table(nums, &_socket_to_node) < 0)) {
+ kfree(_pnode_to_socket);
+ kfree(_socket_to_pnode);
+ kfree(_node_to_socket);
+ return;
+ }
/* Fill in pnode/node/addr conversion list values: */
- pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
i = gre->sockid - minsock;
- /* Duplicate: */
- if (_socket_to_pnode[i] != SOCK_EMPTY)
- continue;
- _socket_to_pnode[i] = gre->pnode;
+ if (_socket_to_pnode[i] == SOCK_EMPTY)
+ _socket_to_pnode[i] = gre->pnode;
i = gre->pnode - minpnode;
- _pnode_to_socket[i] = gre->sockid;
+ if (_pnode_to_socket[i] == SOCK_EMPTY)
+ _pnode_to_socket[i] = gre->sockid;
pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
@@ -1549,66 +1622,39 @@ static void __init build_socket_tables(void)
/* Set socket -> node values: */
lnid = NUMA_NO_NODE;
- for_each_present_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
int nid = cpu_to_node(cpu);
int apicid, sockid;
if (lnid == nid)
continue;
lnid = nid;
+
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
- _socket_to_node[sockid - minsock] = nid;
- pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
- sockid, apicid, nid);
- }
- /* Set up physical blade to pnode translation from GAM Range Table: */
- bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
- _node_to_pnode = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!_node_to_pnode);
+ if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
+ _socket_to_node[sockid - minsock] = nid;
- for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
- unsigned short sockid;
+ if (_node_to_socket[nid] == SOCK_EMPTY)
+ _node_to_socket[nid] = sockid;
- for (sockid = minsock; sockid <= maxsock; sockid++) {
- if (lnid == _socket_to_node[sockid - minsock]) {
- _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock];
- break;
- }
- }
- if (sockid > maxsock) {
- pr_err("UV: socket for node %d not found!\n", lnid);
- BUG();
- }
+ pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n",
+ sockid,
+ apicid,
+ _node_to_socket[nid],
+ nid,
+ _socket_to_node[sockid - minsock]);
}
/*
- * If socket id == pnode or socket id == node for all nodes,
+ * If e.g. socket id == pnode for all pnodes,
* system runs faster by removing corresponding conversion table.
*/
- pr_info("UV: Checking socket->node/pnode for identity maps\n");
- if (minsock == 0) {
- for (i = 0; i < num; i++)
- if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i])
- break;
- if (i >= num) {
- kfree(_socket_to_node);
- _socket_to_node = NULL;
- pr_info("UV: 1:1 socket_to_node table removed\n");
- }
- }
- if (minsock == minpnode) {
- for (i = 0; i < num; i++)
- if (_socket_to_pnode[i] != SOCK_EMPTY &&
- _socket_to_pnode[i] != i + minpnode)
- break;
- if (i >= num) {
- kfree(_socket_to_pnode);
- _socket_to_pnode = NULL;
- pr_info("UV: 1:1 socket_to_pnode table removed\n");
- }
- }
+ FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump);
+ FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump);
}
/* Check which reboot to use */
@@ -1692,12 +1738,13 @@ static __init int uv_system_init_hubless(void)
static void __init uv_system_init_hub(void)
{
struct uv_hub_info_s hub_info = {0};
- int bytes, cpu, nodeid;
- unsigned short min_pnode = 9999, max_pnode = 0;
+ int bytes, cpu, nodeid, bid;
+ unsigned short min_pnode = USHRT_MAX, max_pnode = 0;
char *hub = is_uv5_hub() ? "UV500" :
is_uv4_hub() ? "UV400" :
is_uv3_hub() ? "UV300" :
is_uv2_hub() ? "UV2000/3000" : NULL;
+ struct uv_hub_info_s **uv_hub_info_list_blade;
if (!hub) {
pr_err("UV: Unknown/unsupported UV hub\n");
@@ -1720,9 +1767,12 @@ static void __init uv_system_init_hub(void)
build_uv_gr_table();
set_block_size();
uv_init_hub_info(&hub_info);
- uv_possible_blades = num_possible_nodes();
- if (!_node_to_pnode)
+ /* If UV2 or UV3 may need to get # blades from HW */
+ if (is_uv(UV2|UV3) && !uv_gre_table)
boot_init_possible_blades(&hub_info);
+ else
+ /* min/max sockets set in decode_gam_rng_tbl */
+ uv_possible_blades = (_max_socket - _min_socket) + 1;
/* uv_num_possible_blades() is really the hub count: */
pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus());
@@ -1731,79 +1781,98 @@ static void __init uv_system_init_hub(void)
hub_info.coherency_domain_number = sn_coherency_id;
uv_rtc_init();
+ /*
+ * __uv_hub_info_list[] is indexed by node, but there is only
+ * one hub_info structure per blade. First, allocate one
+ * structure per blade. Further down we create a per-node
+ * table (__uv_hub_info_list[]) pointing to hub_info
+ * structures for the correct blade.
+ */
+
bytes = sizeof(void *) * uv_num_possible_blades();
- __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!__uv_hub_info_list);
+ uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!uv_hub_info_list_blade))
+ return;
bytes = sizeof(struct uv_hub_info_s);
- for_each_node(nodeid) {
+ for_each_possible_blade(bid) {
struct uv_hub_info_s *new_hub;
- if (__uv_hub_info_list[nodeid]) {
- pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid);
- BUG();
+ /* Allocate & fill new per hub info list */
+ new_hub = (bid == 0) ? &uv_hub_info_node0
+ : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid));
+ if (WARN_ON_ONCE(!new_hub)) {
+ /* do not kfree() bid 0, which is statically allocated */
+ while (--bid > 0)
+ kfree(uv_hub_info_list_blade[bid]);
+ kfree(uv_hub_info_list_blade);
+ return;
}
- /* Allocate new per hub info list */
- new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid);
- BUG_ON(!new_hub);
- __uv_hub_info_list[nodeid] = new_hub;
- new_hub = uv_hub_info_list(nodeid);
- BUG_ON(!new_hub);
+ uv_hub_info_list_blade[bid] = new_hub;
*new_hub = hub_info;
/* Use information from GAM table if available: */
- if (_node_to_pnode)
- new_hub->pnode = _node_to_pnode[nodeid];
+ if (uv_gre_table)
+ new_hub->pnode = uv_blade_to_pnode(bid);
else /* Or fill in during CPU loop: */
new_hub->pnode = 0xffff;
- new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
+ new_hub->numa_blade_id = bid;
new_hub->memory_nid = NUMA_NO_NODE;
new_hub->nr_possible_cpus = 0;
new_hub->nr_online_cpus = 0;
}
+ /*
+ * Now populate __uv_hub_info_list[] for each node with the
+ * pointer to the struct for the blade it resides on.
+ */
+
+ bytes = sizeof(void *) * num_possible_nodes();
+ __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+ if (WARN_ON_ONCE(!__uv_hub_info_list)) {
+ for_each_possible_blade(bid)
+ /* bid 0 is statically allocated */
+ if (bid != 0)
+ kfree(uv_hub_info_list_blade[bid]);
+ kfree(uv_hub_info_list_blade);
+ return;
+ }
+
+ for_each_node(nodeid)
+ __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)];
+
/* Initialize per CPU info: */
for_each_possible_cpu(cpu) {
- int apicid = per_cpu(x86_cpu_to_apicid, cpu);
- int numa_node_id;
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+ unsigned short bid;
unsigned short pnode;
- nodeid = cpu_to_node(cpu);
- numa_node_id = numa_cpu_node(cpu);
pnode = uv_apicid_to_pnode(apicid);
+ bid = uv_pnode_to_socket(pnode) - _min_socket;
- uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
+ uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid];
uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
- /* Init memoryless node: */
- if (nodeid != numa_node_id &&
- uv_hub_info_list(numa_node_id)->pnode == 0xffff)
- uv_hub_info_list(numa_node_id)->pnode = pnode;
- else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+ if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
uv_cpu_hub_info(cpu)->pnode = pnode;
}
- for_each_node(nodeid) {
- unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
+ for_each_possible_blade(bid) {
+ unsigned short pnode = uv_hub_info_list_blade[bid]->pnode;
- /* Add pnode info for pre-GAM list nodes without CPUs: */
- if (pnode == 0xffff) {
- unsigned long paddr;
+ if (pnode == 0xffff)
+ continue;
- paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
- pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
- uv_hub_info_list(nodeid)->pnode = pnode;
- }
min_pnode = min(pnode, min_pnode);
max_pnode = max(pnode, max_pnode);
- pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
- nodeid,
- uv_hub_info_list(nodeid)->pnode,
- uv_hub_info_list(nodeid)->nr_possible_cpus);
+ pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n",
+ bid,
+ uv_hub_info_list_blade[bid]->pnode,
+ uv_hub_info_list_blade[bid]->nr_possible_cpus);
}
pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
@@ -1811,6 +1880,9 @@ static void __init uv_system_init_hub(void)
map_mmr_high(max_pnode);
map_mmioh_high(min_pnode, max_pnode);
+ kfree(uv_hub_info_list_blade);
+ uv_hub_info_list_blade = NULL;
+
uv_nmi_setup();
uv_cpu_init();
uv_setup_proc_files(0);
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 22ab13966427..c06bfc086565 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -133,8 +133,8 @@ static bool skip_addr(void *dest)
/* Accounts directly */
if (dest == ret_from_fork)
return true;
-#ifdef CONFIG_HOTPLUG_CPU
- if (dest == start_cpu0)
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
+ if (dest == soft_restart_cpu)
return true;
#endif
#ifdef CONFIG_FUNCTION_TRACER
@@ -293,7 +293,8 @@ void *callthunks_translate_call_dest(void *dest)
return target ? : dest;
}
-bool is_callthunk(void *addr)
+#ifdef CONFIG_BPF_JIT
+static bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
void *tmpl = skl_call_thunk_template;
@@ -306,7 +307,6 @@ bool is_callthunk(void *addr)
return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
}
-#ifdef CONFIG_BPF_JIT
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d7e3ceaf75c1..4350f6bfc064 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,7 +27,7 @@ obj-y += cpuid-deps.o
obj-y += umwait.o
obj-$(CONFIG_PROC_FS) += proc.o
-obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+obj-y += capflags.o powerflags.o
obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o
ifdef CONFIG_CPU_SUP_INTEL
@@ -54,7 +54,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
obj-$(CONFIG_ACRN_GUEST) += acrn.o
-ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $@ $^
@@ -63,5 +62,4 @@ vmxfeature = $(src)/../../include/asm/vmxfeatures.h
$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
-endif
targets += capflags.c
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 182af64387d0..9e2a91830f72 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -9,7 +9,6 @@
* - Andrew D. Balsa (code cleanup).
*/
#include <linux/init.h>
-#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/nospec.h>
@@ -27,8 +26,6 @@
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>
-#include <asm/alternative.h>
-#include <asm/set_memory.h>
#include <asm/intel-family.h>
#include <asm/e820/api.h>
#include <asm/hypervisor.h>
@@ -125,21 +122,8 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
-void __init check_bugs(void)
+void __init cpu_select_mitigations(void)
{
- identify_boot_cpu();
-
- /*
- * identify_boot_cpu() initialized SMT support information, let the
- * core code know.
- */
- cpu_smt_check_topology();
-
- if (!IS_ENABLED(CONFIG_SMP)) {
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
- }
-
/*
* Read the SPEC_CTRL MSR to account for reserved bits which may
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
@@ -176,39 +160,6 @@ void __init check_bugs(void)
md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
-
- arch_smt_update();
-
-#ifdef CONFIG_X86_32
- /*
- * Check whether we are able to run this kernel safely on SMP.
- *
- * - i386 is no longer supported.
- * - In order to run on anything without a TSC, we need to be
- * compiled for a i486.
- */
- if (boot_cpu_data.x86 < 4)
- panic("Kernel requires i486+ for 'invlpg' and other features");
-
- init_utsname()->machine[1] =
- '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
- alternative_instructions();
-
- fpu__init_check_bugs();
-#else /* CONFIG_X86_64 */
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-#endif
}
/*
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 4063e8991211..8f86eacf69f7 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -39,6 +39,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
+static cpumask_var_t cpu_cacheinfo_mask;
+
/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;
@@ -1172,8 +1174,10 @@ void cache_bp_restore(void)
cache_cpu_init();
}
-static int cache_ap_init(unsigned int cpu)
+static int cache_ap_online(unsigned int cpu)
{
+ cpumask_set_cpu(cpu, cpu_cacheinfo_mask);
+
if (!memory_caching_control || get_cache_aps_delayed_init())
return 0;
@@ -1191,11 +1195,17 @@ static int cache_ap_init(unsigned int cpu)
* lock to prevent MTRR entry changes
*/
stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
- cpu_callout_mask);
+ cpu_cacheinfo_mask);
return 0;
}
+static int cache_ap_offline(unsigned int cpu)
+{
+ cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
+ return 0;
+}
+
/*
* Delayed cache initialization for all AP's
*/
@@ -1210,9 +1220,12 @@ void cache_aps_init(void)
static int __init cache_ap_register(void)
{
+ zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
+ cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);
+
cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
"x86/cachectrl:starting",
- cache_ap_init, NULL);
+ cache_ap_online, cache_ap_offline);
return 0;
}
-core_initcall(cache_ap_register);
+early_initcall(cache_ap_register);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80710a68ef7d..52683fddafaf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,12 +18,16 @@
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
+#include <linux/mem_encrypt.h>
#include <linux/smp.h>
+#include <linux/cpu.h>
#include <linux/io.h>
#include <linux/syscore_ops.h>
#include <linux/pgtable.h>
#include <linux/stackprotector.h>
+#include <linux/utsname.h>
+#include <asm/alternative.h>
#include <asm/cmdline.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
@@ -59,7 +63,7 @@
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#include <asm/uv/uv.h>
-#include <asm/sigframe.h>
+#include <asm/set_memory.h>
#include <asm/traps.h>
#include <asm/sev.h>
@@ -67,14 +71,6 @@
u32 elf_hwcap2 __read_mostly;
-/* all of these masks are initialized in setup_cpu_local_masks() */
-cpumask_var_t cpu_initialized_mask;
-cpumask_var_t cpu_callout_mask;
-cpumask_var_t cpu_callin_mask;
-
-/* representing cpus for which sibling maps can be computed */
-cpumask_var_t cpu_sibling_setup_mask;
-
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
@@ -169,15 +165,6 @@ clear_ppin:
clear_cpu_cap(c, info->feature);
}
-/* correctly size the local cpu masks */
-void __init setup_cpu_local_masks(void)
-{
- alloc_bootmem_cpumask_var(&cpu_initialized_mask);
- alloc_bootmem_cpumask_var(&cpu_callin_mask);
- alloc_bootmem_cpumask_var(&cpu_callout_mask);
- alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
-}
-
static void default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
@@ -1502,12 +1489,10 @@ static void __init cpu_parse_early_param(void)
if (!kstrtouint(opt, 10, &bit)) {
if (bit < NCAPINTS * 32) {
-#ifdef CONFIG_X86_FEATURE_NAMES
/* empty-string, i.e., ""-defined feature flags */
if (!x86_cap_flags[bit])
pr_cont(" " X86_CAP_FMT_NUM, x86_cap_flag_num(bit));
else
-#endif
pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
setup_clear_cpu_cap(bit);
@@ -1520,7 +1505,6 @@ static void __init cpu_parse_early_param(void)
continue;
}
-#ifdef CONFIG_X86_FEATURE_NAMES
for (bit = 0; bit < 32 * NCAPINTS; bit++) {
if (!x86_cap_flag(bit))
continue;
@@ -1537,7 +1521,6 @@ static void __init cpu_parse_early_param(void)
if (!found)
pr_cont(" (unknown: %s)", opt);
-#endif
}
pr_cont("\n");
@@ -1600,10 +1583,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
sld_setup(c);
- fpu__init_system(c);
-
- init_sigframe_size();
-
#ifdef CONFIG_X86_32
/*
* Regardless of whether PCID is enumerated, the SDM says
@@ -2123,19 +2102,6 @@ static void dbg_restore_debug_regs(void)
#define dbg_restore_debug_regs()
#endif /* ! CONFIG_KGDB */
-static void wait_for_master_cpu(int cpu)
-{
-#ifdef CONFIG_SMP
- /*
- * wait for ACK from master CPU before continuing
- * with AP initialization
- */
- WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
- while (!cpumask_test_cpu(cpu, cpu_callout_mask))
- cpu_relax();
-#endif
-}
-
static inline void setup_getcpu(int cpu)
{
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
@@ -2158,11 +2124,7 @@ static inline void setup_getcpu(int cpu)
}
#ifdef CONFIG_X86_64
-static inline void ucode_cpu_init(int cpu)
-{
- if (cpu)
- load_ucode_ap();
-}
+static inline void ucode_cpu_init(int cpu) { }
static inline void tss_setup_ist(struct tss_struct *tss)
{
@@ -2239,8 +2201,6 @@ void cpu_init(void)
struct task_struct *cur = current;
int cpu = raw_smp_processor_id();
- wait_for_master_cpu(cpu);
-
ucode_cpu_init(cpu);
#ifdef CONFIG_NUMA
@@ -2285,26 +2245,12 @@ void cpu_init(void)
doublefault_init_cpu_tss();
- fpu__init_cpu();
-
if (is_uv_system())
uv_cpu_init();
load_fixmap_gdt(cpu);
}
-#ifdef CONFIG_SMP
-void cpu_init_secondary(void)
-{
- /*
- * Relies on the BP having set-up the IDT tables, which are loaded
- * on this CPU in cpu_init_exception_handling().
- */
- cpu_init_exception_handling();
- cpu_init();
-}
-#endif
-
#ifdef CONFIG_MICROCODE_LATE_LOADING
/**
* store_cpu_caps() - Store a snapshot of CPU capabilities
@@ -2362,3 +2308,69 @@ void arch_smt_update(void)
/* Check whether IPI broadcasting can be enabled */
apic_smt_update();
}
+
+void __init arch_cpu_finalize_init(void)
+{
+ identify_boot_cpu();
+
+ /*
+ * identify_boot_cpu() initialized SMT support information, let the
+ * core code know.
+ */
+ cpu_smt_check_topology();
+
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+ cpu_select_mitigations();
+
+ arch_smt_update();
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ /*
+ * Check whether this is a real i386 which is not longer
+ * supported and fixup the utsname.
+ */
+ if (boot_cpu_data.x86 < 4)
+ panic("Kernel requires i486+ for 'invlpg' and other features");
+
+ init_utsname()->machine[1] =
+ '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+ }
+
+ /*
+ * Must be before alternatives because it might set or clear
+ * feature bits.
+ */
+ fpu__init_system();
+ fpu__init_cpu();
+
+ alternative_instructions();
+
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+ } else {
+ fpu__init_check_bugs();
+ }
+
+ /*
+ * This needs to be called before any devices perform DMA
+ * operations that might use the SWIOTLB bounce buffers. It will
+ * mark the bounce buffers as decrypted so that their usage will
+ * not cause "plain-text" data to be decrypted when accessed. It
+ * must be called after late_time_init() so that Hyper-V x86/x64
+ * hypercalls work when the SWIOTLB bounce buffers are decrypted.
+ */
+ mem_encrypt_init();
+}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f97b0fe13da8..1c44630d4789 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -79,6 +79,7 @@ extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
+void cpu_select_mitigations(void);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 0b971f974096..5e74610b39e7 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
bool amd_mce_is_memory_error(struct mce *m)
{
+ enum smca_bank_types bank_type;
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;
+ bank_type = smca_get_bank_type(m->extcpu, m->bank);
if (mce_flags.smca)
- return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
+ return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
- if (b && bank_type == SMCA_UMC) {
+ if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
return NULL;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2eec60f50057..89e2aab5d34d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1022,12 +1022,12 @@ static noinstr int mce_start(int *no_way_out)
if (!timeout)
return ret;
- arch_atomic_add(*no_way_out, &global_nwo);
+ raw_atomic_add(*no_way_out, &global_nwo);
/*
* Rely on the implied barrier below, such that global_nwo
* is updated before mce_callin.
*/
- order = arch_atomic_inc_return(&mce_callin);
+ order = raw_atomic_inc_return(&mce_callin);
arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
/* Enable instrumentation around calls to external facilities */
@@ -1036,10 +1036,10 @@ static noinstr int mce_start(int *no_way_out)
/*
* Wait for everyone.
*/
- while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
+ while (raw_atomic_read(&mce_callin) != num_online_cpus()) {
if (mce_timed_out(&timeout,
"Timeout: Not all CPUs entered broadcast exception handler")) {
- arch_atomic_set(&global_nwo, 0);
+ raw_atomic_set(&global_nwo, 0);
goto out;
}
ndelay(SPINUNIT);
@@ -1054,7 +1054,7 @@ static noinstr int mce_start(int *no_way_out)
/*
* Monarch: Starts executing now, the others wait.
*/
- arch_atomic_set(&mce_executing, 1);
+ raw_atomic_set(&mce_executing, 1);
} else {
/*
* Subject: Now start the scanning loop one by one in
@@ -1062,10 +1062,10 @@ static noinstr int mce_start(int *no_way_out)
* This way when there are any shared banks it will be
* only seen by one CPU before cleared, avoiding duplicates.
*/
- while (arch_atomic_read(&mce_executing) < order) {
+ while (raw_atomic_read(&mce_executing) < order) {
if (mce_timed_out(&timeout,
"Timeout: Subject CPUs unable to finish machine check processing")) {
- arch_atomic_set(&global_nwo, 0);
+ raw_atomic_set(&global_nwo, 0);
goto out;
}
ndelay(SPINUNIT);
@@ -1075,7 +1075,7 @@ static noinstr int mce_start(int *no_way_out)
/*
* Cache the global no_way_out state.
*/
- *no_way_out = arch_atomic_read(&global_nwo);
+ *no_way_out = raw_atomic_read(&global_nwo);
ret = order;
@@ -1533,7 +1533,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- if (kill_current_task)
+ if (!mce_usable_address(&m))
queue_task_work(&m, msg, kill_me_now);
else
queue_task_work(&m, msg, kill_me_maybe);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index f5fdeb1e3606..87208e46f7ed 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -78,8 +78,6 @@ static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig)
if (sig == e->installed_cpu)
return e->equiv_cpu;
-
- e++;
}
return 0;
}
@@ -596,11 +594,6 @@ void reload_ucode_amd(unsigned int cpu)
}
}
}
-static u16 __find_equiv_id(unsigned int cpu)
-{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- return find_equiv_id(&equiv_table, uci->cpu_sig.sig);
-}
/*
* a small, trivial cache of per-family ucode patches
@@ -651,9 +644,11 @@ static void free_cache(void)
static struct ucode_patch *find_patch(unsigned int cpu)
{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u16 equiv_id;
- equiv_id = __find_equiv_id(cpu);
+
+ equiv_id = find_equiv_id(&equiv_table, uci->cpu_sig.sig);
if (!equiv_id)
return NULL;
@@ -705,7 +700,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
/* need to apply patch? */
- if (rev >= mc_amd->hdr.patch_id) {
+ if (rev > mc_amd->hdr.patch_id) {
ret = UCODE_OK;
goto out;
}
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index cc4f9f1cb94c..aee4bc5ad496 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y := mtrr.o if.o generic.o cleanup.o
-obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
+obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o legacy.o
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index eff6ac62c0ff..ef3e8e42b782 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -110,7 +110,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
}
const struct mtrr_ops amd_mtrr_ops = {
- .vendor = X86_VENDOR_AMD,
+ .var_regs = 2,
.set = amd_set_mtrr,
.get = amd_get_mtrr,
.get_free_region = generic_get_free_region,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index b8a74eddde83..6f6c3ae92943 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -45,15 +45,6 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
return -ENOSPC;
}
-/*
- * Report boot time MCR setups
- */
-void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
-{
- centaur_mcr[mcr].low = lo;
- centaur_mcr[mcr].high = hi;
-}
-
static void
centaur_get_mcr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type * type)
@@ -112,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
}
const struct mtrr_ops centaur_mtrr_ops = {
- .vendor = X86_VENDOR_CENTAUR,
+ .var_regs = 8,
.set = centaur_set_mcr,
.get = centaur_get_mcr,
.get_free_region = centaur_get_free_region,
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index b5f43049fa5f..18cf79d6e2c5 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -55,9 +55,6 @@ static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
-static int __initdata debug_print;
-#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0)
-
#define BIOS_BUG_MSG \
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
@@ -79,12 +76,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
base, base + size);
}
- if (debug_print) {
- pr_debug("After WB checking\n");
- for (i = 0; i < nr_range; i++)
- pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
- range[i].start, range[i].end);
- }
+
+ Dprintk("After WB checking\n");
+ for (i = 0; i < nr_range; i++)
+ Dprintk("MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
/* Take out UC ranges: */
for (i = 0; i < num_var_ranges; i++) {
@@ -112,24 +108,22 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range,
subtract_range(range, RANGE_NUM, extra_remove_base,
extra_remove_base + extra_remove_size);
- if (debug_print) {
- pr_debug("After UC checking\n");
- for (i = 0; i < RANGE_NUM; i++) {
- if (!range[i].end)
- continue;
- pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
- range[i].start, range[i].end);
- }
+ Dprintk("After UC checking\n");
+ for (i = 0; i < RANGE_NUM; i++) {
+ if (!range[i].end)
+ continue;
+
+ Dprintk("MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
}
/* sort the ranges */
nr_range = clean_sort_range(range, RANGE_NUM);
- if (debug_print) {
- pr_debug("After sorting\n");
- for (i = 0; i < nr_range; i++)
- pr_debug("MTRR MAP PFN: %016llx - %016llx\n",
- range[i].start, range[i].end);
- }
+
+ Dprintk("After sorting\n");
+ for (i = 0; i < nr_range; i++)
+ Dprintk("MTRR MAP PFN: %016llx - %016llx\n",
+ range[i].start, range[i].end);
return nr_range;
}
@@ -164,16 +158,9 @@ static int __init enable_mtrr_cleanup_setup(char *str)
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
-static int __init mtrr_cleanup_debug_setup(char *str)
-{
- debug_print = 1;
- return 0;
-}
-early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
-
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
- unsigned char type, unsigned int address_bits)
+ unsigned char type)
{
u32 base_lo, base_hi, mask_lo, mask_hi;
u64 base, mask;
@@ -183,7 +170,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
return;
}
- mask = (1ULL << address_bits) - 1;
+ mask = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
mask &= ~((((u64)sizek) << 10) - 1);
base = ((u64)basek) << 10;
@@ -209,7 +196,7 @@ save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
range_state[reg].type = type;
}
-static void __init set_var_mtrr_all(unsigned int address_bits)
+static void __init set_var_mtrr_all(void)
{
unsigned long basek, sizek;
unsigned char type;
@@ -220,7 +207,7 @@ static void __init set_var_mtrr_all(unsigned int address_bits)
sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
type = range_state[reg].type;
- set_var_mtrr(reg, basek, sizek, type, address_bits);
+ set_var_mtrr(reg, basek, sizek, type);
}
}
@@ -267,7 +254,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
align = max_align;
sizek = 1UL << align;
- if (debug_print) {
+ if (mtrr_debug) {
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
@@ -542,7 +529,7 @@ static void __init print_out_mtrr_range_state(void)
start_base = to_size_factor(start_base, &start_factor);
type = range_state[i].type;
- pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+ Dprintk("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
i, start_base, start_factor,
size_base, size_factor,
(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
@@ -680,7 +667,7 @@ static int __init mtrr_search_optimal_index(void)
return index_good;
}
-int __init mtrr_cleanup(unsigned address_bits)
+int __init mtrr_cleanup(void)
{
unsigned long x_remove_base, x_remove_size;
unsigned long base, size, def, dummy;
@@ -689,7 +676,10 @@ int __init mtrr_cleanup(unsigned address_bits)
int index_good;
int i;
- if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+ if (!mtrr_enabled())
+ return 0;
+
+ if (!cpu_feature_enabled(X86_FEATURE_MTRR) || enable_mtrr_cleanup < 1)
return 0;
rdmsr(MSR_MTRRdefType, def, dummy);
@@ -711,7 +701,7 @@ int __init mtrr_cleanup(unsigned address_bits)
return 0;
/* Print original var MTRRs at first, for debugging: */
- pr_debug("original variable MTRRs\n");
+ Dprintk("original variable MTRRs\n");
print_out_mtrr_range_state();
memset(range, 0, sizeof(range));
@@ -742,8 +732,8 @@ int __init mtrr_cleanup(unsigned address_bits)
mtrr_print_out_one_result(i);
if (!result[i].bad) {
- set_var_mtrr_all(address_bits);
- pr_debug("New variable MTRRs\n");
+ set_var_mtrr_all();
+ Dprintk("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
}
@@ -763,7 +753,7 @@ int __init mtrr_cleanup(unsigned address_bits)
mtrr_calc_range_state(chunk_size, gran_size,
x_remove_base, x_remove_size, i);
- if (debug_print) {
+ if (mtrr_debug) {
mtrr_print_out_one_result(i);
pr_info("\n");
}
@@ -786,8 +776,8 @@ int __init mtrr_cleanup(unsigned address_bits)
gran_size = result[i].gran_sizek;
gran_size <<= 10;
x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
- set_var_mtrr_all(address_bits);
- pr_debug("New variable MTRRs\n");
+ set_var_mtrr_all();
+ Dprintk("New variable MTRRs\n");
print_out_mtrr_range_state();
return 1;
} else {
@@ -802,7 +792,7 @@ int __init mtrr_cleanup(unsigned address_bits)
return 0;
}
#else
-int __init mtrr_cleanup(unsigned address_bits)
+int __init mtrr_cleanup(void)
{
return 0;
}
@@ -882,15 +872,18 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
+ if (!mtrr_enabled())
+ return 0;
+
/*
* Make sure we only trim uncachable memory on machines that
* support the Intel MTRR architecture:
*/
- if (!is_cpu(INTEL) || disable_mtrr_trim)
+ if (!cpu_feature_enabled(X86_FEATURE_MTRR) || disable_mtrr_trim)
return 0;
rdmsr(MSR_MTRRdefType, def, dummy);
- def &= 0xff;
+ def &= MTRR_DEF_TYPE_TYPE;
if (def != MTRR_TYPE_UNCACHABLE)
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 173b9e01e623..238dad57d4d6 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -235,7 +235,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
}
const struct mtrr_ops cyrix_mtrr_ops = {
- .vendor = X86_VENDOR_CYRIX,
+ .var_regs = 8,
.set = cyrix_set_arr,
.get = cyrix_get_arr,
.get_free_region = cyrix_get_free_region,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index ee09d359e08f..2d6aa5d2e3d7 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -8,10 +8,12 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/mm.h>
-
+#include <linux/cc_platform.h>
#include <asm/processor-flags.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
@@ -31,6 +33,55 @@ static struct fixed_range_block fixed_range_blocks[] = {
{}
};
+struct cache_map { /* one memory range of the MTRR cache mode lookup map */
+ u64 start; /* first address covered by this entry */
+ u64 end; /* end address, exclusive: lookups skip the entry when start >= end, debug output prints end - 1 */
+ u64 flags; /* NOTE(review): not referenced by the code visible here - confirm usage */
+ u64 type:8; /* MTRR_TYPE_* cache mode of the range */
+ u64 fixed:1; /* set for fixed-MTRR and TOP_MEM2 entries; such entries are neither merged nor overridden */
+};
+
+bool mtrr_debug;
+
+/*
+ * Early parameter handler for "mtrr". The only accepted value is "debug",
+ * which switches mtrr_debug on. A missing value or any other string is
+ * rejected with -EINVAL.
+ */
+static int __init mtrr_param_setup(char *str)
+{
+	if (!str || strcmp(str, "debug") != 0)
+		return -EINVAL;
+
+	mtrr_debug = true;
+
+	return 0;
+}
+early_param("mtrr", mtrr_param_setup);
+
+/*
+ * CACHE_MAP_MAX is the maximum number of memory ranges in cache_map, where
+ * no 2 adjacent ranges have the same cache mode (those would be merged).
+ * The number is based on the worst case:
+ * - no two adjacent fixed MTRRs share the same cache mode
+ * - one variable MTRR is spanning a huge area with mode WB
+ * - 255 variable MTRRs with mode UC all overlap with the WB MTRR, creating 2
+ * additional ranges each (result like "ababababa...aba" with a = WB, b = UC),
+ * accounting for MTRR_MAX_VAR_RANGES * 2 - 1 range entries
+ * - a TOP_MEM2 area (even when overlapping a UC MTRR it can't add 2 range
+ *   entries to the possible maximum: it always starts at 4GB, so it can't be
+ *   in the middle of that MTRR unless that MTRR starts at 0, which would
+ *   remove the initial "a" from the "abababa" pattern above)
+ * The map won't contain ranges with no matching MTRR (those fall back to the
+ * default cache mode).
+ */
+#define CACHE_MAP_MAX (MTRR_NUM_FIXED_RANGES + MTRR_MAX_VAR_RANGES * 2)
+
+static struct cache_map init_cache_map[CACHE_MAP_MAX] __initdata;
+static struct cache_map *cache_map __refdata = init_cache_map;
+static unsigned int cache_map_size = CACHE_MAP_MAX;
+static unsigned int cache_map_n;
+static unsigned int cache_map_fixed;
+
static unsigned long smp_changes_mask;
static int mtrr_state_set;
u64 mtrr_tom2;
@@ -38,6 +89,9 @@ u64 mtrr_tom2;
struct mtrr_state_type mtrr_state;
EXPORT_SYMBOL_GPL(mtrr_state);
+/* Reserved bits in the high portion of the MTRRphysBaseN MSR. */
+u32 phys_hi_rsvd;
+
/*
* BIOS is expected to clear MtrrFixDramModEn bit, see for example
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
@@ -69,175 +123,370 @@ static u64 get_mtrr_size(u64 mask)
{
u64 size;
- mask >>= PAGE_SHIFT;
- mask |= size_or_mask;
+ mask |= (u64)phys_hi_rsvd << 32;
size = -mask;
- size <<= PAGE_SHIFT;
+
return size;
}
+/*
+ * Decode variable MTRR @reg from the cached mtrr_state.
+ *
+ * Returns MTRR_TYPE_INVALID (leaving *start and *size untouched) when the
+ * valid bit of the mask register is clear. Otherwise stores the range start
+ * and size and returns the type field of the base register.
+ */
+static u8 get_var_mtrr_state(unsigned int reg, u64 *start, u64 *size)
+{
+	struct mtrr_var_range *vr = &mtrr_state.var_ranges[reg];
+	u64 mask;
+
+	if (!(vr->mask_lo & MTRR_PHYSMASK_V))
+		return MTRR_TYPE_INVALID;
+
+	*start = (((u64)vr->base_hi) << 32) + (vr->base_lo & PAGE_MASK);
+
+	mask = (((u64)vr->mask_hi) << 32) + (vr->mask_lo & PAGE_MASK);
+	*size = get_mtrr_size(mask);
+
+	return vr->base_lo & MTRR_PHYSBASE_TYPE;
+}
+
+/*
+ * Resolve the cache mode of a range covered by two MTRR types:
+ *  - UC on either side always wins,
+ *  - identical types are kept,
+ *  - a WB/WT mix results in WT,
+ *  - every other mix falls back to UC.
+ */
+static u8 get_effective_type(u8 type1, u8 type2)
+{
+	bool wb_over_wt, wt_over_wb;
+
+	if (type1 == MTRR_TYPE_UNCACHABLE || type2 == MTRR_TYPE_UNCACHABLE)
+		return MTRR_TYPE_UNCACHABLE;
+
+	if (type1 == type2)
+		return type1;
+
+	wb_over_wt = type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH;
+	wt_over_wb = type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK;
+	if (wb_over_wt || wt_over_wb)
+		return MTRR_TYPE_WRTHROUGH;
+
+	return MTRR_TYPE_UNCACHABLE;
+}
+
+/*
+ * Drop the cache_map entry at @idx: shrink the entry count and close the gap
+ * by moving all following entries down by one slot.
+ */
+static void rm_map_entry_at(int idx)
+{
+	cache_map_n--;
+
+	/* Removed the last entry? Then there is nothing to move. */
+	if (cache_map_n <= idx)
+		return;
+
+	memmove(&cache_map[idx], &cache_map[idx + 1],
+		sizeof(*cache_map) * (cache_map_n - idx));
+}
+
/*
- * Check and return the effective type for MTRR-MTRR type overlap.
- * Returns 1 if the effective type is UNCACHEABLE, else returns 0
+ * Add an entry into cache_map at a specific index. Merges adjacent entries if
+ * appropriate. Return the number of merges for correcting the scan index
+ * (this is needed as merging will reduce the number of entries, which will
+ * result in skipping entries in future iterations if the scan index isn't
+ * corrected).
+ * Note that the corrected index can never go below -1 (resulting in being 0 in
+ * the next scan iteration), as "2" is returned only if the current index is
+ * larger than zero.
*/
-static int check_type_overlap(u8 *prev, u8 *curr)
+static int add_map_entry_at(u64 start, u64 end, u8 type, int idx)
{
- if (*prev == MTRR_TYPE_UNCACHABLE || *curr == MTRR_TYPE_UNCACHABLE) {
- *prev = MTRR_TYPE_UNCACHABLE;
- *curr = MTRR_TYPE_UNCACHABLE;
- return 1;
+ bool merge_prev = false, merge_next = false;
+
+ if (start >= end)
+ return 0;
+
+ if (idx > 0) {
+ struct cache_map *prev = cache_map + idx - 1;
+
+ if (!prev->fixed && start == prev->end && type == prev->type)
+ merge_prev = true;
}
- if ((*prev == MTRR_TYPE_WRBACK && *curr == MTRR_TYPE_WRTHROUGH) ||
- (*prev == MTRR_TYPE_WRTHROUGH && *curr == MTRR_TYPE_WRBACK)) {
- *prev = MTRR_TYPE_WRTHROUGH;
- *curr = MTRR_TYPE_WRTHROUGH;
+ if (idx < cache_map_n) {
+ struct cache_map *next = cache_map + idx;
+
+ if (!next->fixed && end == next->start && type == next->type)
+ merge_next = true;
}
- if (*prev != *curr) {
- *prev = MTRR_TYPE_UNCACHABLE;
- *curr = MTRR_TYPE_UNCACHABLE;
+ if (merge_prev && merge_next) {
+ cache_map[idx - 1].end = cache_map[idx].end;
+ rm_map_entry_at(idx);
+ return 2;
+ }
+ if (merge_prev) {
+ cache_map[idx - 1].end = end;
return 1;
}
+ if (merge_next) {
+ cache_map[idx].start = start;
+ return 1;
+ }
+
+ /* Sanity check: the array should NEVER be too small! */
+ if (cache_map_n == cache_map_size) {
+ WARN(1, "MTRR cache mode memory map exhausted!\n");
+ cache_map_n = cache_map_fixed;
+ return 0;
+ }
+
+ if (cache_map_n > idx) {
+ memmove(cache_map + idx + 1, cache_map + idx,
+ sizeof(*cache_map) * (cache_map_n - idx));
+ }
+
+ cache_map[idx].start = start;
+ cache_map[idx].end = end;
+ cache_map[idx].type = type;
+ cache_map[idx].fixed = 0;
+ cache_map_n++;
return 0;
}
-/**
- * mtrr_type_lookup_fixed - look up memory type in MTRR fixed entries
- *
- * Return the MTRR fixed memory type of 'start'.
- *
- * MTRR fixed entries are divided into the following ways:
- * 0x00000 - 0x7FFFF : This range is divided into eight 64KB sub-ranges
- * 0x80000 - 0xBFFFF : This range is divided into sixteen 16KB sub-ranges
- * 0xC0000 - 0xFFFFF : This range is divided into sixty-four 4KB sub-ranges
- *
- * Return Values:
- * MTRR_TYPE_(type) - Matched memory type
- * MTRR_TYPE_INVALID - Unmatched
+/*
+ * Remove [start, end) from the map entry at @idx. Depending on where the
+ * cleared part sits, the entry is removed, trimmed at its head or tail, or
+ * split in two (the tail part being re-added at @idx + 1).
+ *
+ * Returns 1 if the entry still begins at its old start address afterwards,
+ * 0 if the cleared part began at the start of the entry.
+ */
+static int clr_map_range_at(u64 start, u64 end, int idx)
+{
+	struct cache_map *entry = cache_map + idx;
+	bool from_head = start == entry->start;
+	bool to_tail = end == entry->end;
+	u64 old_end;
+
+	if (from_head && to_tail) {
+		/* Whole entry cleared. */
+		rm_map_entry_at(idx);
+	} else if (from_head) {
+		/* Head cleared. */
+		entry->start = end;
+	} else if (to_tail) {
+		/* Tail cleared. */
+		entry->end = start;
+	} else {
+		/* Hole in the middle: keep the head, re-add the tail. */
+		old_end = entry->end;
+		entry->end = start;
+		add_map_entry_at(end, old_end, entry->type, idx + 1);
+	}
+
+	return !from_head;
+}
+
+/*
+ * Add MTRR to the map. The current map is scanned and each part of the MTRR
+ * either overlapping with an existing entry or with a hole in the map is
+ * handled separately.
*/
-static u8 mtrr_type_lookup_fixed(u64 start, u64 end)
+static void add_map_entry(u64 start, u64 end, u8 type)
{
- int idx;
+ u8 new_type, old_type;
+ u64 tmp;
+ int i;
- if (start >= 0x100000)
- return MTRR_TYPE_INVALID;
+ for (i = 0; i < cache_map_n && start < end; i++) {
+ if (start >= cache_map[i].end)
+ continue;
+
+ if (start < cache_map[i].start) {
+ /* Region start has no overlap. */
+ tmp = min(end, cache_map[i].start);
+ i -= add_map_entry_at(start, tmp, type, i);
+ start = tmp;
+ continue;
+ }
- /* 0x0 - 0x7FFFF */
- if (start < 0x80000) {
- idx = 0;
- idx += (start >> 16);
- return mtrr_state.fixed_ranges[idx];
- /* 0x80000 - 0xBFFFF */
- } else if (start < 0xC0000) {
- idx = 1 * 8;
- idx += ((start - 0x80000) >> 14);
- return mtrr_state.fixed_ranges[idx];
+ new_type = get_effective_type(type, cache_map[i].type);
+ old_type = cache_map[i].type;
+
+ if (cache_map[i].fixed || new_type == old_type) {
+ /* Cut off start of new entry. */
+ start = cache_map[i].end;
+ continue;
+ }
+
+ /* Handle only overlapping part of region. */
+ tmp = min(end, cache_map[i].end);
+ i += clr_map_range_at(start, tmp, i);
+ i -= add_map_entry_at(start, tmp, new_type, i);
+ start = tmp;
}
- /* 0xC0000 - 0xFFFFF */
- idx = 3 * 8;
- idx += ((start - 0xC0000) >> 12);
- return mtrr_state.fixed_ranges[idx];
+ /* Add rest of region after last map entry (rest might be empty). */
+ add_map_entry_at(start, end, type, i);
}
-/**
- * mtrr_type_lookup_variable - look up memory type in MTRR variable entries
- *
- * Return Value:
- * MTRR_TYPE_(type) - Matched memory type or default memory type (unmatched)
- *
- * Output Arguments:
- * repeat - Set to 1 when [start:end] spanned across MTRR range and type
- * returned corresponds only to [start:*partial_end]. Caller has
- * to lookup again for [*partial_end:end].
- *
- * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
- * region is fully covered by a single MTRR entry or the default
- * type.
+/*
+ * Add the AMD TOP_MEM2 area (if any) and all valid variable MTRRs to the
+ * cache map. The visible callers invoke this with only the fixed entries
+ * present in the map.
+ */
+static void map_add_var(void)
+{
+	unsigned int n_before = cache_map_n;
+	u64 start, size;
+	unsigned int i;
+	u8 type;
+
+	/*
+	 * Add AMD TOP_MEM2 area. Can't be added in mtrr_build_map(), as it
+	 * needs to be added again when rebuilding the map due to potentially
+	 * having moved as a result of variable MTRRs for memory below 4GB.
+	 */
+	if (mtrr_tom2) {
+		add_map_entry(BIT_ULL(32), mtrr_tom2, MTRR_TYPE_WRBACK);
+
+		/*
+		 * Mark the entry only if one was really appended. Nothing is
+		 * added for mtrr_tom2 <= 4GB or when the map is exhausted, and
+		 * with an empty map "cache_map_n - 1" would wrap around and
+		 * write far outside of the array.
+		 */
+		if (cache_map_n > n_before)
+			cache_map[cache_map_n - 1].fixed = 1;
+	}
+
+	for (i = 0; i < num_var_ranges; i++) {
+		type = get_var_mtrr_state(i, &start, &size);
+		if (type != MTRR_TYPE_INVALID)
+			add_map_entry(start, start + size, type);
+	}
+}
+
+/*
+ * Rebuild map by replacing variable entries. Needs to be called when MTRR
+ * registers are being changed after boot, as such changes could include
+ * removals of registers, which are complicated to handle without rebuild of
+ * the map.
*/
-static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
- int *repeat, u8 *uniform)
+void generic_rebuild_map(void)
{
- int i;
- u64 base, mask;
- u8 prev_match, curr_match;
+ if (mtrr_if != &generic_mtrr_ops)
+ return;
- *repeat = 0;
- *uniform = 1;
+ cache_map_n = cache_map_fixed;
- prev_match = MTRR_TYPE_INVALID;
- for (i = 0; i < num_var_ranges; ++i) {
- unsigned short start_state, end_state, inclusive;
+ map_add_var();
+}
- if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
- continue;
+/*
+ * Number of map entries budgeted for this system: the fixed entries, two per
+ * variable MTRR, plus one more when a TOP_MEM2 area is present.
+ */
+static unsigned int __init get_cache_map_size(void)
+{
+	unsigned int entries = cache_map_fixed + 2 * num_var_ranges;
+
+	if (mtrr_tom2)
+		entries++;
+
+	return entries;
+}
- base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) +
- (mtrr_state.var_ranges[i].base_lo & PAGE_MASK);
- mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) +
- (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK);
-
- start_state = ((start & mask) == (base & mask));
- end_state = ((end & mask) == (base & mask));
- inclusive = ((start < base) && (end > base));
-
- if ((start_state != end_state) || inclusive) {
- /*
- * We have start:end spanning across an MTRR.
- * We split the region into either
- *
- * - start_state:1
- * (start:mtrr_end)(mtrr_end:end)
- * - end_state:1
- * (start:mtrr_start)(mtrr_start:end)
- * - inclusive:1
- * (start:mtrr_start)(mtrr_start:mtrr_end)(mtrr_end:end)
- *
- * depending on kind of overlap.
- *
- * Return the type of the first region and a pointer
- * to the start of next region so that caller will be
- * advised to lookup again after having adjusted start
- * and end.
- *
- * Note: This way we handle overlaps with multiple
- * entries and the default type properly.
- */
- if (start_state)
- *partial_end = base + get_mtrr_size(mask);
- else
- *partial_end = base;
-
- if (unlikely(*partial_end <= start)) {
- WARN_ON(1);
- *partial_end = start + PAGE_SIZE;
- }
+/* Build the cache_map containing the cache modes per memory range. */
+void __init mtrr_build_map(void)
+{
+ u64 start, end, size;
+ unsigned int i;
+ u8 type;
- end = *partial_end - 1; /* end is inclusive */
- *repeat = 1;
- *uniform = 0;
+ /* Add fixed MTRRs, optimize for adjacent entries with same type. */
+ if (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED) {
+ /*
+ * Start with 64k size fixed entries, preset 1st one (hence the
+ * loop below is starting with index 1).
+ */
+ start = 0;
+ end = size = 0x10000;
+ type = mtrr_state.fixed_ranges[0];
+
+ for (i = 1; i < MTRR_NUM_FIXED_RANGES; i++) {
+ /* 8 64k entries, then 16 16k ones, rest 4k. */
+ if (i == 8 || i == 24)
+ size >>= 2;
+
+ if (mtrr_state.fixed_ranges[i] != type) {
+ add_map_entry(start, end, type);
+ start = end;
+ type = mtrr_state.fixed_ranges[i];
+ }
+ end += size;
}
+ add_map_entry(start, end, type);
+ }
- if ((start & mask) != (base & mask))
- continue;
+ /* Mark fixed, they take precedence. */
+ for (i = 0; i < cache_map_n; i++)
+ cache_map[i].fixed = 1;
+ cache_map_fixed = cache_map_n;
- curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
- if (prev_match == MTRR_TYPE_INVALID) {
- prev_match = curr_match;
- continue;
+ map_add_var();
+
+ pr_info("MTRR map: %u entries (%u fixed + %u variable; max %u), built from %u variable MTRRs\n",
+ cache_map_n, cache_map_fixed, cache_map_n - cache_map_fixed,
+ get_cache_map_size(), num_var_ranges + (mtrr_tom2 != 0));
+
+ if (mtrr_debug) {
+ for (i = 0; i < cache_map_n; i++) {
+ pr_info("%3u: %016llx-%016llx %s\n", i,
+ cache_map[i].start, cache_map[i].end - 1,
+ mtrr_attrib_to_str(cache_map[i].type));
}
+ }
+}
- *uniform = 0;
- if (check_type_overlap(&prev_match, &curr_match))
- return curr_match;
+/*
+ * Move the lookup map out of the __initdata array into a kcalloc()ed buffer
+ * sized by get_cache_map_size(). Without enabled MTRRs (or with a zero sized
+ * map) cache_map is simply set to NULL. If the allocation fails,
+ * mtrr_state.enabled is cleared and an error is logged.
+ */
+void __init mtrr_copy_map(void)
+{
+	unsigned int new_size = get_cache_map_size();
+
+	if (!mtrr_state.enabled || !new_size) {
+		cache_map = NULL;
+		return;
+	}
+
+	mutex_lock(&mtrr_mutex);
+
+	cache_map = kcalloc(new_size, sizeof(*cache_map), GFP_KERNEL);
+	if (!cache_map) {
+		mtrr_state.enabled = 0;
+		pr_err("MTRRs disabled due to allocation failure for lookup map.\n");
+		goto out_unlock;
+	}
+
+	memmove(cache_map, init_cache_map, cache_map_n * sizeof(*cache_map));
+	cache_map_size = new_size;
+
+out_unlock:
+	mutex_unlock(&mtrr_mutex);
+}
+
+/**
+ * mtrr_overwrite_state - set static MTRR state
+ *
+ * Used to set MTRR state via different means (e.g. with data obtained from
+ * a hypervisor).
+ * Is allowed only for special cases when running virtualized. Must be called
+ * from the x86_init.hyper.init_platform() hook. It can be called only once.
+ * The MTRR state can't be changed afterwards. To ensure that, X86_FEATURE_MTRR
+ * is cleared.
+ */
+void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
+ mtrr_type def_type)
+{
+ unsigned int i;
+
+ /* Only allowed to be called once before mtrr_bp_init(). */
+ if (WARN_ON_ONCE(mtrr_state_set))
+ return;
+
+ /* Only allowed when running virtualized. */
+ if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+ return;
+
+ /*
+ * Only allowed for special virtualization cases:
+ * - when running as Hyper-V, SEV-SNP guest using vTOM
+ * - when running as Xen PV guest
+ * - when running as SEV-SNP or TDX guest to avoid unnecessary
+ * VMM communication/Virtualization exceptions (#VC, #VE)
+ */
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP) &&
+ !hv_is_isolation_supported() &&
+ !cpu_feature_enabled(X86_FEATURE_XENPV) &&
+ !cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return;
+
+ /* Disable MTRR in order to disable MTRR modifications. */
+ setup_clear_cpu_cap(X86_FEATURE_MTRR);
+
+ if (var) {
+ if (num_var > MTRR_MAX_VAR_RANGES) {
+ pr_warn("Trying to overwrite MTRR state with %u variable entries\n",
+ num_var);
+ num_var = MTRR_MAX_VAR_RANGES;
+ }
+ for (i = 0; i < num_var; i++)
+ mtrr_state.var_ranges[i] = var[i];
+ num_var_ranges = num_var;
}
- if (prev_match != MTRR_TYPE_INVALID)
- return prev_match;
+ mtrr_state.def_type = def_type;
+ mtrr_state.enabled |= MTRR_STATE_MTRR_ENABLED;
- return mtrr_state.def_type;
+ mtrr_state_set = 1;
+}
+
+/*
+ * Fold @new_type into the type accumulated so far for a lookup region.
+ *
+ * While nothing has been accumulated yet (@type is MTRR_TYPE_INVALID) the new
+ * type is taken as is. Otherwise the effective type of both is returned, and
+ * *uniform is cleared if that differs from the accumulated @type.
+ */
+static u8 type_merge(u8 type, u8 new_type, u8 *uniform)
+{
+	u8 merged;
+
+	if (type == MTRR_TYPE_INVALID)
+		return new_type;
+
+	merged = get_effective_type(type, new_type);
+	if (merged != type)
+		*uniform = 0;
+
+	return merged;
+}
/**
@@ -248,66 +497,49 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end,
* MTRR_TYPE_INVALID - MTRR is disabled
*
* Output Argument:
- * uniform - Set to 1 when an MTRR covers the region uniformly, i.e. the
- * region is fully covered by a single MTRR entry or the default
- * type.
+ * uniform - Set to 1 when the returned MTRR type is valid for the whole
+ * region, set to 0 otherwise.
*/
u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform)
{
- u8 type, prev_type, is_uniform = 1, dummy;
- int repeat;
- u64 partial_end;
+ u8 type = MTRR_TYPE_INVALID;
+ unsigned int i;
- /* Make end inclusive instead of exclusive */
- end--;
+ if (!mtrr_state_set) {
+ /* Uniformity is unknown. */
+ *uniform = 0;
+ return MTRR_TYPE_UNCACHABLE;
+ }
- if (!mtrr_state_set)
- return MTRR_TYPE_INVALID;
+ *uniform = 1;
if (!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED))
- return MTRR_TYPE_INVALID;
+ return MTRR_TYPE_UNCACHABLE;
- /*
- * Look up the fixed ranges first, which take priority over
- * the variable ranges.
- */
- if ((start < 0x100000) &&
- (mtrr_state.have_fixed) &&
- (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) {
- is_uniform = 0;
- type = mtrr_type_lookup_fixed(start, end);
- goto out;
- }
+ for (i = 0; i < cache_map_n && start < end; i++) {
+ /* Region after current map entry? -> continue with next one. */
+ if (start >= cache_map[i].end)
+ continue;
- /*
- * Look up the variable ranges. Look of multiple ranges matching
- * this address and pick type as per MTRR precedence.
- */
- type = mtrr_type_lookup_variable(start, end, &partial_end,
- &repeat, &is_uniform);
+ /* Start of region not covered by current map entry? */
+ if (start < cache_map[i].start) {
+ /* At least some part of region has default type. */
+ type = type_merge(type, mtrr_state.def_type, uniform);
+ /* End of region not covered, too? -> lookup done. */
+ if (end <= cache_map[i].start)
+ return type;
+ }
- /*
- * Common path is with repeat = 0.
- * However, we can have cases where [start:end] spans across some
- * MTRR ranges and/or the default type. Do repeated lookups for
- * that case here.
- */
- while (repeat) {
- prev_type = type;
- start = partial_end;
- is_uniform = 0;
- type = mtrr_type_lookup_variable(start, end, &partial_end,
- &repeat, &dummy);
+ /* At least part of region covered by map entry. */
+ type = type_merge(type, cache_map[i].type, uniform);
- if (check_type_overlap(&prev_type, &type))
- goto out;
+ start = cache_map[i].end;
}
- if (mtrr_tom2 && (start >= (1ULL<<32)) && (end < mtrr_tom2))
- type = MTRR_TYPE_WRBACK;
+ /* End of region past last entry in map? -> use default type. */
+ if (start < end)
+ type = type_merge(type, mtrr_state.def_type, uniform);
-out:
- *uniform = is_uniform;
return type;
}
@@ -363,8 +595,8 @@ static void __init print_fixed_last(void)
if (!last_fixed_end)
return;
- pr_debug(" %05X-%05X %s\n", last_fixed_start,
- last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
+ pr_info(" %05X-%05X %s\n", last_fixed_start,
+ last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
last_fixed_end = 0;
}
@@ -402,10 +634,10 @@ static void __init print_mtrr_state(void)
unsigned int i;
int high_width;
- pr_debug("MTRR default type: %s\n",
- mtrr_attrib_to_str(mtrr_state.def_type));
+ pr_info("MTRR default type: %s\n",
+ mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
- pr_debug("MTRR fixed ranges %sabled:\n",
+ pr_info("MTRR fixed ranges %sabled:\n",
((mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) &&
(mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) ?
"en" : "dis");
@@ -420,26 +652,27 @@ static void __init print_mtrr_state(void)
/* tail */
print_fixed_last();
}
- pr_debug("MTRR variable ranges %sabled:\n",
- mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
- high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
+ pr_info("MTRR variable ranges %sabled:\n",
+ mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED ? "en" : "dis");
+ high_width = (boot_cpu_data.x86_phys_bits - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
- if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
- pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
- i,
- high_width,
- mtrr_state.var_ranges[i].base_hi,
- mtrr_state.var_ranges[i].base_lo >> 12,
- high_width,
- mtrr_state.var_ranges[i].mask_hi,
- mtrr_state.var_ranges[i].mask_lo >> 12,
- mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
+ if (mtrr_state.var_ranges[i].mask_lo & MTRR_PHYSMASK_V)
+ pr_info(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
+ i,
+ high_width,
+ mtrr_state.var_ranges[i].base_hi,
+ mtrr_state.var_ranges[i].base_lo >> 12,
+ high_width,
+ mtrr_state.var_ranges[i].mask_hi,
+ mtrr_state.var_ranges[i].mask_lo >> 12,
+ mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo &
+ MTRR_PHYSBASE_TYPE));
else
- pr_debug(" %u disabled\n", i);
+ pr_info(" %u disabled\n", i);
}
if (mtrr_tom2)
- pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
+ pr_info("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
/* Grab all of the MTRR state for this CPU into *state */
@@ -452,7 +685,7 @@ bool __init get_mtrr_state(void)
vrs = mtrr_state.var_ranges;
rdmsr(MSR_MTRRcap, lo, dummy);
- mtrr_state.have_fixed = (lo >> 8) & 1;
+ mtrr_state.have_fixed = lo & MTRR_CAP_FIX;
for (i = 0; i < num_var_ranges; i++)
get_mtrr_var_range(i, &vrs[i]);
@@ -460,8 +693,8 @@ bool __init get_mtrr_state(void)
get_fixed_ranges(mtrr_state.fixed_ranges);
rdmsr(MSR_MTRRdefType, lo, dummy);
- mtrr_state.def_type = (lo & 0xff);
- mtrr_state.enabled = (lo & 0xc00) >> 10;
+ mtrr_state.def_type = lo & MTRR_DEF_TYPE_TYPE;
+ mtrr_state.enabled = (lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT;
if (amd_special_default_mtrr()) {
unsigned low, high;
@@ -474,7 +707,8 @@ bool __init get_mtrr_state(void)
mtrr_tom2 &= 0xffffff800000ULL;
}
- print_mtrr_state();
+ if (mtrr_debug)
+ print_mtrr_state();
mtrr_state_set = 1;
@@ -574,7 +808,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
- if ((mask_lo & 0x800) == 0) {
+ if (!(mask_lo & MTRR_PHYSMASK_V)) {
/* Invalid (i.e. free) range */
*base = 0;
*size = 0;
@@ -585,8 +819,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask: */
- tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
- mask = size_or_mask | tmp;
+ tmp = (u64)mask_hi << 32 | (mask_lo & PAGE_MASK);
+ mask = (u64)phys_hi_rsvd << 32 | tmp;
/* Expand tmp with high bits to all 1s: */
hi = fls64(tmp);
@@ -604,9 +838,9 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
- *size = -mask;
+ *size = -mask >> PAGE_SHIFT;
*base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
- *type = base_lo & 0xff;
+ *type = base_lo & MTRR_PHYSBASE_TYPE;
out_put_cpu:
put_cpu();
@@ -644,9 +878,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
bool changed = false;
rdmsr(MTRRphysBase_MSR(index), lo, hi);
- if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
- || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
- (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+ if ((vr->base_lo & ~MTRR_PHYSBASE_RSVD) != (lo & ~MTRR_PHYSBASE_RSVD)
+ || (vr->base_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) {
mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
changed = true;
@@ -654,9 +887,8 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
rdmsr(MTRRphysMask_MSR(index), lo, hi);
- if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
- || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
- (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
+ if ((vr->mask_lo & ~MTRR_PHYSMASK_RSVD) != (lo & ~MTRR_PHYSMASK_RSVD)
+ || (vr->mask_hi & ~phys_hi_rsvd) != (hi & ~phys_hi_rsvd)) {
mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
changed = true;
}
@@ -691,11 +923,12 @@ static unsigned long set_mtrr_state(void)
* Set_mtrr_restore restores the old value of MTRRdefType,
* so to set it we fiddle with the saved value:
*/
- if ((deftype_lo & 0xff) != mtrr_state.def_type
- || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+ if ((deftype_lo & MTRR_DEF_TYPE_TYPE) != mtrr_state.def_type ||
+ ((deftype_lo & MTRR_DEF_TYPE_ENABLE) >> MTRR_STATE_SHIFT) != mtrr_state.enabled) {
- deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
- (mtrr_state.enabled << 10);
+ deftype_lo = (deftype_lo & MTRR_DEF_TYPE_DISABLE) |
+ mtrr_state.def_type |
+ (mtrr_state.enabled << MTRR_STATE_SHIFT);
change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
}
@@ -708,7 +941,7 @@ void mtrr_disable(void)
rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Disable MTRRs, and set the default type to uncached */
- mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & MTRR_DEF_TYPE_DISABLE, deftype_hi);
}
void mtrr_enable(void)
@@ -762,9 +995,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
memset(vr, 0, sizeof(struct mtrr_var_range));
} else {
vr->base_lo = base << PAGE_SHIFT | type;
- vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
- vr->mask_lo = -size << PAGE_SHIFT | 0x800;
- vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
+ vr->base_hi = (base >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd;
+ vr->mask_lo = -size << PAGE_SHIFT | MTRR_PHYSMASK_V;
+ vr->mask_hi = (-size >> (32 - PAGE_SHIFT)) & ~phys_hi_rsvd;
mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi);
mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
@@ -783,7 +1016,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
* For Intel PPro stepping <= 7
* must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
*/
- if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
+ if (mtrr_if == &generic_mtrr_ops && boot_cpu_data.x86 == 6 &&
boot_cpu_data.x86_model == 1 &&
boot_cpu_data.x86_stepping <= 7) {
if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
@@ -817,7 +1050,7 @@ static int generic_have_wrcomb(void)
{
unsigned long config, dummy;
rdmsr(MSR_MTRRcap, config, dummy);
- return config & (1 << 10);
+ return config & MTRR_CAP_WC;
}
int positive_have_wrcomb(void)
diff --git a/arch/x86/kernel/cpu/mtrr/legacy.c b/arch/x86/kernel/cpu/mtrr/legacy.c
new file mode 100644
index 000000000000..d25882fcf181
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/legacy.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <asm/cpufeature.h>
+#include <asm/mtrr.h>
+#include <asm/processor.h>
+#include "mtrr.h"
+
+void mtrr_set_if(void)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /* Pre-Athlon (K6) AMD CPU MTRRs */
+ if (cpu_feature_enabled(X86_FEATURE_K6_MTRR))
+ mtrr_if = &amd_mtrr_ops;
+ break;
+ case X86_VENDOR_CENTAUR:
+ if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR))
+ mtrr_if = &centaur_mtrr_ops;
+ break;
+ case X86_VENDOR_CYRIX:
+ if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR))
+ mtrr_if = &cyrix_mtrr_ops;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * The suspend/resume methods are only for CPUs without MTRR. CPUs using generic
+ * MTRR driver don't require this.
+ */
+struct mtrr_value {
+ mtrr_type ltype;
+ unsigned long lbase;
+ unsigned long lsize;
+};
+
+static struct mtrr_value *mtrr_value;
+
+static int mtrr_save(void)
+{
+ int i;
+
+ if (!mtrr_value)
+ return -ENOMEM;
+
+ for (i = 0; i < num_var_ranges; i++) {
+ mtrr_if->get(i, &mtrr_value[i].lbase,
+ &mtrr_value[i].lsize,
+ &mtrr_value[i].ltype);
+ }
+ return 0;
+}
+
+static void mtrr_restore(void)
+{
+ int i;
+
+ for (i = 0; i < num_var_ranges; i++) {
+ if (mtrr_value[i].lsize) {
+ mtrr_if->set(i, mtrr_value[i].lbase,
+ mtrr_value[i].lsize,
+ mtrr_value[i].ltype);
+ }
+ }
+}
+
+static struct syscore_ops mtrr_syscore_ops = {
+ .suspend = mtrr_save,
+ .resume = mtrr_restore,
+};
+
+void mtrr_register_syscore(void)
+{
+ mtrr_value = kcalloc(num_var_ranges, sizeof(*mtrr_value), GFP_KERNEL);
+
+ /*
+ * The CPU has no MTRR and seems to not support SMP. They have
+ * specific drivers, we use a tricky method to support
+ * suspend/resume for them.
+ *
+ * TBD: is there any system with such CPU which supports
+ * suspend/resume? If no, we should remove the code.
+ */
+ register_syscore_ops(&mtrr_syscore_ops);
+}
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 783f3210d582..767bf1c71aad 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -59,15 +59,9 @@
#define MTRR_TO_PHYS_WC_OFFSET 1000
u32 num_var_ranges;
-static bool mtrr_enabled(void)
-{
- return !!mtrr_if;
-}
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
-static DEFINE_MUTEX(mtrr_mutex);
-
-u64 size_or_mask, size_and_mask;
+DEFINE_MUTEX(mtrr_mutex);
const struct mtrr_ops *mtrr_if;
@@ -105,21 +99,6 @@ static int have_wrcomb(void)
return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
}
-/* This function returns the number of variable MTRRs */
-static void __init set_num_var_ranges(bool use_generic)
-{
- unsigned long config = 0, dummy;
-
- if (use_generic)
- rdmsr(MSR_MTRRcap, config, dummy);
- else if (is_cpu(AMD) || is_cpu(HYGON))
- config = 2;
- else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
- config = 8;
-
- num_var_ranges = config & 0xff;
-}
-
static void __init init_table(void)
{
int i, max;
@@ -194,20 +173,8 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
* Note that the mechanism is the same for UP systems, too; all the SMP stuff
* becomes nops.
*/
-static void
-set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
-{
- struct set_mtrr_data data = { .smp_reg = reg,
- .smp_base = base,
- .smp_size = size,
- .smp_type = type
- };
-
- stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
-}
-
-static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
- unsigned long size, mtrr_type type)
+static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
+ mtrr_type type)
{
struct set_mtrr_data data = { .smp_reg = reg,
.smp_base = base,
@@ -216,6 +183,8 @@ static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
};
stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
+
+ generic_rebuild_map();
}
/**
@@ -337,7 +306,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
/* Search for an empty MTRR */
i = mtrr_if->get_free_region(base, size, replace);
if (i >= 0) {
- set_mtrr_cpuslocked(i, base, size, type);
+ set_mtrr(i, base, size, type);
if (likely(replace < 0)) {
mtrr_usage_table[i] = 1;
} else {
@@ -345,7 +314,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (increment)
mtrr_usage_table[i]++;
if (unlikely(replace != i)) {
- set_mtrr_cpuslocked(replace, 0, 0, 0);
+ set_mtrr(replace, 0, 0, 0);
mtrr_usage_table[replace] = 0;
}
}
@@ -363,7 +332,7 @@ static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
pr_warn("size and base must be multiples of 4 kiB\n");
- pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
+ Dprintk("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -454,8 +423,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
- pr_debug("no MTRR for %lx000,%lx000 found\n",
- base, size);
+ Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
goto out;
}
}
@@ -473,7 +441,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
goto out;
}
if (--mtrr_usage_table[reg] < 1)
- set_mtrr_cpuslocked(reg, 0, 0, 0);
+ set_mtrr(reg, 0, 0, 0);
error = reg;
out:
mutex_unlock(&mtrr_mutex);
@@ -574,136 +542,54 @@ int arch_phys_wc_index(int handle)
}
EXPORT_SYMBOL_GPL(arch_phys_wc_index);
-/* The suspend/resume methods are only for CPU without MTRR. CPU using generic
- * MTRR driver doesn't require this
- */
-struct mtrr_value {
- mtrr_type ltype;
- unsigned long lbase;
- unsigned long lsize;
-};
-
-static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
-
-static int mtrr_save(void)
-{
- int i;
-
- for (i = 0; i < num_var_ranges; i++) {
- mtrr_if->get(i, &mtrr_value[i].lbase,
- &mtrr_value[i].lsize,
- &mtrr_value[i].ltype);
- }
- return 0;
-}
-
-static void mtrr_restore(void)
-{
- int i;
-
- for (i = 0; i < num_var_ranges; i++) {
- if (mtrr_value[i].lsize) {
- set_mtrr(i, mtrr_value[i].lbase,
- mtrr_value[i].lsize,
- mtrr_value[i].ltype);
- }
- }
-}
-
-
-
-static struct syscore_ops mtrr_syscore_ops = {
- .suspend = mtrr_save,
- .resume = mtrr_restore,
-};
-
int __initdata changed_by_mtrr_cleanup;
-#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
/**
- * mtrr_bp_init - initialize mtrrs on the boot CPU
+ * mtrr_bp_init - initialize MTRRs on the boot CPU
*
* This needs to be called early; before any of the other CPUs are
* initialized (i.e. before smp_init()).
- *
*/
void __init mtrr_bp_init(void)
{
+ bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
const char *why = "(not available)";
- u32 phys_addr;
-
- phys_addr = 32;
+ unsigned long config, dummy;
- if (boot_cpu_has(X86_FEATURE_MTRR)) {
- mtrr_if = &generic_mtrr_ops;
- size_or_mask = SIZE_OR_MASK_BITS(36);
- size_and_mask = 0x00f00000;
- phys_addr = 36;
+ phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);
+ if (!generic_mtrrs && mtrr_state.enabled) {
/*
- * This is an AMD specific MSR, but we assume(hope?) that
- * Intel will implement it too when they extend the address
- * bus of the Xeon.
+ * Software overwrite of MTRR state, only for generic case.
+ * Note that X86_FEATURE_MTRR has been reset in this case.
*/
- if (cpuid_eax(0x80000000) >= 0x80000008) {
- phys_addr = cpuid_eax(0x80000008) & 0xff;
- /* CPUID workaround for Intel 0F33/0F34 CPU */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
- boot_cpu_data.x86 == 0xF &&
- boot_cpu_data.x86_model == 0x3 &&
- (boot_cpu_data.x86_stepping == 0x3 ||
- boot_cpu_data.x86_stepping == 0x4))
- phys_addr = 36;
-
- size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
- size_and_mask = ~size_or_mask & 0xfffff00000ULL;
- } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
- boot_cpu_data.x86 == 6) {
- /*
- * VIA C* family have Intel style MTRRs,
- * but don't support PAE
- */
- size_or_mask = SIZE_OR_MASK_BITS(32);
- size_and_mask = 0;
- phys_addr = 32;
- }
- } else {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
- /* Pre-Athlon (K6) AMD CPU MTRRs */
- mtrr_if = &amd_mtrr_ops;
- size_or_mask = SIZE_OR_MASK_BITS(32);
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CENTAUR:
- if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
- mtrr_if = &centaur_mtrr_ops;
- size_or_mask = SIZE_OR_MASK_BITS(32);
- size_and_mask = 0;
- }
- break;
- case X86_VENDOR_CYRIX:
- if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
- mtrr_if = &cyrix_mtrr_ops;
- size_or_mask = SIZE_OR_MASK_BITS(32);
- size_and_mask = 0;
- }
- break;
- default:
- break;
- }
+ init_table();
+ mtrr_build_map();
+ pr_info("MTRRs set to read-only\n");
+
+ return;
}
+ if (generic_mtrrs)
+ mtrr_if = &generic_mtrr_ops;
+ else
+ mtrr_set_if();
+
if (mtrr_enabled()) {
- set_num_var_ranges(mtrr_if == &generic_mtrr_ops);
+ /* Get the number of variable MTRR ranges. */
+ if (mtrr_if == &generic_mtrr_ops)
+ rdmsr(MSR_MTRRcap, config, dummy);
+ else
+ config = mtrr_if->var_regs;
+ num_var_ranges = config & MTRR_CAP_VCNT;
+
init_table();
if (mtrr_if == &generic_mtrr_ops) {
/* BIOS may override */
if (get_mtrr_state()) {
memory_caching_control |= CACHE_MTRR;
- changed_by_mtrr_cleanup = mtrr_cleanup(phys_addr);
+ changed_by_mtrr_cleanup = mtrr_cleanup();
+ mtrr_build_map();
} else {
mtrr_if = NULL;
why = "by BIOS";
@@ -730,8 +616,14 @@ void mtrr_save_state(void)
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
}
-static int __init mtrr_init_finialize(void)
+static int __init mtrr_init_finalize(void)
{
+ /*
+ * Map might exist if mtrr_overwrite_state() has been called or if
+ * mtrr_enabled() returns true.
+ */
+ mtrr_copy_map();
+
if (!mtrr_enabled())
return 0;
@@ -741,16 +633,8 @@ static int __init mtrr_init_finialize(void)
return 0;
}
- /*
- * The CPU has no MTRR and seems to not support SMP. They have
- * specific drivers, we use a tricky method to support
- * suspend/resume for them.
- *
- * TBD: is there any system with such CPU which supports
- * suspend/resume? If no, we should remove the code.
- */
- register_syscore_ops(&mtrr_syscore_ops);
+ mtrr_register_syscore();
return 0;
}
-subsys_initcall(mtrr_init_finialize);
+subsys_initcall(mtrr_init_finalize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 02eb5871492d..5655f253d929 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -10,10 +10,13 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
+extern bool mtrr_debug;
+#define Dprintk(x...) do { if (mtrr_debug) pr_info(x); } while (0)
+
extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
struct mtrr_ops {
- u32 vendor;
+ u32 var_regs;
void (*set)(unsigned int reg, unsigned long base,
unsigned long size, mtrr_type type);
void (*get)(unsigned int reg, unsigned long *base,
@@ -51,18 +54,26 @@ void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
-extern u64 size_or_mask, size_and_mask;
extern const struct mtrr_ops *mtrr_if;
-
-#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
+extern struct mutex mtrr_mutex;
extern unsigned int num_var_ranges;
extern u64 mtrr_tom2;
extern struct mtrr_state_type mtrr_state;
+extern u32 phys_hi_rsvd;
void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
void mtrr_wrmsr(unsigned, unsigned, unsigned);
+#ifdef CONFIG_X86_32
+void mtrr_set_if(void);
+void mtrr_register_syscore(void);
+#else
+static inline void mtrr_set_if(void) { }
+static inline void mtrr_register_syscore(void) { }
+#endif
+void mtrr_build_map(void);
+void mtrr_copy_map(void);
/* CPU specific mtrr_ops vectors. */
extern const struct mtrr_ops amd_mtrr_ops;
@@ -70,4 +81,14 @@ extern const struct mtrr_ops cyrix_mtrr_ops;
extern const struct mtrr_ops centaur_mtrr_ops;
extern int changed_by_mtrr_cleanup;
-extern int mtrr_cleanup(unsigned address_bits);
+extern int mtrr_cleanup(void);
+
+/*
+ * Must be used by code which uses mtrr_if to call platform-specific
+ * MTRR manipulation functions.
+ */
+static inline bool mtrr_enabled(void)
+{
+ return !!mtrr_if;
+}
+void generic_rebuild_map(void);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 6ad33f355861..725344048f85 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -726,11 +726,15 @@ unlock:
static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
{
struct task_struct *p, *t;
+ pid_t pid;
rcu_read_lock();
for_each_process_thread(p, t) {
- if (is_closid_match(t, r) || is_rmid_match(t, r))
- seq_printf(s, "%d\n", t->pid);
+ if (is_closid_match(t, r) || is_rmid_match(t, r)) {
+ pid = task_pid_vnr(t);
+ if (pid)
+ seq_printf(s, "%d\n", pid);
+ }
}
rcu_read_unlock();
}
@@ -2301,6 +2305,26 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
}
}
+static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ atomic_inc(&rdtgrp->waitcount);
+ kernfs_break_active_protection(kn);
+}
+
+static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+ (rdtgrp->flags & RDT_DELETED)) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+ rdtgroup_pseudo_lock_remove(rdtgrp);
+ kernfs_unbreak_active_protection(kn);
+ rdtgroup_remove(rdtgrp);
+ } else {
+ kernfs_unbreak_active_protection(kn);
+ }
+}
+
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
@@ -2308,8 +2332,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
if (!rdtgrp)
return NULL;
- atomic_inc(&rdtgrp->waitcount);
- kernfs_break_active_protection(kn);
+ rdtgroup_kn_get(rdtgrp, kn);
mutex_lock(&rdtgroup_mutex);
@@ -2328,17 +2351,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
return;
mutex_unlock(&rdtgroup_mutex);
-
- if (atomic_dec_and_test(&rdtgrp->waitcount) &&
- (rdtgrp->flags & RDT_DELETED)) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- rdtgroup_pseudo_lock_remove(rdtgrp);
- kernfs_unbreak_active_protection(kn);
- rdtgroup_remove(rdtgrp);
- } else {
- kernfs_unbreak_active_protection(kn);
- }
+ rdtgroup_kn_put(rdtgrp, kn);
}
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
@@ -3505,6 +3518,133 @@ out:
return ret;
}
+/**
+ * mongrp_reparent() - replace parent CTRL_MON group of a MON group
+ * @rdtgrp: the MON group whose parent should be replaced
+ * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
+ * @cpus: cpumask provided by the caller for use during this call
+ *
+ * Replaces the parent CTRL_MON group for a MON group, resulting in all member
+ * tasks' CLOSID immediately changing to that of the new parent group.
+ * Monitoring data for the group is unaffected by this operation.
+ */
+static void mongrp_reparent(struct rdtgroup *rdtgrp,
+ struct rdtgroup *new_prdtgrp,
+ cpumask_var_t cpus)
+{
+ struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+
+ WARN_ON(rdtgrp->type != RDTMON_GROUP);
+ WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
+
+ /* Nothing to do when simply renaming a MON group. */
+ if (prdtgrp == new_prdtgrp)
+ return;
+
+ WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+ list_move_tail(&rdtgrp->mon.crdtgrp_list,
+ &new_prdtgrp->mon.crdtgrp_list);
+
+ rdtgrp->mon.parent = new_prdtgrp;
+ rdtgrp->closid = new_prdtgrp->closid;
+
+ /* Propagate updated closid to all tasks in this group. */
+ rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
+
+ update_closid_rmid(cpus, NULL);
+}
+
+static int rdtgroup_rename(struct kernfs_node *kn,
+ struct kernfs_node *new_parent, const char *new_name)
+{
+ struct rdtgroup *new_prdtgrp;
+ struct rdtgroup *rdtgrp;
+ cpumask_var_t tmpmask;
+ int ret;
+
+ rdtgrp = kernfs_to_rdtgroup(kn);
+ new_prdtgrp = kernfs_to_rdtgroup(new_parent);
+ if (!rdtgrp || !new_prdtgrp)
+ return -ENOENT;
+
+ /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
+ rdtgroup_kn_get(rdtgrp, kn);
+ rdtgroup_kn_get(new_prdtgrp, new_parent);
+
+ mutex_lock(&rdtgroup_mutex);
+
+ rdt_last_cmd_clear();
+
+ /*
+ * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
+ * either kernfs_node is a file.
+ */
+ if (kernfs_type(kn) != KERNFS_DIR ||
+ kernfs_type(new_parent) != KERNFS_DIR) {
+ rdt_last_cmd_puts("Source and destination must be directories");
+ ret = -EPERM;
+ goto out;
+ }
+
+ if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
+ !is_mon_groups(kn->parent, kn->name)) {
+ rdt_last_cmd_puts("Source must be a MON group\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ if (!is_mon_groups(new_parent, new_name)) {
+ rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
+ * If the MON group is monitoring CPUs, the CPUs must be assigned to the
+ * current parent CTRL_MON group and therefore cannot be assigned to
+ * the new parent, making the move illegal.
+ */
+ if (!cpumask_empty(&rdtgrp->cpu_mask) &&
+ rdtgrp->mon.parent != new_prdtgrp) {
+ rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
+ * Allocate the cpumask for use in mongrp_reparent() to avoid the
+ * possibility of failing to allocate it after kernfs_rename() has
+ * succeeded.
+ */
+ if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Perform all input validation and allocations needed to ensure
+ * mongrp_reparent() will succeed before calling kernfs_rename(),
+ * otherwise it would be necessary to revert this call if
+ * mongrp_reparent() failed.
+ */
+ ret = kernfs_rename(kn, new_parent, new_name);
+ if (!ret)
+ mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
+
+ free_cpumask_var(tmpmask);
+
+out:
+ mutex_unlock(&rdtgroup_mutex);
+ rdtgroup_kn_put(rdtgrp, kn);
+ rdtgroup_kn_put(new_prdtgrp, new_parent);
+ return ret;
+}
+
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
@@ -3522,6 +3662,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
.mkdir = rdtgroup_mkdir,
.rmdir = rdtgroup_rmdir,
+ .rename = rdtgroup_rename,
.show_options = rdtgroup_show_options,
};
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 2a0e90fe2abc..91fa70e51004 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -755,6 +755,7 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
{
struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier);
struct sgx_encl_mm *tmp = NULL;
+ bool found = false;
/*
* The enclave itself can remove encl_mm. Note, objects can't be moved
@@ -764,12 +765,13 @@ static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
if (tmp == encl_mm) {
list_del_rcu(&encl_mm->list);
+ found = true;
break;
}
}
spin_unlock(&encl_mm->encl->mm_lock);
- if (tmp == encl_mm) {
+ if (found) {
synchronize_srcu(&encl_mm->encl->srcu);
mmu_notifier_put(mn);
}
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 21ca0a831b70..5d390df21440 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -214,7 +214,7 @@ static int __sgx_encl_add_page(struct sgx_encl *encl,
if (!(vma->vm_flags & VM_MAYEXEC))
return -EACCES;
- ret = get_user_pages(src, 1, 0, &src_page, NULL);
+ ret = get_user_pages(src, 1, 0, &src_page);
if (ret < 1)
return -EFAULT;
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 3b58d8703094..6eaf9a6bc02f 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -9,6 +9,7 @@
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/traps.h>
+#include <asm/doublefault.h>
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 851eb13edc01..998a08f17e33 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -53,7 +53,7 @@ void fpu__init_cpu(void)
fpu__init_cpu_xstate();
}
-static bool fpu__probe_without_cpuid(void)
+static bool __init fpu__probe_without_cpuid(void)
{
unsigned long cr0;
u16 fsw, fcw;
@@ -71,7 +71,7 @@ static bool fpu__probe_without_cpuid(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
-static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+static void __init fpu__init_system_early_generic(void)
{
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
@@ -211,10 +211,10 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* Called on the boot CPU once per system bootup, to set up the initial
* FPU state that is later cloned into all processes:
*/
-void __init fpu__init_system(struct cpuinfo_x86 *c)
+void __init fpu__init_system(void)
{
fpstate_reset(&current->thread.fpu);
- fpu__init_system_early_generic(c);
+ fpu__init_system_early_generic();
/*
* The FPU has to be operational for some of the
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5e7ead52cfdb..01e8f34daf22 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -525,9 +525,6 @@ static void *addr_from_call(void *ptr)
return ptr + CALL_INSN_SIZE + call.disp;
}
-void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
- unsigned long frame_pointer);
-
/*
* If the ops->trampoline was not allocated, then it probably
* has a static trampoline func, or is the ftrace caller itself.
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 10c27b4261eb..246a609f889b 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -69,6 +69,7 @@ asmlinkage __visible void __init __noreturn i386_start_kernel(void)
* to the first kernel PMD. Note the upper half of each PMD or PTE are
* always zero at this stage.
*/
+void __init mk_early_pgtbl_32(void);
void __init mk_early_pgtbl_32(void)
{
#ifdef __pa
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 67c8ed99144b..c9318993f959 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -138,20 +138,6 @@ SYM_CODE_START(startup_32)
jmp .Ldefault_entry
SYM_CODE_END(startup_32)
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
- * up already except stack. We just set up stack here. Then call
- * start_secondary().
- */
-SYM_FUNC_START(start_cpu0)
- movl initial_stack, %ecx
- movl %ecx, %esp
- call *(initial_code)
-1: jmp 1b
-SYM_FUNC_END(start_cpu0)
-#endif
-
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 113c13376e51..c5b9289837dc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,7 +24,9 @@
#include "../entry/calling.h"
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/apicdef.h>
#include <asm/fixmap.h>
+#include <asm/smp.h>
/*
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -234,8 +236,67 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
ANNOTATE_NOENDBR // above
#ifdef CONFIG_SMP
+ /*
+ * For parallel boot, the APIC ID is read from the APIC, and then
+ * used to look up the CPU number. For booting a single CPU, the
+ * CPU number is encoded in smpboot_control.
+ *
+ * Bit 31 STARTUP_READ_APICID (Read APICID from APIC)
+ * Bit 0-23 CPU# if STARTUP_xx flags are not set
+ */
movl smpboot_control(%rip), %ecx
+ testl $STARTUP_READ_APICID, %ecx
+ jnz .Lread_apicid
+ /*
+ * No control bit set, single CPU bringup. CPU number is provided
+ * in bit 0-23. This is also the boot CPU case (CPU number 0).
+ */
+ andl $(~STARTUP_PARALLEL_MASK), %ecx
+ jmp .Lsetup_cpu
+.Lread_apicid:
+ /* Check whether X2APIC mode is already enabled */
+ mov $MSR_IA32_APICBASE, %ecx
+ rdmsr
+ testl $X2APIC_ENABLE, %eax
+ jnz .Lread_apicid_msr
+
+ /* Read the APIC ID from the fix-mapped MMIO space. */
+ movq apic_mmio_base(%rip), %rcx
+ addq $APIC_ID, %rcx
+ movl (%rcx), %eax
+ shr $24, %eax
+ jmp .Llookup_AP
+
+.Lread_apicid_msr:
+ mov $APIC_X2APIC_ID_MSR, %ecx
+ rdmsr
+
+.Llookup_AP:
+ /* EAX contains the APIC ID of the current CPU */
+ xorq %rcx, %rcx
+ leaq cpuid_to_apicid(%rip), %rbx
+
+.Lfind_cpunr:
+ cmpl (%rbx,%rcx,4), %eax
+ jz .Lsetup_cpu
+ inc %ecx
+#ifdef CONFIG_FORCE_NR_CPUS
+ cmpl $NR_CPUS, %ecx
+#else
+ cmpl nr_cpu_ids(%rip), %ecx
+#endif
+ jb .Lfind_cpunr
+
+ /* APIC ID not found in the table. Drop the trampoline lock and bail. */
+ movq trampoline_lock(%rip), %rax
+ movl $0, (%rax)
+
+1: cli
+ hlt
+ jmp 1b
+
+.Lsetup_cpu:
/* Get the per cpu offset for the given CPU# which is in ECX */
movq __per_cpu_offset(,%rcx,8), %rdx
#else
@@ -252,6 +313,16 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movq TASK_threadsp(%rax), %rsp
/*
+ * Now that this CPU is running on its own stack, drop the realmode
+ * protection. For the boot CPU the pointer is NULL!
+ */
+ movq trampoline_lock(%rip), %rax
+ testq %rax, %rax
+ jz .Lsetup_gdt
+ movl $0, (%rax)
+
+.Lsetup_gdt:
+ /*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
* addresses where we're currently running on. We have to do that here
@@ -375,13 +446,13 @@ SYM_CODE_END(secondary_startup_64)
#include "verify_cpu.S"
#include "sev_verify_cbit.S"
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
/*
- * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
- * up already except stack. We just set up stack here. Then call
- * start_secondary() via .Ljump_to_C_code.
+ * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
+ * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
+ * unplug. Everything is set up already except the stack.
*/
-SYM_CODE_START(start_cpu0)
+SYM_CODE_START(soft_restart_cpu)
ANNOTATE_NOENDBR
UNWIND_HINT_END_OF_STACK
@@ -390,7 +461,7 @@ SYM_CODE_START(start_cpu0)
movq TASK_threadsp(%rcx), %rsp
jmp .Ljump_to_C_code
-SYM_CODE_END(start_cpu0)
+SYM_CODE_END(soft_restart_cpu)
#endif
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -433,6 +504,8 @@ SYM_DATA(initial_code, .quad x86_64_start_kernel)
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
+
+SYM_DATA(trampoline_lock, .quad 0);
__FINITDATA
__INIT
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 766ffe3ba313..9f668d2f3d11 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -211,6 +211,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
#endif
+#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
+ sum += irq_stats(cpu)->irq_hv_callback_count;
+#endif
+#if IS_ENABLED(CONFIG_HYPERV)
+ sum += irq_stats(cpu)->irq_hv_reenlightenment_count;
+ sum += irq_stats(cpu)->hyperv_stimer0_count;
+#endif
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
sum += per_cpu(mce_poll_count, cpu);
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 670eb08b972a..ee4fe8cdb857 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -165,32 +165,19 @@ int arch_asym_cpu_priority(int cpu)
/**
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
- * @prio: Priority of cpu core
- * @core_cpu: The cpu number associated with the core
+ * @prio: Priority of @cpu
+ * @cpu: The CPU number
*
* The pstate driver will find out the max boost frequency
* and call this function to set a priority proportional
- * to the max boost frequency. CPU with higher boost
+ * to the max boost frequency. CPUs with higher boost
* frequency will receive higher priority.
*
* No need to rebuild sched domain after updating
* the CPU priorities. The sched domains have no
* dependency on CPU priorities.
*/
-void sched_set_itmt_core_prio(int prio, int core_cpu)
+void sched_set_itmt_core_prio(int prio, int cpu)
{
- int cpu, i = 1;
-
- for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
- int smt_prio;
-
- /*
- * Ensure that the siblings are moved to the end
- * of the priority chain and only used when
- * all other high priority cpus are out of capacity.
- */
- smt_prio = prio * smp_num_siblings / (i * i);
- per_cpu(sched_core_priority, cpu) = smt_prio;
- i++;
- }
+ per_cpu(sched_core_priority, cpu) = prio;
}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0f35d44c56fe..fb8f52149be9 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -71,7 +71,7 @@ static int kvm_set_wallclock(const struct timespec64 *now)
return -ENODEV;
}
-static noinstr u64 kvm_clock_read(void)
+static u64 kvm_clock_read(void)
{
u64 ret;
@@ -88,7 +88,7 @@ static u64 kvm_clock_get_cycles(struct clocksource *cs)
static noinstr u64 kvm_sched_clock_read(void)
{
- return kvm_clock_read() - kvm_sched_clock_offset;
+ return pvclock_clocksource_read_nowd(this_cpu_pvti()) - kvm_sched_clock_offset;
}
static inline void kvm_sched_clock_init(bool stable)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 525876e7b9f4..adc67f98819a 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -367,8 +367,10 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
ptep = get_locked_pte(mm, va, &ptl);
- pte_clear(mm, va, ptep);
- pte_unmap_unlock(ptep, ptl);
+ if (!WARN_ON_ONCE(!ptep)) {
+ pte_clear(mm, va, ptep);
+ pte_unmap_unlock(ptep, ptl);
+ }
}
va = (unsigned long)ldt_slot_va(ldt->slot);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 776f4b1e395b..a0c551846b35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -496,7 +496,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
*/
sev_es_nmi_complete();
if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
- arch_atomic_long_inc(&nsp->idt_calls);
+ raw_atomic_long_inc(&nsp->idt_calls);
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index b348a672f71d..b525fe6d6657 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/pnp.h>
#include <asm/setup.h>
#include <asm/bios_ebda.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dac41a0072ea..ff9b80a0e3e3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -759,15 +759,26 @@ bool xen_set_default_idle(void)
}
#endif
+struct cpumask cpus_stop_mask;
+
void __noreturn stop_this_cpu(void *dummy)
{
+ struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
+ unsigned int cpu = smp_processor_id();
+
local_irq_disable();
+
/*
- * Remove this CPU:
+ * Remove this CPU from the online mask and disable it
+ * unconditionally. This might be redundant in case that the reboot
+ * vector was handled late and stop_other_cpus() sent an NMI.
+ *
+ * According to SDM and APM NMIs can be accepted even after soft
+ * disabling the local APIC.
*/
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
disable_local_APIC();
- mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+ mcheck_cpu_clear(c);
/*
* Use wbinvd on processors that support SME. This provides support
@@ -781,8 +792,17 @@ void __noreturn stop_this_cpu(void *dummy)
* Test the CPUID bit directly because the machine might've cleared
* X86_FEATURE_SME due to cmdline options.
*/
- if (cpuid_eax(0x8000001f) & BIT(0))
+ if (c->extended_cpuid_level >= 0x8000001f && (cpuid_eax(0x8000001f) & BIT(0)))
native_wbinvd();
+
+ /*
+ * This brings a cache line back and dirties it, but
+ * native_stop_other_cpus() will overwrite cpus_stop_mask after it
+ * observed that all CPUs reported stop. This write will invalidate
+ * the related cache line on this CPU.
+ */
+ cpumask_clear_cpu(cpu, &cpus_stop_mask);
+
for (;;) {
/*
* Use native_halt() so that memory contents don't change
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 56acf53a782a..b3f81379c2fc 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -101,11 +101,11 @@ u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd)
* updating at the same time, and one of them could be slightly behind,
* making the assumption that last_value always go forward fail to hold.
*/
- last = arch_atomic64_read(&last_value);
+ last = raw_atomic64_read(&last_value);
do {
if (ret <= last)
return last;
- } while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret));
+ } while (!raw_atomic64_try_cmpxchg(&last_value, &last, ret));
return ret;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16babff771bd..fd975a4a5200 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -796,7 +796,6 @@ static void __init early_reserve_memory(void)
memblock_x86_reserve_range_setup_data();
- reserve_ibft_region();
reserve_bios_regions();
trim_snb_memory();
}
@@ -1032,11 +1031,14 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_init();
+ reserve_ibft_region();
dmi_setup();
/*
* VMware detection requires dmi to be available, so this
* needs to be done after dmi_setup(), for the boot CPU.
+ * For some guest types (Xen PV, SEV-SNP, TDX) it is required to be
+ * called before cache_bp_init() for setting up MTRR state.
*/
init_hypervisor_platform();
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 3a5b0c9c4fcc..2eabccde94fb 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -12,6 +12,9 @@
#ifndef __BOOT_COMPRESSED
#define error(v) pr_err(v)
#define has_cpuflag(f) boot_cpu_has(f)
+#else
+#undef WARN
+#define WARN(condition, format...) (!!(condition))
#endif
/* I/O parameters for CPUID-related helpers */
@@ -991,3 +994,103 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
cpuid_ext_range_max = fn->eax;
}
}
+
+static void pvalidate_pages(struct snp_psc_desc *desc)
+{
+ struct psc_entry *e;
+ unsigned long vaddr;
+ unsigned int size;
+ unsigned int i;
+ bool validate;
+ int rc;
+
+ for (i = 0; i <= desc->hdr.end_entry; i++) {
+ e = &desc->entries[i];
+
+ vaddr = (unsigned long)pfn_to_kaddr(e->gfn);
+ size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
+ validate = e->operation == SNP_PAGE_STATE_PRIVATE;
+
+ rc = pvalidate(vaddr, size, validate);
+ if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
+ unsigned long vaddr_end = vaddr + PMD_SIZE;
+
+ for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
+ rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
+ if (rc)
+ break;
+ }
+ }
+
+ if (rc) {
+ WARN(1, "Failed to validate address 0x%lx ret %d", vaddr, rc);
+ sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+ }
+ }
+}
+
+static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
+{
+ int cur_entry, end_entry, ret = 0;
+ struct snp_psc_desc *data;
+ struct es_em_ctxt ctxt;
+
+ vc_ghcb_invalidate(ghcb);
+
+ /* Copy the input desc into GHCB shared buffer */
+ data = (struct snp_psc_desc *)ghcb->shared_buffer;
+ memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
+
+ /*
+ * As per the GHCB specification, the hypervisor can resume the guest
+ * before processing all the entries. Check whether all the entries
+ * are processed. If not, then keep retrying. Note, the hypervisor
+ * will update the data memory directly to indicate the status, so
+ * reference the data->hdr everywhere.
+ *
+ * The strategy here is to wait for the hypervisor to change the page
+ * state in the RMP table before guest accesses the memory pages. If the
+ * page state change was not successful, then later memory access will
+ * result in a crash.
+ */
+ cur_entry = data->hdr.cur_entry;
+ end_entry = data->hdr.end_entry;
+
+ while (data->hdr.cur_entry <= data->hdr.end_entry) {
+ ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
+
+ /* This will advance the shared buffer data points to. */
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+
+ /*
+ * Page State Change VMGEXIT can pass error code through
+ * exit_info_2.
+ */
+ if (WARN(ret || ghcb->save.sw_exit_info_2,
+ "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+ ret, ghcb->save.sw_exit_info_2)) {
+ ret = 1;
+ goto out;
+ }
+
+ /* Verify that reserved bit is not set */
+ if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * Sanity check that entry processing is not going backwards.
+ * This will happen only if hypervisor is tricking us.
+ */
+ if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+ end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b031244d6d2d..1ee7bed453de 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -113,13 +113,23 @@ struct ghcb_state {
};
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
-DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
-
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
struct sev_config {
__u64 debug : 1,
- __reserved : 63;
+
+ /*
+ * A flag used by __set_pages_state() that indicates when the
+ * per-CPU GHCB has been created and registered and thus can be
+ * used by the BSP instead of the early boot GHCB.
+ *
+ * For APs, the per-CPU GHCB is created before they are started
+ * and registered upon startup, so this flag can be used globally
+ * for the BSP and APs.
+ */
+ ghcbs_initialized : 1,
+
+ __reserved : 62;
};
static struct sev_config sev_cfg __read_mostly;
@@ -645,32 +655,26 @@ static u64 __init get_jump_table_addr(void)
return ret;
}
-static void pvalidate_pages(unsigned long vaddr, unsigned int npages, bool validate)
-{
- unsigned long vaddr_end;
- int rc;
-
- vaddr = vaddr & PAGE_MASK;
- vaddr_end = vaddr + (npages << PAGE_SHIFT);
-
- while (vaddr < vaddr_end) {
- rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
- if (WARN(rc, "Failed to validate address 0x%lx ret %d", vaddr, rc))
- sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
-
- vaddr = vaddr + PAGE_SIZE;
- }
-}
-
-static void __init early_set_pages_state(unsigned long paddr, unsigned int npages, enum psc_op op)
+static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
+ unsigned long npages, enum psc_op op)
{
unsigned long paddr_end;
u64 val;
+ int ret;
+
+ vaddr = vaddr & PAGE_MASK;
paddr = paddr & PAGE_MASK;
paddr_end = paddr + (npages << PAGE_SHIFT);
while (paddr < paddr_end) {
+ if (op == SNP_PAGE_STATE_SHARED) {
+ /* Page validation must be rescinded before changing to shared */
+ ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false);
+ if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
+ goto e_term;
+ }
+
/*
* Use the MSR protocol because this function can be called before
* the GHCB is established.
@@ -691,7 +695,15 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage
paddr, GHCB_MSR_PSC_RESP_VAL(val)))
goto e_term;
- paddr = paddr + PAGE_SIZE;
+ if (op == SNP_PAGE_STATE_PRIVATE) {
+ /* Page validation must be performed after changing to private */
+ ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
+ if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
+ goto e_term;
+ }
+
+ vaddr += PAGE_SIZE;
+ paddr += PAGE_SIZE;
}
return;
@@ -701,7 +713,7 @@ e_term:
}
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
- unsigned int npages)
+ unsigned long npages)
{
/*
* This can be invoked in early boot while running identity mapped, so
@@ -716,14 +728,11 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
* Ask the hypervisor to mark the memory pages as private in the RMP
* table.
*/
- early_set_pages_state(paddr, npages, SNP_PAGE_STATE_PRIVATE);
-
- /* Validate the memory pages after they've been added in the RMP table. */
- pvalidate_pages(vaddr, npages, true);
+ early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
- unsigned int npages)
+ unsigned long npages)
{
/*
* This can be invoked in early boot while running identity mapped, so
@@ -734,11 +743,8 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
- /* Invalidate the memory pages before they are marked shared in the RMP table. */
- pvalidate_pages(vaddr, npages, false);
-
/* Ask hypervisor to mark the memory pages shared in the RMP table. */
- early_set_pages_state(paddr, npages, SNP_PAGE_STATE_SHARED);
+ early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
@@ -756,96 +762,16 @@ void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op
WARN(1, "invalid memory op %d\n", op);
}
-static int vmgexit_psc(struct snp_psc_desc *desc)
+static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+ unsigned long vaddr_end, int op)
{
- int cur_entry, end_entry, ret = 0;
- struct snp_psc_desc *data;
struct ghcb_state state;
- struct es_em_ctxt ctxt;
- unsigned long flags;
- struct ghcb *ghcb;
-
- /*
- * __sev_get_ghcb() needs to run with IRQs disabled because it is using
- * a per-CPU GHCB.
- */
- local_irq_save(flags);
-
- ghcb = __sev_get_ghcb(&state);
- if (!ghcb) {
- ret = 1;
- goto out_unlock;
- }
-
- /* Copy the input desc into GHCB shared buffer */
- data = (struct snp_psc_desc *)ghcb->shared_buffer;
- memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
-
- /*
- * As per the GHCB specification, the hypervisor can resume the guest
- * before processing all the entries. Check whether all the entries
- * are processed. If not, then keep retrying. Note, the hypervisor
- * will update the data memory directly to indicate the status, so
- * reference the data->hdr everywhere.
- *
- * The strategy here is to wait for the hypervisor to change the page
- * state in the RMP table before guest accesses the memory pages. If the
- * page state change was not successful, then later memory access will
- * result in a crash.
- */
- cur_entry = data->hdr.cur_entry;
- end_entry = data->hdr.end_entry;
-
- while (data->hdr.cur_entry <= data->hdr.end_entry) {
- ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
-
- /* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
-
- /*
- * Page State Change VMGEXIT can pass error code through
- * exit_info_2.
- */
- if (WARN(ret || ghcb->save.sw_exit_info_2,
- "SNP: PSC failed ret=%d exit_info_2=%llx\n",
- ret, ghcb->save.sw_exit_info_2)) {
- ret = 1;
- goto out;
- }
-
- /* Verify that reserved bit is not set */
- if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
- ret = 1;
- goto out;
- }
-
- /*
- * Sanity check that entry processing is not going backwards.
- * This will happen only if hypervisor is tricking us.
- */
- if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
-"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
- end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
- ret = 1;
- goto out;
- }
- }
-
-out:
- __sev_put_ghcb(&state);
-
-out_unlock:
- local_irq_restore(flags);
-
- return ret;
-}
-
-static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
- unsigned long vaddr_end, int op)
-{
+ bool use_large_entry;
struct psc_hdr *hdr;
struct psc_entry *e;
+ unsigned long flags;
unsigned long pfn;
+ struct ghcb *ghcb;
int i;
hdr = &data->hdr;
@@ -854,74 +780,104 @@ static void __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
memset(data, 0, sizeof(*data));
i = 0;
- while (vaddr < vaddr_end) {
- if (is_vmalloc_addr((void *)vaddr))
+ while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
+ hdr->end_entry = i;
+
+ if (is_vmalloc_addr((void *)vaddr)) {
pfn = vmalloc_to_pfn((void *)vaddr);
- else
+ use_large_entry = false;
+ } else {
pfn = __pa(vaddr) >> PAGE_SHIFT;
+ use_large_entry = true;
+ }
e->gfn = pfn;
e->operation = op;
- hdr->end_entry = i;
- /*
- * Current SNP implementation doesn't keep track of the RMP page
- * size so use 4K for simplicity.
- */
- e->pagesize = RMP_PG_SIZE_4K;
+ if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
+ (vaddr_end - vaddr) >= PMD_SIZE) {
+ e->pagesize = RMP_PG_SIZE_2M;
+ vaddr += PMD_SIZE;
+ } else {
+ e->pagesize = RMP_PG_SIZE_4K;
+ vaddr += PAGE_SIZE;
+ }
- vaddr = vaddr + PAGE_SIZE;
e++;
i++;
}
- if (vmgexit_psc(data))
+ /* Page validation must be rescinded before changing to shared */
+ if (op == SNP_PAGE_STATE_SHARED)
+ pvalidate_pages(data);
+
+ local_irq_save(flags);
+
+ if (sev_cfg.ghcbs_initialized)
+ ghcb = __sev_get_ghcb(&state);
+ else
+ ghcb = boot_ghcb;
+
+ /* Invoke the hypervisor to perform the page state changes */
+ if (!ghcb || vmgexit_psc(ghcb, data))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+
+ if (sev_cfg.ghcbs_initialized)
+ __sev_put_ghcb(&state);
+
+ local_irq_restore(flags);
+
+ /* Page validation must be performed after changing to private */
+ if (op == SNP_PAGE_STATE_PRIVATE)
+ pvalidate_pages(data);
+
+ return vaddr;
}
-static void set_pages_state(unsigned long vaddr, unsigned int npages, int op)
+static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
- unsigned long vaddr_end, next_vaddr;
- struct snp_psc_desc *desc;
+ struct snp_psc_desc desc;
+ unsigned long vaddr_end;
- desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT);
- if (!desc)
- panic("SNP: failed to allocate memory for PSC descriptor\n");
+ /* Use the MSR protocol when a GHCB is not available. */
+ if (!boot_ghcb)
+ return early_set_pages_state(vaddr, __pa(vaddr), npages, op);
vaddr = vaddr & PAGE_MASK;
vaddr_end = vaddr + (npages << PAGE_SHIFT);
- while (vaddr < vaddr_end) {
- /* Calculate the last vaddr that fits in one struct snp_psc_desc. */
- next_vaddr = min_t(unsigned long, vaddr_end,
- (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr);
-
- __set_pages_state(desc, vaddr, next_vaddr, op);
-
- vaddr = next_vaddr;
- }
-
- kfree(desc);
+ while (vaddr < vaddr_end)
+ vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}
-void snp_set_memory_shared(unsigned long vaddr, unsigned int npages)
+void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
- pvalidate_pages(vaddr, npages, false);
-
set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}
-void snp_set_memory_private(unsigned long vaddr, unsigned int npages)
+void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
{
if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return;
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
+}
+
+void snp_accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ unsigned long vaddr;
+ unsigned int npages;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ vaddr = (unsigned long)__va(start);
+ npages = (end - start) >> PAGE_SHIFT;
- pvalidate_pages(vaddr, npages, true);
+ set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
static int snp_set_vmsa(void *va, bool vmsa)
@@ -1267,6 +1223,8 @@ void setup_ghcb(void)
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
snp_register_per_cpu_ghcb();
+ sev_cfg.ghcbs_initialized = true;
+
return;
}
@@ -1328,7 +1286,7 @@ static void sev_es_play_dead(void)
* If we get here, the VCPU was woken up again. Jump to CPU
* startup code to get it back online.
*/
- start_cpu0();
+ soft_restart_cpu();
}
#else /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead native_play_dead
@@ -1395,9 +1353,6 @@ void __init sev_es_init_vc_handling(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
- /* Enable SEV-ES special handling */
- static_branch_enable(&sev_es_enable_key);
-
/* Initialize per-cpu GHCB pages */
for_each_possible_cpu(cpu) {
alloc_runtime_data(cpu);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 004cb30b7419..cfeec3ee877e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -182,7 +182,7 @@ get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
static unsigned long __ro_after_init max_frame_size;
static unsigned int __ro_after_init fpu_default_state_size;
-void __init init_sigframe_size(void)
+static int __init init_sigframe_size(void)
{
fpu_default_state_size = fpu__get_fpstate_size();
@@ -194,7 +194,9 @@ void __init init_sigframe_size(void)
max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
pr_info("max sigframe size: %lu\n", max_frame_size);
+ return 0;
}
+early_initcall(init_sigframe_size);
unsigned long get_sigframe_size(void)
{
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 375b33ecafa2..7eb18ca7bd45 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -21,12 +21,14 @@
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
+#include <linux/kexec.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/cpu.h>
#include <asm/idtentry.h>
#include <asm/nmi.h>
#include <asm/mce.h>
@@ -129,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
}
/*
- * this function calls the 'stop' function on all other CPUs in the system.
+ * Disable virtualization, APIC etc. and park the CPU in a HLT loop
*/
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
@@ -146,61 +148,96 @@ static int register_stop_handler(void)
static void native_stop_other_cpus(int wait)
{
- unsigned long flags;
- unsigned long timeout;
+ unsigned int cpu = smp_processor_id();
+ unsigned long flags, timeout;
if (reboot_force)
return;
- /*
- * Use an own vector here because smp_call_function
- * does lots of things not suitable in a panic situation.
- */
+ /* Only proceed if this is the first CPU to reach this code */
+ if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1)
+ return;
+
+ /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
+ if (kexec_in_progress)
+ smp_kick_mwait_play_dead();
/*
- * We start by using the REBOOT_VECTOR irq.
- * The irq is treated as a sync point to allow critical
- * regions of code on other cpus to release their spin locks
- * and re-enable irqs. Jumping straight to an NMI might
- * accidentally cause deadlocks with further shutdown/panic
- * code. By syncing, we give the cpus up to one second to
- * finish their work before we force them off with the NMI.
+ * 1) Send an IPI on the reboot vector to all other CPUs.
+ *
+ * The other CPUs should react on it after leaving critical
+ * sections and re-enabling interrupts. They might still hold
+ * locks, but there is nothing which can be done about that.
+ *
+ * 2) Wait for all other CPUs to report that they reached the
+ * HLT loop in stop_this_cpu()
+ *
+ * 3) If the system uses INIT/STARTUP for CPU bringup, then
+ * send all present CPUs an INIT vector, which brings them
+ * completely out of the way.
+ *
+ * 4) If #3 is not possible and #2 timed out send an NMI to the
+ * CPUs which did not yet report
+ *
+ * 5) Wait for all other CPUs to report that they reached the
+ * HLT loop in stop_this_cpu()
+ *
+ * #4 can obviously race against a CPU reaching the HLT loop late.
+ * That CPU will have reported already and the "have all CPUs
+ * reached HLT" condition will be true despite the fact that the
+ * other CPU is still handling the NMI. Again, there is no
+ * protection against that as "disabled" APICs still respond to
+ * NMIs.
*/
- if (num_online_cpus() > 1) {
- /* did someone beat us here? */
- if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
- return;
-
- /* sync above data before sending IRQ */
- wmb();
+ cpumask_copy(&cpus_stop_mask, cpu_online_mask);
+ cpumask_clear_cpu(cpu, &cpus_stop_mask);
+ if (!cpumask_empty(&cpus_stop_mask)) {
apic_send_IPI_allbutself(REBOOT_VECTOR);
/*
* Don't wait longer than a second for IPI completion. The
* wait request is not checked here because that would
- * prevent an NMI shutdown attempt in case that not all
+ * prevent an NMI/INIT shutdown in case that not all
* CPUs reach shutdown state.
*/
timeout = USEC_PER_SEC;
- while (num_online_cpus() > 1 && timeout--)
+ while (!cpumask_empty(&cpus_stop_mask) && timeout--)
udelay(1);
}
- /* if the REBOOT_VECTOR didn't work, try with the NMI */
- if (num_online_cpus() > 1) {
+ /*
+ * Park all other CPUs in INIT including "offline" CPUs, if
+ * possible. That's a safe place where they can't resume execution
+ * of HLT and then execute the HLT loop from overwritten text or
+ * page tables.
+ *
+ * The only downside is a broadcast MCE, but up to the point where
+ * the kexec() kernel brought all APs online again an MCE will just
+ * make HLT resume and handle the MCE. The machine crashes and burns
+ * due to overwritten text, page tables and data. So there is a
+ * choice between fire and frying pan. The result is pretty much
+ * the same. Choose frying pan until x86 provides a sane mechanism
+ * to park a CPU.
+ */
+ if (smp_park_other_cpus_in_init())
+ goto done;
+
+ /*
+ * If park with INIT was not possible and the REBOOT_VECTOR didn't
+ * take all secondary CPUs offline, try with the NMI.
+ */
+ if (!cpumask_empty(&cpus_stop_mask)) {
/*
* If NMI IPI is enabled, try to register the stop handler
* and send the IPI. In any case try to wait for the other
* CPUs to stop.
*/
if (!smp_no_nmi_ipi && !register_stop_handler()) {
- /* Sync above data before sending IRQ */
- wmb();
-
pr_emerg("Shutting down cpus with NMI\n");
- apic_send_IPI_allbutself(NMI_VECTOR);
+ for_each_cpu(cpu, &cpus_stop_mask)
+ apic->send_IPI(cpu, NMI_VECTOR);
}
/*
* Don't wait longer than 10 ms if the caller didn't
@@ -208,14 +245,21 @@ static void native_stop_other_cpus(int wait)
* one or more CPUs do not reach shutdown state.
*/
timeout = USEC_PER_MSEC * 10;
- while (num_online_cpus() > 1 && (wait || timeout--))
+ while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--))
udelay(1);
}
+done:
local_irq_save(flags);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
local_irq_restore(flags);
+
+ /*
+ * Ensure that the cpus_stop_mask cache lines are invalidated on
+ * the other CPUs. See comment vs. SME in stop_this_cpu().
+ */
+ cpumask_clear(&cpus_stop_mask);
}
/*
@@ -268,8 +312,7 @@ struct smp_ops smp_ops = {
#endif
.smp_send_reschedule = native_smp_send_reschedule,
- .cpu_up = native_cpu_up,
- .cpu_die = native_cpu_die,
+ .kick_ap_alive = native_kick_ap,
.cpu_disable = native_cpu_disable,
.play_dead = native_play_dead,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 352f0ce1ece4..ed2d51960a7d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,10 +53,13 @@
#include <linux/tboot.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
+#include <linux/kexec.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
#include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
+#include <linux/mc146818rtc.h>
#include <asm/acpi.h>
#include <asm/cacheinfo.h>
@@ -74,7 +77,7 @@
#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
-#include <linux/mc146818rtc.h>
+#include <asm/microcode.h>
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
@@ -101,6 +104,26 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
+/* Representing CPUs for which sibling maps can be computed */
+static cpumask_var_t cpu_sibling_setup_mask;
+
+struct mwait_cpu_dead {
+ unsigned int control;
+ unsigned int status;
+};
+
+#define CPUDEAD_MWAIT_WAIT 0xDEADBEEF
+#define CPUDEAD_MWAIT_KEXEC_HLT 0x4A17DEAD
+
+/*
+ * Cache line aligned data for mwait_play_dead(). Separate on purpose so
+ * that it's unlikely to be touched by other CPUs.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead);
+
/* Logical package management. We might want to allocate that dynamically */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
@@ -121,7 +144,6 @@ int arch_update_cpu_topology(void)
return retval;
}
-
static unsigned int smpboot_warm_reset_vector_count;
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
@@ -154,66 +176,63 @@ static inline void smpboot_restore_warm_reset_vector(void)
}
-/*
- * Report back to the Boot Processor during boot time or to the caller processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
{
- int cpuid;
+ int cpuid = smp_processor_id();
- /*
- * If waken up by an INIT in an 82489DX configuration
- * cpu_callout_mask guarantees we don't get here before
- * an INIT_deassert IPI reaches our local APIC, so it is
- * now safe to touch our local APIC.
- */
- cpuid = smp_processor_id();
+ /* Mop up eventual mwait_play_dead() wreckage */
+ this_cpu_write(mwait_cpu_dead.status, 0);
+ this_cpu_write(mwait_cpu_dead.control, 0);
/*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
+ * If woken up by an INIT in an 82489DX configuration the alive
+ * synchronization guarantees that the CPU does not reach this
+ * point before an INIT_deassert IPI reaches the local APIC, so it
+ * is now safe to touch the local APIC.
+ *
+ * Set up this CPU, first the APIC, which is probably redundant on
+ * most boards.
*/
apic_ap_setup();
- /*
- * Save our processor parameters. Note: this information
- * is needed for clock calibration.
- */
+ /* Save the processor parameters. */
smp_store_cpu_info(cpuid);
/*
* The topology information must be up to date before
- * calibrate_delay() and notify_cpu_starting().
+ * notify_cpu_starting().
*/
- set_cpu_sibling_map(raw_smp_processor_id());
+ set_cpu_sibling_map(cpuid);
ap_init_aperfmperf();
- /*
- * Get our bogomips.
- * Update loops_per_jiffy in cpu_data. Previous call to
- * smp_store_cpu_info() stored a value that is close but not as
- * accurate as the value just calculated.
- */
- calibrate_delay();
- cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
wmb();
+ /*
+ * This runs the AP through all the cpuhp states to its target
+ * state CPUHP_ONLINE.
+ */
notify_cpu_starting(cpuid);
+}
+static void ap_calibrate_delay(void)
+{
/*
- * Allow the master to continue.
+ * Calibrate the delay loop and update loops_per_jiffy in cpu_data.
+ * smp_store_cpu_info() stored a value that is close but not as
+ * accurate as the value just calculated.
+ *
+ * As this is invoked after the TSC synchronization check,
+ * calibrate_delay_is_known() will skip the calibration routine
+ * when TSC is synchronized across sockets.
*/
- cpumask_set_cpu(cpuid, cpu_callin_mask);
+ calibrate_delay();
+ cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
}
-static int cpu0_logical_apicid;
-static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
@@ -226,24 +245,63 @@ static void notrace start_secondary(void *unused)
*/
cr4_init();
-#ifdef CONFIG_X86_32
- /* switch away from the initial page table */
- load_cr3(swapper_pg_dir);
- __flush_tlb_all();
-#endif
- cpu_init_secondary();
+ /*
+ * 32-bit specific. 64-bit reaches this code with the correct page
+ * table established. Yet another historical divergence.
+ */
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ /* switch away from the initial page table */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+ }
+
+ cpu_init_exception_handling();
+
+ /*
+ * 32-bit systems load the microcode from the ASM startup code for
+ * historical reasons.
+ *
+ * On 64-bit systems load it before reaching the AP alive
+ * synchronization point below so it is not part of the full per
+ * CPU serialized bringup part when "parallel" bringup is enabled.
+ *
+ * That's even safe when hyperthreading is enabled in the CPU as
+ * the core code starts the primary threads first and leaves the
+ * secondary threads waiting for SIPI. Loading microcode on
+ * physical cores concurrently is a safe operation.
+ *
+ * This covers both the Intel specific issue that concurrent
+ * microcode loading on SMT siblings must be prohibited and the
+ * vendor independent issue that microcode loading which changes
+ * CPUID, MSRs etc. must be strictly serialized to maintain
+ * software state correctness.
+ */
+ if (IS_ENABLED(CONFIG_X86_64))
+ load_ucode_ap();
+
+ /*
+ * Synchronization point with the hotplug core. Sets this CPU's
+ * synchronization state to ALIVE and spin-waits for the control CPU to
+ * release this CPU for further bringup.
+ */
+ cpuhp_ap_sync_alive();
+
+ cpu_init();
+ fpu__init_cpu();
rcu_cpu_starting(raw_smp_processor_id());
x86_cpuinit.early_percpu_clock_init();
- smp_callin();
- enable_start_cpu0 = 0;
+ ap_starting();
+
+ /* Check TSC synchronization with the control CPU. */
+ check_tsc_sync_target();
- /* otherwise gcc will move up smp_processor_id before the cpu_init */
- barrier();
/*
- * Check TSC synchronization with the boot CPU:
+ * Calibrate the delay loop after the TSC synchronization check.
+ * This allows skipping the calibration when TSC is synchronized
+ * across sockets.
*/
- check_tsc_sync_target();
+ ap_calibrate_delay();
speculative_store_bypass_ht_init();
@@ -257,7 +315,6 @@ static void notrace start_secondary(void *unused)
set_cpu_online(smp_processor_id(), true);
lapic_online();
unlock_vector_lock();
- cpu_set_state_online(smp_processor_id());
x86_platform.nmi_init();
/* enable local interrupts */
@@ -270,15 +327,6 @@ static void notrace start_secondary(void *unused)
}
/**
- * topology_is_primary_thread - Check whether CPU is the primary SMT thread
- * @cpu: CPU to check
- */
-bool topology_is_primary_thread(unsigned int cpu)
-{
- return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
-}
-
-/**
* topology_smt_supported - Check whether SMT is supported by the CPUs
*/
bool topology_smt_supported(void)
@@ -288,6 +336,7 @@ bool topology_smt_supported(void)
/**
* topology_phys_to_logical_pkg - Map a physical package id to a logical
+ * @phys_pkg: The physical package id to map
*
* Returns logical package id or -1 if not found
*/
@@ -304,15 +353,17 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+
/**
* topology_phys_to_logical_die - Map a physical die id to logical
+ * @die_id: The physical die id to map
+ * @cur_cpu: The CPU for which the mapping is done
*
* Returns logical die id or -1 if not found
*/
-int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
{
- int cpu;
- int proc_id = cpu_data(cur_cpu).phys_proc_id;
+ int cpu, proc_id = cpu_data(cur_cpu).phys_proc_id;
for_each_possible_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -323,7 +374,6 @@ int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
}
return -1;
}
-EXPORT_SYMBOL(topology_phys_to_logical_die);
/**
* topology_update_package_map - Update the physical to logical package map
@@ -398,7 +448,7 @@ void smp_store_cpu_info(int id)
c->cpu_index = id;
/*
* During boot time, CPU0 has this setup already. Save the info when
- * bringing up AP or offlined CPU0.
+ * bringing up an AP.
*/
identify_secondary_cpu(c);
c->initialized = true;
@@ -552,7 +602,7 @@ static int x86_core_flags(void)
#ifdef CONFIG_SCHED_SMT
static int x86_smt_flags(void)
{
- return cpu_smt_flags() | x86_sched_itmt_flags();
+ return cpu_smt_flags();
}
#endif
#ifdef CONFIG_SCHED_CLUSTER
@@ -563,50 +613,57 @@ static int x86_cluster_flags(void)
#endif
#endif
-static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
-#endif
-#ifdef CONFIG_SCHED_CLUSTER
- { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
-#endif
-#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
-#endif
- { NULL, },
-};
+/*
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
+ * Sub-NUMA Clustering have this.
+ */
+static bool x86_has_numa_in_package;
-static struct sched_domain_topology_level x86_hybrid_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
-#endif
-#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
-#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
+static struct sched_domain_topology_level x86_topology[6];
+
+static void __init build_sched_topology(void)
+{
+ int i = 0;
-static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
+ x86_topology[i++] = (struct sched_domain_topology_level){
+ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT)
+ };
#endif
#ifdef CONFIG_SCHED_CLUSTER
- { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+ /*
+ * For now, skip the cluster domain on Hybrid.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+ x86_topology[i++] = (struct sched_domain_topology_level){
+ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
+ };
+ }
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
+ x86_topology[i++] = (struct sched_domain_topology_level){
+ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC)
+ };
#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
+ /*
+ * When there is NUMA topology inside the package skip the DIE domain
+ * since the NUMA domains will auto-magically create the right spanning
+ * domains based on the SLIT.
+ */
+ if (!x86_has_numa_in_package) {
+ x86_topology[i++] = (struct sched_domain_topology_level){
+ cpu_cpu_mask, SD_INIT_NAME(DIE)
+ };
+ }
-/*
- * Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
- * Sub-NUMA Clustering have this.
- */
-static bool x86_has_numa_in_package;
+ /*
+ * There must be one trailing NULL entry left.
+ */
+ BUG_ON(i >= ARRAY_SIZE(x86_topology)-1);
+
+ set_sched_topology(x86_topology);
+}
void set_cpu_sibling_map(int cpu)
{
@@ -706,9 +763,9 @@ static void impress_friends(void)
* Allow the user to impress friends.
*/
pr_debug("Before bogomips\n");
- for_each_possible_cpu(cpu)
- if (cpumask_test_cpu(cpu, cpu_callout_mask))
- bogosum += cpu_data(cpu).loops_per_jiffy;
+ for_each_online_cpu(cpu)
+ bogosum += cpu_data(cpu).loops_per_jiffy;
+
pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
num_online_cpus(),
bogosum/(500000/HZ),
@@ -795,86 +852,42 @@ static void __init smp_quirk_init_udelay(void)
}
/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
+ * Send the INIT assert/deassert pair to the target CPU (no STARTUP IPI).
*/
-int
-wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
+static void send_init_sequence(int phys_apicid)
{
- u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
- unsigned long send_status, accept_status = 0;
- int maxlvt;
+ int maxlvt = lapic_get_maxlvt();
- /* Target chip */
- /* Boot on the stack */
- /* Kick the second */
- apic_icr_write(APIC_DM_NMI | dm, apicid);
-
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
+ /* Be paranoid about clearing APIC errors. */
if (APIC_INTEGRATED(boot_cpu_apic_version)) {
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ /* Due to the Pentium erratum 3AP. */
+ if (maxlvt > 3)
apic_write(APIC_ESR, 0);
- accept_status = (apic_read(APIC_ESR) & 0xEF);
+ apic_read(APIC_ESR);
}
- pr_debug("NMI sent\n");
- if (send_status)
- pr_err("APIC never delivered???\n");
- if (accept_status)
- pr_err("APIC delivery error (%lx)\n", accept_status);
+ /* Assert INIT on the target CPU */
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT, phys_apicid);
+ safe_apic_wait_icr_idle();
- return (send_status | accept_status);
+ udelay(init_udelay);
+
+ /* Deassert INIT on the target CPU */
+ apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
+ safe_apic_wait_icr_idle();
}
-static int
-wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
+/*
+ * Wake up AP by INIT, INIT, STARTUP sequence.
+ */
+static int wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status = 0, accept_status = 0;
- int maxlvt, num_starts, j;
+ int num_starts, j, maxlvt;
+ preempt_disable();
maxlvt = lapic_get_maxlvt();
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- if (APIC_INTEGRATED(boot_cpu_apic_version)) {
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- }
-
- pr_debug("Asserting INIT\n");
-
- /*
- * Turn INIT on target chip
- */
- /*
- * Send IPI
- */
- apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
- phys_apicid);
-
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- udelay(init_udelay);
-
- pr_debug("Deasserting INIT\n");
-
- /* Target chip */
- /* Send IPI */
- apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
-
- pr_debug("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
+ send_init_sequence(phys_apicid);
mb();
@@ -945,15 +958,16 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
if (accept_status)
pr_err("APIC delivery error (%lx)\n", accept_status);
+ preempt_enable();
return (send_status | accept_status);
}
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
+ static int width, node_width, first = 1;
static int current_node = NUMA_NO_NODE;
int node = early_cpu_to_node(cpu);
- static int width, node_width;
if (!width)
width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */
@@ -961,10 +975,10 @@ static void announce_cpu(int cpu, int apicid)
if (!node_width)
node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */
- if (cpu == 1)
- printk(KERN_INFO "x86: Booting SMP configuration:\n");
-
if (system_state < SYSTEM_RUNNING) {
+ if (first)
+ pr_info("x86: Booting SMP configuration:\n");
+
if (node != current_node) {
if (current_node > (-1))
pr_cont("\n");
@@ -975,77 +989,16 @@ static void announce_cpu(int cpu, int apicid)
}
/* Add padding for the BSP */
- if (cpu == 1)
+ if (first)
pr_cont("%*s", width + 1, " ");
+ first = 0;
pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
-
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
node, cpu, apicid);
}
-static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
-{
- int cpu;
-
- cpu = smp_processor_id();
- if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
- return NMI_HANDLED;
-
- return NMI_DONE;
-}
-
-/*
- * Wake up AP by INIT, INIT, STARTUP sequence.
- *
- * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
- * boot-strap code which is not a desired behavior for waking up BSP. To
- * void the boot-strap code, wake up CPU0 by NMI instead.
- *
- * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
- * (i.e. physically hot removed and then hot added), NMI won't wake it up.
- * We'll change this code in the future to wake up hard offlined CPU0 if
- * real platform and request are available.
- */
-static int
-wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
- int *cpu0_nmi_registered)
-{
- int id;
- int boot_error;
-
- preempt_disable();
-
- /*
- * Wake up AP by INIT, INIT, STARTUP sequence.
- */
- if (cpu) {
- boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
- goto out;
- }
-
- /*
- * Wake up BSP by nmi.
- *
- * Register a NMI handler to help wake up CPU0.
- */
- boot_error = register_nmi_handler(NMI_LOCAL,
- wakeup_cpu0_nmi, 0, "wake_cpu0");
-
- if (!boot_error) {
- enable_start_cpu0 = 1;
- *cpu0_nmi_registered = 1;
- id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
- boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
- }
-
-out:
- preempt_enable();
-
- return boot_error;
-}
-
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
@@ -1071,17 +1024,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from
+ * Returns zero if startup was successfully sent, else error code from
* ->wakeup_secondary_cpu.
*/
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
- int *cpu0_nmi_registered)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
- /* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start;
-
- unsigned long boot_error = 0;
- unsigned long timeout;
+ int ret;
#ifdef CONFIG_X86_64
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1094,7 +1043,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
if (IS_ENABLED(CONFIG_X86_32)) {
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_stack = idle->thread.sp;
- } else {
+ } else if (!(smpboot_control & STARTUP_PARALLEL_MASK)) {
smpboot_control = cpu;
}
@@ -1108,7 +1057,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
* This grunge runs the startup process for
* the targeted processor.
*/
-
if (x86_platform.legacy.warm_reset) {
pr_debug("Setting warm reset code and vector.\n");
@@ -1123,13 +1071,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
}
}
- /*
- * AP might wait on cpu_callout_mask in cpu_init() with
- * cpu_initialized_mask set if previous attempt to online
- * it timed-out. Clear cpu_initialized_mask so that after
- * INIT/SIPI it could start with a clean state.
- */
- cpumask_clear_cpu(cpu, cpu_initialized_mask);
smp_mb();
/*
@@ -1137,66 +1078,25 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
* - Use a method from the APIC driver if one defined, with wakeup
* straight to 64-bit mode preferred over wakeup to RM.
* Otherwise,
- * - Use an INIT boot APIC message for APs or NMI for BSP.
+ * - Use an INIT boot APIC message
*/
if (apic->wakeup_secondary_cpu_64)
- boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+ ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
else if (apic->wakeup_secondary_cpu)
- boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+ ret = apic->wakeup_secondary_cpu(apicid, start_ip);
else
- boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
- cpu0_nmi_registered);
-
- if (!boot_error) {
- /*
- * Wait 10s total for first sign of life from AP
- */
- boot_error = -1;
- timeout = jiffies + 10*HZ;
- while (time_before(jiffies, timeout)) {
- if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
- /*
- * Tell AP to proceed with initialization
- */
- cpumask_set_cpu(cpu, cpu_callout_mask);
- boot_error = 0;
- break;
- }
- schedule();
- }
- }
-
- if (!boot_error) {
- /*
- * Wait till AP completes initial initialization
- */
- while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /*
- * Allow other tasks to run while we wait for the
- * AP to come online. This also gives a chance
- * for the MTRR work(triggered by the AP coming online)
- * to be completed in the stop machine context.
- */
- schedule();
- }
- }
+ ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
- if (x86_platform.legacy.warm_reset) {
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
- }
-
- return boot_error;
+ /* If the wakeup mechanism failed, cleanup the warm reset vector */
+ if (ret)
+ arch_cpuhp_cleanup_kick_cpu(cpu);
+ return ret;
}
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
- int cpu0_nmi_registered = 0;
- unsigned long flags;
- int err, ret = 0;
+ int err;
lockdep_assert_irqs_enabled();
@@ -1210,24 +1110,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
}
/*
- * Already booted CPU?
- */
- if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
- pr_debug("do_boot_cpu %d Already started\n", cpu);
- return -ENOSYS;
- }
-
- /*
* Save current MTRR state in case it was changed since early boot
* (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
*/
mtrr_save_state();
- /* x86 CPUs take themselves offline, so delayed offline is OK. */
- err = cpu_check_up_prepare(cpu);
- if (err && err != -EBUSY)
- return err;
-
/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
@@ -1235,41 +1122,44 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
if (err)
return err;
- err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
- if (err) {
+ err = do_boot_cpu(apicid, cpu, tidle);
+ if (err)
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
- ret = -EIO;
- goto unreg_nmi;
- }
- /*
- * Check TSC synchronization with the AP (keep irqs disabled
- * while doing so):
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
+ return err;
+}
- while (!cpu_online(cpu)) {
- cpu_relax();
- touch_nmi_watchdog();
- }
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+ return smp_ops.kick_ap_alive(cpu, tidle);
+}
-unreg_nmi:
- /*
- * Clean up the nmi handler. Do this after the callin and callout sync
- * to avoid impact of possible long unregister time.
- */
- if (cpu0_nmi_registered)
- unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
+ /* Cleanup possible dangling ends... */
+ if (smp_ops.kick_ap_alive == native_kick_ap && x86_platform.legacy.warm_reset)
+ smpboot_restore_warm_reset_vector();
+}
- return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+ if (smp_ops.cleanup_dead_cpu)
+ smp_ops.cleanup_dead_cpu(cpu);
+
+ if (system_state == SYSTEM_RUNNING)
+ pr_info("CPU %u is now offline\n", cpu);
+}
+
+void arch_cpuhp_sync_state_poll(void)
+{
+ if (smp_ops.poll_sync_state)
+ smp_ops.poll_sync_state();
}
/**
- * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ * arch_disable_smp_support() - Disables SMP support for x86 at boot time
*/
-void arch_disable_smp_support(void)
+void __init arch_disable_smp_support(void)
{
disable_ioapic_support();
}
@@ -1361,14 +1251,6 @@ static void __init smp_cpu_index_default(void)
}
}
-static void __init smp_get_logical_apicid(void)
-{
- if (x2apic_mode)
- cpu0_logical_apicid = apic_read(APIC_LDR);
- else
- cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-}
-
void __init smp_prepare_cpus_common(void)
{
unsigned int i;
@@ -1379,7 +1261,6 @@ void __init smp_prepare_cpus_common(void)
* Setup boot CPU information
*/
smp_store_boot_cpu_info(); /* Final full version of the data */
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
for_each_possible_cpu(i) {
@@ -1390,18 +1271,24 @@ void __init smp_prepare_cpus_common(void)
zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
}
- /*
- * Set 'default' x86 topology, this matches default_topology() in that
- * it has NUMA nodes as a topology level. See also
- * native_smp_cpus_done().
- *
- * Must be done before set_cpus_sibling_map() is ran.
- */
- set_sched_topology(x86_topology);
-
set_cpu_sibling_map(0);
}
+#ifdef CONFIG_X86_64
+/* Establish whether parallel bringup can be supported. */
+bool __init arch_cpuhp_init_parallel_bringup(void)
+{
+ if (!x86_cpuinit.parallel_bringup) {
+ pr_info("Parallel CPU startup disabled by the platform\n");
+ return false;
+ }
+
+ smpboot_control = STARTUP_READ_APICID;
+ pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control);
+ return true;
+}
+#endif
+
/*
* Prepare for SMP bootup.
* @max_cpus: configured maximum number of CPUs, It is a legacy parameter
@@ -1431,8 +1318,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
- smp_get_logical_apicid();
-
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
@@ -1455,6 +1340,25 @@ void arch_thaw_secondary_cpus_end(void)
cache_aps_init();
}
+bool smp_park_other_cpus_in_init(void)
+{
+ unsigned int cpu, this_cpu = smp_processor_id();
+ unsigned int apicid;
+
+ if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
+ return false;
+
+ for_each_present_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ apicid = apic->cpu_present_to_apicid(cpu);
+ if (apicid == BAD_APICID)
+ continue;
+ send_init_sequence(apicid);
+ }
+ return true;
+}
+
/*
* Early setup to make printk work.
*/
@@ -1466,9 +1370,6 @@ void __init native_smp_prepare_boot_cpu(void)
if (!IS_ENABLED(CONFIG_SMP))
switch_gdt_and_percpu_base(me);
- /* already set me in cpu_online_mask in boot_cpu_init() */
- cpumask_set_cpu(me, cpu_callout_mask);
- cpu_set_state_online(me);
native_pv_lock_init();
}
@@ -1490,13 +1391,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
pr_debug("Boot done\n");
calculate_max_logical_packages();
-
- /* XXX for now assume numa-in-package and hybrid don't overlap */
- if (x86_has_numa_in_package)
- set_sched_topology(x86_numa_in_package_topology);
- if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
- set_sched_topology(x86_hybrid_topology);
-
+ build_sched_topology();
nmi_selftest();
impress_friends();
cache_aps_init();
@@ -1592,6 +1487,12 @@ __init void prefill_possible_map(void)
set_cpu_possible(i, true);
}
+/* correctly size the local cpu masks */
+void __init setup_cpu_local_masks(void)
+{
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
/* Recompute SMT state for all CPUs on offline */
@@ -1650,10 +1551,6 @@ static void remove_siblinginfo(int cpu)
static void remove_cpu_from_maps(int cpu)
{
set_cpu_online(cpu, false);
- cpumask_clear_cpu(cpu, cpu_callout_mask);
- cpumask_clear_cpu(cpu, cpu_callin_mask);
- /* was set by cpu_init() */
- cpumask_clear_cpu(cpu, cpu_initialized_mask);
numa_remove_cpu(cpu);
}
@@ -1704,64 +1601,27 @@ int native_cpu_disable(void)
return 0;
}
-int common_cpu_die(unsigned int cpu)
-{
- int ret = 0;
-
- /* We don't do anything here: idle task is faking death itself. */
-
- /* They ack this in play_dead() by setting CPU_DEAD */
- if (cpu_wait_death(cpu, 5)) {
- if (system_state == SYSTEM_RUNNING)
- pr_info("CPU %u is now offline\n", cpu);
- } else {
- pr_err("CPU %u didn't die...\n", cpu);
- ret = -1;
- }
-
- return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
- common_cpu_die(cpu);
-}
-
void play_dead_common(void)
{
idle_task_exit();
- /* Ack it */
- (void)cpu_report_death();
-
+ cpuhp_ap_report_dead();
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
}
-/**
- * cond_wakeup_cpu0 - Wake up CPU0 if needed.
- *
- * If NMI wants to wake up CPU0, start CPU0.
- */
-void cond_wakeup_cpu0(void)
-{
- if (smp_processor_id() == 0 && enable_start_cpu0)
- start_cpu0();
-}
-EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
-
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
*/
static inline void mwait_play_dead(void)
{
+ struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- void *mwait_ptr;
int i;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
@@ -1796,12 +1656,9 @@ static inline void mwait_play_dead(void)
(highest_subcstate - 1);
}
- /*
- * This should be a memory location in a cache line which is
- * unlikely to be touched by other processors. The actual
- * content is immaterial as it is not actually modified in any way.
- */
- mwait_ptr = &current_thread_info()->flags;
+ /* Set up state for the kexec() hack below */
+ md->status = CPUDEAD_MWAIT_WAIT;
+ md->control = CPUDEAD_MWAIT_WAIT;
wbinvd();
@@ -1814,13 +1671,58 @@ static inline void mwait_play_dead(void)
* case where we return around the loop.
*/
mb();
- clflush(mwait_ptr);
+ clflush(md);
mb();
- __monitor(mwait_ptr, 0, 0);
+ __monitor(md, 0, 0);
mb();
__mwait(eax, 0);
- cond_wakeup_cpu0();
+ if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
+ /*
+ * Kexec is about to happen. Don't go back into mwait() as
+ * the kexec kernel might overwrite text and data including
+ * page tables and stack. So mwait() would resume when the
+ * monitor cache line is written to and then the CPU goes
+ * south due to overwritten text, page tables and stack.
+ *
+ * Note: This does _NOT_ protect against a stray MCE, NMI,
+ * SMI. They will resume execution at the instruction
+ * following the HLT instruction and run into the problem
+ * which this is trying to prevent.
+ */
+ WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
+ while(1)
+ native_halt();
+ }
+ }
+}
+
+/*
+ * Kick all "offline" CPUs out of mwait on kexec(). See comment in
+ * mwait_play_dead().
+ */
+void smp_kick_mwait_play_dead(void)
+{
+ u32 newstate = CPUDEAD_MWAIT_KEXEC_HLT;
+ struct mwait_cpu_dead *md;
+ unsigned int cpu, i;
+
+ for_each_cpu_andnot(cpu, cpu_present_mask, cpu_online_mask) {
+ md = per_cpu_ptr(&mwait_cpu_dead, cpu);
+
+ /* Does it sit in mwait_play_dead() ? */
+ if (READ_ONCE(md->status) != CPUDEAD_MWAIT_WAIT)
+ continue;
+
+ /* Wait up to 5ms */
+ for (i = 0; READ_ONCE(md->status) != newstate && i < 1000; i++) {
+ /* Bring it out of mwait */
+ WRITE_ONCE(md->control, newstate);
+ udelay(5);
+ }
+
+ if (READ_ONCE(md->status) != newstate)
+ pr_err_once("CPU%u is stuck in mwait_play_dead()\n", cpu);
}
}
@@ -1829,11 +1731,8 @@ void __noreturn hlt_play_dead(void)
if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
- while (1) {
+ while (1)
native_halt();
-
- cond_wakeup_cpu0();
- }
}
void native_play_dead(void)
@@ -1852,12 +1751,6 @@ int native_cpu_disable(void)
return -ENOSYS;
}
-void native_cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-
void native_play_dead(void)
{
BUG();
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 1b83377274b8..ca004e2e4469 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -38,102 +38,12 @@
static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
#ifdef CONFIG_HOTPLUG_CPU
-
-#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
-static int cpu0_hotpluggable = 1;
-#else
-static int cpu0_hotpluggable;
-static int __init enable_cpu0_hotplug(char *str)
-{
- cpu0_hotpluggable = 1;
- return 1;
-}
-
-__setup("cpu0_hotplug", enable_cpu0_hotplug);
-#endif
-
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
-/*
- * This function offlines a CPU as early as possible and allows userspace to
- * boot up without the CPU. The CPU can be onlined back by user after boot.
- *
- * This is only called for debugging CPU offline/online feature.
- */
-int _debug_hotplug_cpu(int cpu, int action)
+int arch_register_cpu(int cpu)
{
- int ret;
-
- if (!cpu_is_hotpluggable(cpu))
- return -EINVAL;
+ struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu);
- switch (action) {
- case 0:
- ret = remove_cpu(cpu);
- if (!ret)
- pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
- else
- pr_debug("Can't offline CPU%d.\n", cpu);
- break;
- case 1:
- ret = add_cpu(cpu);
- if (ret)
- pr_debug("Can't online CPU%d.\n", cpu);
-
- break;
- default:
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-static int __init debug_hotplug_cpu(void)
-{
- _debug_hotplug_cpu(0, 0);
- return 0;
-}
-
-late_initcall_sync(debug_hotplug_cpu);
-#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
-
-int arch_register_cpu(int num)
-{
- struct cpuinfo_x86 *c = &cpu_data(num);
-
- /*
- * Currently CPU0 is only hotpluggable on Intel platforms. Other
- * vendors can add hotplug support later.
- * Xen PV guests don't support CPU0 hotplug at all.
- */
- if (c->x86_vendor != X86_VENDOR_INTEL ||
- cpu_feature_enabled(X86_FEATURE_XENPV))
- cpu0_hotpluggable = 0;
-
- /*
- * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
- * depends on BSP. PIC interrupts depend on BSP.
- *
- * If the BSP dependencies are under control, one can tell kernel to
- * enable BSP hotplug. This basically adds a control file and
- * one can attempt to offline BSP.
- */
- if (num == 0 && cpu0_hotpluggable) {
- unsigned int irq;
- /*
- * We won't take down the boot processor on i386 if some
- * interrupts only are able to be serviced by the BSP in PIC.
- */
- for_each_active_irq(irq) {
- if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
- cpu0_hotpluggable = 0;
- break;
- }
- }
- }
- if (num || cpu0_hotpluggable)
- per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
-
- return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
+ xc->cpu.hotpluggable = cpu > 0;
+ return register_cpu(&xc->cpu, cpu);
}
EXPORT_SYMBOL(arch_register_cpu);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 344698852146..3425c6a943e4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -69,12 +69,10 @@ static int __init tsc_early_khz_setup(char *buf)
}
early_param("tsc_early_khz", tsc_early_khz_setup);
-__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+__always_inline void __cyc2ns_read(struct cyc2ns_data *data)
{
int seq, idx;
- preempt_disable_notrace();
-
do {
seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
idx = seq & 1;
@@ -86,6 +84,12 @@ __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
+__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
+{
+ preempt_disable_notrace();
+ __cyc2ns_read(data);
+}
+
__always_inline void cyc2ns_read_end(void)
{
preempt_enable_notrace();
@@ -115,18 +119,25 @@ __always_inline void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static __always_inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
unsigned long long ns;
- cyc2ns_read_begin(&data);
+ __cyc2ns_read(&data);
ns = data.cyc2ns_offset;
ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
- cyc2ns_read_end();
+ return ns;
+}
+static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ unsigned long long ns;
+ preempt_disable_notrace();
+ ns = __cycles_2_ns(cyc);
+ preempt_enable_notrace();
return ns;
}
@@ -223,7 +234,7 @@ noinstr u64 native_sched_clock(void)
u64 tsc_now = rdtsc();
/* return the value in ns */
- return cycles_2_ns(tsc_now);
+ return __cycles_2_ns(tsc_now);
}
/*
@@ -250,7 +261,7 @@ u64 native_sched_clock_from_tsc(u64 tsc)
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
-noinstr u64 sched_clock(void)
+noinstr u64 sched_clock_noinstr(void)
{
return paravirt_sched_clock();
}
@@ -260,11 +271,20 @@ bool using_native_sched_clock(void)
return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
-u64 sched_clock(void) __attribute__((alias("native_sched_clock")));
+u64 sched_clock_noinstr(void) __attribute__((alias("native_sched_clock")));
bool using_native_sched_clock(void) { return true; }
#endif
+notrace u64 sched_clock(void)
+{
+ u64 now;
+ preempt_disable_notrace();
+ now = sched_clock_noinstr();
+ preempt_enable_notrace();
+ return now;
+}
+
int check_tsc_unstable(void)
{
return tsc_unstable;
@@ -1598,10 +1618,7 @@ void __init tsc_init(void)
#ifdef CONFIG_SMP
/*
- * If we have a constant TSC and are using the TSC for the delay loop,
- * we can skip clock calibration if another cpu in the same socket has already
- * been calibrated. This assumes that CONSTANT_TSC applies to all
- * cpus in the socket - this should be a safe assumption.
+ * Check whether existing calibration data can be reused.
*/
unsigned long calibrate_delay_is_known(void)
{
@@ -1609,6 +1626,21 @@ unsigned long calibrate_delay_is_known(void)
int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
const struct cpumask *mask = topology_core_cpumask(cpu);
+ /*
+ * If TSC has constant frequency and TSC is synchronized across
+ * sockets then reuse CPU0 calibration.
+ */
+ if (constant_tsc && !tsc_unstable)
+ return cpu_data(0).loops_per_jiffy;
+
+ /*
+ * If TSC has constant frequency and TSC is not synchronized across
+ * sockets and this is not the first CPU in the socket, then reuse
+ * the calibration value of an already online CPU on that socket.
+ *
+ * This assumes that CONSTANT_TSC is consistent for all CPUs in a
+ * socket.
+ */
if (!constant_tsc || !mask)
return 0;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9452dc9664b5..bbc440c93e08 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -245,7 +245,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu)
*/
static atomic_t start_count;
static atomic_t stop_count;
-static atomic_t skip_test;
static atomic_t test_runs;
/*
@@ -344,21 +343,14 @@ static inline unsigned int loop_timeout(int cpu)
}
/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
+ * The freshly booted CPU initiates this via an async SMP function call.
*/
-void check_tsc_sync_source(int cpu)
+static void check_tsc_sync_source(void *__cpu)
{
+ unsigned int cpu = (unsigned long)__cpu;
int cpus = 2;
/*
- * No need to check if we already know that the TSC is not
- * synchronized or if we have no TSC.
- */
- if (unsynchronized_tsc())
- return;
-
- /*
* Set the maximum number of test runs to
* 1 if the CPU does not provide the TSC_ADJUST MSR
* 3 if the MSR is available, so the target can try to adjust
@@ -368,16 +360,9 @@ void check_tsc_sync_source(int cpu)
else
atomic_set(&test_runs, 3);
retry:
- /*
- * Wait for the target to start or to skip the test:
- */
- while (atomic_read(&start_count) != cpus - 1) {
- if (atomic_read(&skip_test) > 0) {
- atomic_set(&skip_test, 0);
- return;
- }
+ /* Wait for the target to start. */
+ while (atomic_read(&start_count) != cpus - 1)
cpu_relax();
- }
/*
* Trigger the target to continue into the measurement too:
@@ -397,14 +382,14 @@ retry:
if (!nr_warps) {
atomic_set(&test_runs, 0);
- pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
+ pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n",
smp_processor_id(), cpu);
} else if (atomic_dec_and_test(&test_runs) || random_warps) {
/* Force it to 0 if random warps brought us here */
atomic_set(&test_runs, 0);
- pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+ pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n",
smp_processor_id(), cpu);
pr_warn("Measured %Ld cycles TSC warp between CPUs, "
"turning off TSC clock.\n", max_warp);
@@ -457,11 +442,12 @@ void check_tsc_sync_target(void)
* SoCs the TSC is frequency synchronized, but still the TSC ADJUST
* register might have been wreckaged by the BIOS..
*/
- if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
- atomic_inc(&skip_test);
+ if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable)
return;
- }
+ /* Kick the control CPU into the TSC synchronization function */
+ smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source,
+ (unsigned long *)(unsigned long)cpu, 0);
retry:
/*
* Register this CPU's participation and wait for the
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 3ac50b7298d1..7e574cf3bf8a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -7,14 +7,23 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
+#include <asm/orc_header.h>
+
+ORC_HEADER;
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
#define orc_warn_current(args...) \
({ \
- if (state->task == current && !state->error) \
+ static bool dumped_before; \
+ if (state->task == current && !state->error) { \
orc_warn(args); \
+ if (unwind_debug && !dumped_before) { \
+ dumped_before = true; \
+ unwind_dump(state); \
+ } \
+ } \
})
extern int __start_orc_unwind_ip[];
@@ -23,8 +32,49 @@ extern struct orc_entry __start_orc_unwind[];
extern struct orc_entry __stop_orc_unwind[];
static bool orc_init __ro_after_init;
+static bool unwind_debug __ro_after_init;
static unsigned int lookup_num_blocks __ro_after_init;
+static int __init unwind_debug_cmdline(char *str)
+{
+ unwind_debug = true;
+
+ return 0;
+}
+early_param("unwind_debug", unwind_debug_cmdline);
+
+static void unwind_dump(struct unwind_state *state)
+{
+ static bool dumped_before;
+ unsigned long word, *sp;
+ struct stack_info stack_info = {0};
+ unsigned long visit_mask = 0;
+
+ if (dumped_before)
+ return;
+
+ dumped_before = true;
+
+ printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n",
+ state->stack_info.type, state->stack_info.next_sp,
+ state->stack_mask, state->graph_idx);
+
+ for (sp = __builtin_frame_address(0); sp;
+ sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
+ break;
+
+ for (; sp < stack_info.end; sp++) {
+
+ word = READ_ONCE_NOCHECK(*sp);
+
+ printk_deferred("%0*lx: %0*lx (%pB)\n", BITS_PER_LONG/4,
+ (unsigned long)sp, BITS_PER_LONG/4,
+ word, (void *)word);
+ }
+ }
+}
+
static inline unsigned long orc_ip(const int *ip)
{
return (unsigned long)ip + *ip;
@@ -136,21 +186,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
-#ifdef CONFIG_CALL_THUNKS
-static struct orc_entry *orc_callthunk_find(unsigned long ip)
-{
- if (!is_callthunk((void *)ip))
- return NULL;
-
- return &null_orc_entry;
-}
-#else
-static struct orc_entry *orc_callthunk_find(unsigned long ip)
-{
- return NULL;
-}
-#endif
-
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = ORC_TYPE_CALL,
@@ -203,11 +238,7 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
- orc = orc_ftrace_find(ip);
- if (orc)
- return orc;
-
- return orc_callthunk_find(ip);
+ return orc_ftrace_find(ip);
}
#ifdef CONFIG_MODULES
@@ -219,7 +250,6 @@ static struct orc_entry *cur_orc_table = __start_orc_unwind;
static void orc_sort_swap(void *_a, void *_b, int size)
{
struct orc_entry *orc_a, *orc_b;
- struct orc_entry orc_tmp;
int *a = _a, *b = _b, tmp;
int delta = _b - _a;
@@ -231,9 +261,7 @@ static void orc_sort_swap(void *_a, void *_b, int size)
/* Swap the corresponding .orc_unwind entries: */
orc_a = cur_orc_table + (a - cur_orc_ip_table);
orc_b = cur_orc_table + (b - cur_orc_ip_table);
- orc_tmp = *orc_a;
- *orc_a = *orc_b;
- *orc_b = orc_tmp;
+ swap(*orc_a, *orc_b);
}
static int orc_sort_cmp(const void *_a, const void *_b)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 25f155205770..03c885d3640f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -508,4 +508,8 @@ INIT_PER_CPU(irq_stack_backing_store);
"fixed_percpu_data is not at start of per-cpu area");
#endif
+#ifdef CONFIG_RETHUNK
+. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+#endif
+
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d82f4fa2f1bf..a37ebd3b4773 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -126,12 +126,13 @@ struct x86_init_ops x86_init __initdata = {
struct x86_cpuinit_ops x86_cpuinit = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
+ .parallel_bringup = true,
};
static void default_nmi_init(void) { };
-static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { }
-static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
+static bool enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { return true; }
+static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return true; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
static bool is_private_mmio_noop(u64 addr) {return false; }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 04b57a336b34..7f70207e8689 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2799,14 +2799,13 @@ static u64 read_tsc(void)
static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
int *mode)
{
- long v;
u64 tsc_pg_val;
+ long v;
switch (clock->vclock_mode) {
case VDSO_CLOCKMODE_HVCLOCK:
- tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
- tsc_timestamp);
- if (tsc_pg_val != U64_MAX) {
+ if (hv_read_tsc_page_tsc(hv_get_tsc_page(),
+ tsc_timestamp, &tsc_pg_val)) {
/* TSC page valid */
*mode = VDSO_CLOCKMODE_HVCLOCK;
v = (tsc_pg_val - clock->cycle_last) &
@@ -13162,7 +13161,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return arch_atomic_read(&kvm->arch.assigned_device_count);
+ return raw_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 01932af64193..ea3a28e7b613 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += strstr_32.o
lib-y += string_32.o
lib-y += memmove_32.o
+ lib-y += cmpxchg8b_emu.o
ifneq ($(CONFIG_X86_CMPXCHG64),y)
- lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+ lib-y += atomic64_386_32.o
endif
else
obj-y += iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 33c70c0160ea..6962df315793 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,47 +1,54 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/linkage.h>
#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
/*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
* Inputs:
* %rsi : memory location to compare
* %rax : low 64 bits of old value
* %rdx : high 64 bits of old value
* %rbx : low 64 bits of new value
* %rcx : high 64 bits of new value
- * %al : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
*/
SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF. Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation. Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
pushfq
cli
- cmpq PER_CPU_VAR((%rsi)), %rax
- jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
- jne .Lnot_same
+ /* if (*ptr == old) */
+ cmpq PER_CPU_VAR(0(%rsi)), %rax
+ jne .Lnot_same
+ cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ jne .Lnot_same
- movq %rbx, PER_CPU_VAR((%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ /* *ptr = new */
+ movq %rbx, PER_CPU_VAR(0(%rsi))
+ movq %rcx, PER_CPU_VAR(8(%rsi))
+
+ /* set ZF in EFLAGS to indicate success */
+ orl $X86_EFLAGS_ZF, (%rsp)
popfq
- mov $1, %al
RET
.Lnot_same:
+ /* *ptr != old */
+
+ /* old = *ptr */
+ movq PER_CPU_VAR(0(%rsi)), %rax
+ movq PER_CPU_VAR(8(%rsi)), %rdx
+
+ /* clear ZF in EFLAGS to indicate failure */
+ andl $(~X86_EFLAGS_ZF), (%rsp)
+
popfq
- xor %al,%al
RET
SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 6a912d58fecc..49805257b125 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -2,10 +2,16 @@
#include <linux/linkage.h>
#include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
.text
+#ifndef CONFIG_X86_CMPXCHG64
+
/*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
* Inputs:
* %esi : memory location to compare
* %eax : low 32 bits of old value
@@ -15,32 +21,65 @@
*/
SYM_FUNC_START(cmpxchg8b_emu)
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
pushfl
cli
- cmpl (%esi), %eax
- jne .Lnot_same
- cmpl 4(%esi), %edx
- jne .Lhalf_same
+ cmpl 0(%esi), %eax
+ jne .Lnot_same
+ cmpl 4(%esi), %edx
+ jne .Lnot_same
+
+ movl %ebx, 0(%esi)
+ movl %ecx, 4(%esi)
- movl %ebx, (%esi)
- movl %ecx, 4(%esi)
+ orl $X86_EFLAGS_ZF, (%esp)
popfl
RET
.Lnot_same:
- movl (%esi), %eax
-.Lhalf_same:
- movl 4(%esi), %edx
+ movl 0(%esi), %eax
+ movl 4(%esi), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
popfl
RET
SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+#ifndef CONFIG_UML
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+ pushfl
+ cli
+
+ cmpl PER_CPU_VAR(0(%esi)), %eax
+ jne .Lnot_same2
+ cmpl PER_CPU_VAR(4(%esi)), %edx
+ jne .Lnot_same2
+
+ movl %ebx, PER_CPU_VAR(0(%esi))
+ movl %ecx, PER_CPU_VAR(4(%esi))
+
+ orl $X86_EFLAGS_ZF, (%esp)
+
+ popfl
+ RET
+
+.Lnot_same2:
+ movl PER_CPU_VAR(0(%esi)), %eax
+ movl PER_CPU_VAR(4(%esi)), %edx
+
+ andl $(~X86_EFLAGS_ZF), (%esp)
+
+ popfl
+ RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)
+
+#endif
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 50734a23034c..cea25ca8b8cf 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -5,22 +5,34 @@
* This file contains network checksum routines that are better done
* in an architecture-specific manner due to speed.
*/
-
+
#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
#include <asm/word-at-a-time.h>
-static inline unsigned short from32to16(unsigned a)
+static inline unsigned short from32to16(unsigned a)
{
- unsigned short b = a >> 16;
+ unsigned short b = a >> 16;
asm("addw %w2,%w0\n\t"
- "adcw $0,%w0\n"
+ "adcw $0,%w0\n"
: "=r" (b)
: "0" (b), "r" (a));
return b;
}
+static inline __wsum csum_tail(u64 temp64, int odd)
+{
+ unsigned int result;
+
+ result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
+ if (unlikely(odd)) {
+ result = from32to16(result);
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+ }
+ return (__force __wsum)result;
+}
+
/*
* Do a checksum on an arbitrary memory area.
* Returns a 32bit checksum.
@@ -35,7 +47,7 @@ static inline unsigned short from32to16(unsigned a)
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
u64 temp64 = (__force u64)sum;
- unsigned odd, result;
+ unsigned odd;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
@@ -47,21 +59,52 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
buff++;
}
- while (unlikely(len >= 64)) {
+ /*
+ * len == 40 is the hot case due to IPv6 headers, but annotating it likely()
+ * has a noticeable negative effect on codegen for all other cases with
+ * minimal performance benefit here.
+ */
+ if (len == 40) {
asm("addq 0*8(%[src]),%[res]\n\t"
"adcq 1*8(%[src]),%[res]\n\t"
"adcq 2*8(%[src]),%[res]\n\t"
"adcq 3*8(%[src]),%[res]\n\t"
"adcq 4*8(%[src]),%[res]\n\t"
- "adcq 5*8(%[src]),%[res]\n\t"
- "adcq 6*8(%[src]),%[res]\n\t"
- "adcq 7*8(%[src]),%[res]\n\t"
"adcq $0,%[res]"
- : [res] "+r" (temp64)
- : [src] "r" (buff)
- : "memory");
- buff += 64;
- len -= 64;
+ : [res] "+r"(temp64)
+ : [src] "r"(buff), "m"(*(const char(*)[40])buff));
+ return csum_tail(temp64, odd);
+ }
+ if (unlikely(len >= 64)) {
+ /*
+ * Extra accumulators for better ILP in the loop.
+ */
+ u64 tmp_accum, tmp_carries;
+
+ asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t"
+ "xorl %k[tmp_carries],%k[tmp_carries]\n\t"
+ "subl $64, %[len]\n\t"
+ "1:\n\t"
+ "addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcl $0,%k[tmp_carries]\n\t"
+ "addq 4*8(%[src]),%[tmp_accum]\n\t"
+ "adcq 5*8(%[src]),%[tmp_accum]\n\t"
+ "adcq 6*8(%[src]),%[tmp_accum]\n\t"
+ "adcq 7*8(%[src]),%[tmp_accum]\n\t"
+ "adcl $0,%k[tmp_carries]\n\t"
+ "addq $64, %[src]\n\t"
+ "subl $64, %[len]\n\t"
+ "jge 1b\n\t"
+ "addq %[tmp_accum],%[res]\n\t"
+ "adcq %[tmp_carries],%[res]\n\t"
+ "adcq $0,%[res]"
+ : [tmp_accum] "=&r"(tmp_accum),
+ [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64),
+ [len] "+r"(len), [src] "+r"(buff)
+ : "m"(*(const char *)buff));
}
if (len & 32) {
@@ -70,45 +113,37 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
"adcq 2*8(%[src]),%[res]\n\t"
"adcq 3*8(%[src]),%[res]\n\t"
"adcq $0,%[res]"
- : [res] "+r" (temp64)
- : [src] "r" (buff)
- : "memory");
+ : [res] "+r"(temp64)
+ : [src] "r"(buff), "m"(*(const char(*)[32])buff));
buff += 32;
}
if (len & 16) {
asm("addq 0*8(%[src]),%[res]\n\t"
"adcq 1*8(%[src]),%[res]\n\t"
"adcq $0,%[res]"
- : [res] "+r" (temp64)
- : [src] "r" (buff)
- : "memory");
+ : [res] "+r"(temp64)
+ : [src] "r"(buff), "m"(*(const char(*)[16])buff));
buff += 16;
}
if (len & 8) {
asm("addq 0*8(%[src]),%[res]\n\t"
"adcq $0,%[res]"
- : [res] "+r" (temp64)
- : [src] "r" (buff)
- : "memory");
+ : [res] "+r"(temp64)
+ : [src] "r"(buff), "m"(*(const char(*)[8])buff));
buff += 8;
}
if (len & 7) {
- unsigned int shift = (8 - (len & 7)) * 8;
+ unsigned int shift = (-len << 3) & 63;
unsigned long trail;
trail = (load_unaligned_zeropad(buff) << shift) >> shift;
asm("addq %[trail],%[res]\n\t"
"adcq $0,%[res]"
- : [res] "+r" (temp64)
- : [trail] "r" (trail));
+ : [res] "+r"(temp64)
+ : [trail] "r"(trail));
}
- result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
- if (unlikely(odd)) {
- result = from32to16(result);
- result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
- }
- return (__force __wsum)result;
+ return csum_tail(temp64, odd);
}
EXPORT_SYMBOL(csum_partial);
@@ -118,6 +153,6 @@ EXPORT_SYMBOL(csum_partial);
*/
__sum16 ip_compute_csum(const void *buff, int len)
{
- return csum_fold(csum_partial(buff,len,0));
+ return csum_fold(csum_partial(buff, len, 0));
}
EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index b64a2bd1a1ef..9c63713477bb 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -143,43 +143,43 @@ SYM_FUNC_END(__get_user_nocheck_8)
EXPORT_SYMBOL(__get_user_nocheck_8)
-SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
+SYM_CODE_START_LOCAL(__get_user_handle_exception)
ASM_CLAC
.Lbad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
RET
-SYM_CODE_END(.Lbad_get_user_clac)
+SYM_CODE_END(__get_user_handle_exception)
#ifdef CONFIG_X86_32
-SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac)
+SYM_CODE_START_LOCAL(__get_user_8_handle_exception)
ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
RET
-SYM_CODE_END(.Lbad_get_user_8_clac)
+SYM_CODE_END(__get_user_8_handle_exception)
#endif
/* get_user */
- _ASM_EXTABLE(1b, .Lbad_get_user_clac)
- _ASM_EXTABLE(2b, .Lbad_get_user_clac)
- _ASM_EXTABLE(3b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(1b, __get_user_handle_exception)
+ _ASM_EXTABLE(2b, __get_user_handle_exception)
+ _ASM_EXTABLE(3b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(4b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(4b, .Lbad_get_user_8_clac)
- _ASM_EXTABLE(5b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(4b, __get_user_8_handle_exception)
+ _ASM_EXTABLE(5b, __get_user_8_handle_exception)
#endif
/* __get_user */
- _ASM_EXTABLE(6b, .Lbad_get_user_clac)
- _ASM_EXTABLE(7b, .Lbad_get_user_clac)
- _ASM_EXTABLE(8b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(6b, __get_user_handle_exception)
+ _ASM_EXTABLE(7b, __get_user_handle_exception)
+ _ASM_EXTABLE(8b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(9b, .Lbad_get_user_clac)
+ _ASM_EXTABLE(9b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(9b, .Lbad_get_user_8_clac)
- _ASM_EXTABLE(10b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE(9b, __get_user_8_handle_exception)
+ _ASM_EXTABLE(10b, __get_user_8_handle_exception)
#endif
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 02661861e5dd..0559b206fb11 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -38,10 +38,12 @@ SYM_FUNC_START(__memmove)
cmp %rdi, %r8
jg 2f
- /* FSRM implies ERMS => no length checks, do the copy directly */
+#define CHECK_LEN cmp $0x20, %rdx; jb 1f
+#define MEMMOVE_BYTES movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
- ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
+ ALTERNATIVE_2 __stringify(CHECK_LEN), \
+ __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
+ __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM
/*
* movsq instruction have many startup latency
@@ -207,11 +209,6 @@ SYM_FUNC_START(__memmove)
movb %r11b, (%rdi)
13:
RET
-
-.Lmemmove_erms:
- movq %rdx, %rcx
- rep movsb
- RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index b09cd2ad426c..47fd9bd6b91d 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -27,14 +27,14 @@ void msrs_free(struct msr *msrs)
EXPORT_SYMBOL(msrs_free);
/**
- * Read an MSR with error handling
- *
+ * msr_read - Read an MSR with error handling
* @msr: MSR to read
* @m: value to read into
*
* It returns read data only on success, otherwise it doesn't change the output
* argument @m.
*
+ * Return: %0 for success, otherwise an error code
*/
static int msr_read(u32 msr, struct msr *m)
{
@@ -49,10 +49,12 @@ static int msr_read(u32 msr, struct msr *m)
}
/**
- * Write an MSR with error handling
+ * msr_write - Write an MSR with error handling
*
* @msr: MSR to write
* @m: value to write
+ *
+ * Return: %0 for success, otherwise an error code
*/
static int msr_write(u32 msr, struct msr *m)
{
@@ -88,12 +90,14 @@ static inline int __flip_bit(u32 msr, u8 bit, bool set)
}
/**
- * Set @bit in a MSR @msr.
+ * msr_set_bit - Set @bit in an MSR @msr.
+ * @msr: MSR to write
+ * @bit: bit number to set
*
- * Retval:
- * < 0: An error was encountered.
- * = 0: Bit was already set.
- * > 0: Hardware accepted the MSR write.
+ * Return:
+ * * < 0: An error was encountered.
+ * * = 0: Bit was already set.
+ * * > 0: Hardware accepted the MSR write.
*/
int msr_set_bit(u32 msr, u8 bit)
{
@@ -101,12 +105,14 @@ int msr_set_bit(u32 msr, u8 bit)
}
/**
- * Clear @bit in a MSR @msr.
+ * msr_clear_bit - Clear @bit in an MSR @msr.
+ * @msr: MSR to write
+ * @bit: bit number to clear
*
- * Retval:
- * < 0: An error was encountered.
- * = 0: Bit was already cleared.
- * > 0: Hardware accepted the MSR write.
+ * Return:
+ * * < 0: An error was encountered.
+ * * = 0: Bit was already cleared.
+ * * > 0: Hardware accepted the MSR write.
*/
int msr_clear_bit(u32 msr, u8 bit)
{
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 3062d09a776d..1451e0c4ae22 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -131,22 +131,22 @@ SYM_FUNC_START(__put_user_nocheck_8)
SYM_FUNC_END(__put_user_nocheck_8)
EXPORT_SYMBOL(__put_user_nocheck_8)
-SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
+SYM_CODE_START_LOCAL(__put_user_handle_exception)
ASM_CLAC
.Lbad_put_user:
movl $-EFAULT,%ecx
RET
-SYM_CODE_END(.Lbad_put_user_clac)
+SYM_CODE_END(__put_user_handle_exception)
- _ASM_EXTABLE(1b, .Lbad_put_user_clac)
- _ASM_EXTABLE(2b, .Lbad_put_user_clac)
- _ASM_EXTABLE(3b, .Lbad_put_user_clac)
- _ASM_EXTABLE(4b, .Lbad_put_user_clac)
- _ASM_EXTABLE(5b, .Lbad_put_user_clac)
- _ASM_EXTABLE(6b, .Lbad_put_user_clac)
- _ASM_EXTABLE(7b, .Lbad_put_user_clac)
- _ASM_EXTABLE(9b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(1b, __put_user_handle_exception)
+ _ASM_EXTABLE(2b, __put_user_handle_exception)
+ _ASM_EXTABLE(3b, __put_user_handle_exception)
+ _ASM_EXTABLE(4b, __put_user_handle_exception)
+ _ASM_EXTABLE(5b, __put_user_handle_exception)
+ _ASM_EXTABLE(6b, __put_user_handle_exception)
+ _ASM_EXTABLE(7b, __put_user_handle_exception)
+ _ASM_EXTABLE(9b, __put_user_handle_exception)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(8b, .Lbad_put_user_clac)
- _ASM_EXTABLE(10b, .Lbad_put_user_clac)
+ _ASM_EXTABLE(8b, __put_user_handle_exception)
+ _ASM_EXTABLE(10b, __put_user_handle_exception)
#endif
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index b3b1e376dce8..3fd066d42ec0 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -143,7 +143,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
* from re-poisioning the BTB prediction.
*/
.align 64
- .skip 63, 0xcc
+ .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 003d90138e20..e9251b89a9e9 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -9,6 +9,7 @@
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
+#include <linux/libnvdimm.h>
/*
* Zero Userspace
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 7fe56c594aa6..91c52ead1226 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -32,6 +32,7 @@
#include <asm/traps.h>
#include <asm/user.h>
#include <asm/fpu/api.h>
+#include <asm/fpu/regset.h>
#include "fpu_system.h"
#include "fpu_emu.h"
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 2c54b76d8f84..d9efa35711ee 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/swap.h> /* for totalram_pages */
#include <linux/memblock.h>
+#include <asm/numa.h>
void __init set_highmem_pages_init(void)
{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d4e2648a1dfb..b63403d7179d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -45,7 +45,6 @@
#include <asm/olpc_ofw.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
-#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
@@ -74,7 +73,6 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *)alloc_low_page();
- paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
p4d = p4d_offset(pgd, 0);
pud = pud_offset(p4d, 0);
@@ -99,7 +97,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = (pte_t *)alloc_low_page();
- paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
@@ -181,12 +178,10 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
set_pte(newpte + i, pte[i]);
*adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);
- paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
BUG_ON(newpte != pte_offset_kernel(pmd, 0));
__flush_tlb_all();
- paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
pte = newpte;
}
BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
@@ -482,7 +477,6 @@ void __init native_pagetable_init(void)
pfn, pmd, __pa(pmd), pte, __pa(pte));
pte_clear(NULL, va, pte);
}
- paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
paging_init();
}
@@ -491,15 +485,8 @@ void __init native_pagetable_init(void)
* point, we've been running on some set of pagetables constructed by
* the boot process.
*
- * If we're booting on native hardware, this will be a pagetable
- * constructed in arch/x86/kernel/head_32.S. The root of the
- * pagetable will be swapper_pg_dir.
- *
- * If we're booting paravirtualized under a hypervisor, then there are
- * more options: we may already be running PAE, and the pagetable may
- * or may not be based in swapper_pg_dir. In any case,
- * paravirt_pagetable_init() will set up swapper_pg_dir
- * appropriately for the rest of the initialization to work.
+ * This will be a pagetable constructed in arch/x86/kernel/head_32.S.
+ * The root of the pagetable will be swapper_pg_dir.
*
* In general, pagetable_init() assumes that the pagetable may already
* be partially populated, and so it avoids stomping on any existing
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 557f0fe25dff..37db264866b6 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void)
set_p4d(p4d_tramp,
__p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp));
} else {
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ trampoline_pgd_entry =
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp));
}
}
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c09109f..54bbd5163e8d 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -319,7 +319,7 @@ static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
#endif
}
-static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
+static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
/*
* To maintain the security guarantees of SEV-SNP guests, make sure
@@ -327,6 +327,8 @@ static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool
*/
if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
snp_set_memory_shared(vaddr, npages);
+
+ return true;
}
/* Return true unconditionally: return value doesn't matter for the SEV side */
@@ -501,6 +503,21 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+
+ /*
+ * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
+ * parallel bringup low level code. That raises #VC which cannot be
+ * handled there.
+ * It does not provide a RDMSR GHCB protocol so the early startup
+ * code cannot directly communicate with the secure firmware. The
+ * alternative solution to retrieve the APIC ID via CPUID(0xb),
+ * which is covered by the GHCB protocol, is not viable either
+ * because there is no enforcement of the CPUID(0xb) provided
+ * "initial" APIC ID to be the same as the real APIC ID.
+ * Disable parallel bootup.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ x86_cpuinit.parallel_bringup = false;
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index c6efcf559d88..d73aeb16417f 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -188,7 +188,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
if (pmd_large(*pmd))
return;
- pte = pte_offset_map(pmd, ppd->vaddr);
+ pte = pte_offset_kernel(pmd, ppd->vaddr);
if (pte_none(*pte))
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
@@ -612,7 +612,7 @@ void __init sme_enable(struct boot_params *bp)
out:
if (sme_me_mask) {
physical_mask &= ~sme_me_mask;
- cc_set_vendor(CC_VENDOR_AMD);
+ cc_vendor = CC_VENDOR_AMD;
cc_set_mask(sme_me_mask);
}
}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 7159cf787613..df4182b6449f 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
@@ -231,7 +232,7 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
* points to #2, but almost all physical-to-virtual translations point to #1.
*
* This is so that we can have both a directmap of all physical memory *and*
- * take full advantage of the the limited (s32) immediate addressing range (2G)
+ * take full advantage of the limited (s32) immediate addressing range (2G)
* of x86_64.
*
* See Documentation/arch/x86/x86_64/mm.rst for more detail.
@@ -2151,7 +2152,8 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required());
/* Notify hypervisor that we are about to set/clr encryption attribute. */
- x86_platform.guest.enc_status_change_prepare(addr, numpages, enc);
+ if (!x86_platform.guest.enc_status_change_prepare(addr, numpages, enc))
+ return -EIO;
ret = __change_page_attr_set_clr(&cpa, 1);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e4f499eb0f29..15a8009a4480 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,14 +702,8 @@ void p4d_clear_huge(p4d_t *p4d)
* pud_set_huge - setup kernel PUD mapping
*
* MTRRs can override PAT memory types with 4KiB granularity. Therefore, this
- * function sets up a huge page only if any of the following conditions are met:
- *
- * - MTRRs are disabled, or
- *
- * - MTRRs are enabled and the range is completely covered by a single MTRR, or
- *
- * - MTRRs are enabled and the corresponding MTRR memory type is WB, which
- * has no effect on the requested PAT memory type.
+ * function sets up a huge page only if the complete range has the same MTRR
+ * caching mode.
*
* Callers should try to decrease page size (1GB -> 2MB -> 4K) if the bigger
* page mapping attempt fails.
@@ -718,11 +712,10 @@ void p4d_clear_huge(p4d_t *p4d)
*/
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK))
+ mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
+ if (!uniform)
return 0;
/* Bail out if we are we on a populated non-leaf entry: */
@@ -745,11 +738,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
*/
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
- u8 mtrr, uniform;
+ u8 uniform;
- mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
- if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
- (mtrr != MTRR_TYPE_WRBACK)) {
+ mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
+ if (!uniform) {
pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
__func__, addr, addr + PMD_SIZE);
return 0;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 1056bbf55b17..438adb695daa 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2570,7 +2570,7 @@ out_image:
}
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, proglen, pass + 1, image);
+ bpf_jit_dump(prog->len, proglen, pass + 1, rw_image);
if (image) {
if (!prog->is_func || extra_pass) {
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 584c25b588b4..87313701f069 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -83,7 +83,7 @@ static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
*value |= 0x100;
}
-void sata_revid_init(struct sim_dev_reg *reg)
+static void sata_revid_init(struct sim_dev_reg *reg)
{
reg->sim_reg.value = 0x01060100;
reg->sim_reg.mask = 0;
@@ -172,7 +172,7 @@ static inline void extract_bytes(u32 *value, int reg, int len)
*value &= mask;
}
-int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
+static int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
{
u32 av_bridge_base, av_bridge_limit;
int retval = 0;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f3f2d87cce1b..e9f99c56f3ce 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -96,6 +96,9 @@ static const unsigned long * const efi_tables[] = {
#ifdef CONFIG_EFI_COCO_SECRET
&efi.coco_secret,
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ &efi.unaccepted,
+#endif
};
u64 efi_setup; /* efi setup_data physical address */
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 75e3319e8bee..74ebd6882690 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -234,7 +234,7 @@ static int __init olpc_dt_compatible_match(phandle node, const char *compat)
return 0;
}
-void __init olpc_dt_fixup(void)
+static void __init olpc_dt_fixup(void)
{
phandle node;
u32 board_rev;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 7a4d5e911415..63230ff8cf4f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -351,43 +351,6 @@ static int bsp_pm_callback(struct notifier_block *nb, unsigned long action,
case PM_HIBERNATION_PREPARE:
ret = bsp_check();
break;
-#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
- case PM_RESTORE_PREPARE:
- /*
- * When system resumes from hibernation, online CPU0 because
- * 1. it's required for resume and
- * 2. the CPU was online before hibernation
- */
- if (!cpu_online(0))
- _debug_hotplug_cpu(0, 1);
- break;
- case PM_POST_RESTORE:
- /*
- * When a resume really happens, this code won't be called.
- *
- * This code is called only when user space hibernation software
- * prepares for snapshot device during boot time. So we just
- * call _debug_hotplug_cpu() to restore to CPU0's state prior to
- * preparing the snapshot device.
- *
- * This works for normal boot case in our CPU0 hotplug debug
- * mode, i.e. CPU0 is offline and user mode hibernation
- * software initializes during boot time.
- *
- * If CPU0 is online and user application accesses snapshot
- * device after boot time, this will offline CPU0 and user may
- * see different CPU0 state before and after accessing
- * the snapshot device. But hopefully this is not a case when
- * user debugging CPU0 hotplug. Even if users hit this case,
- * they can easily online CPU0 back.
- *
- * To simplify this debug code, we only consider normal boot
- * case. Otherwise we need to remember CPU0's state and restore
- * to that state and resolve racy conditions etc.
- */
- _debug_hotplug_cpu(0, 0);
- break;
-#endif
default:
break;
}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 42abd6af1198..c2a29be35c01 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -12,7 +12,7 @@ $(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
-CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
# When profile-guided optimization is enabled, llvm emits two different
# overlapping text sections, which is not supported by kexec. Remove profile
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index af565816d2ba..788e5559549f 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -154,6 +154,9 @@ static void __init setup_real_mode(void)
trampoline_header->flags = 0;
+ trampoline_lock = &trampoline_header->lock;
+ *trampoline_lock = 0;
+
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
/* Map the real mode stub as virtual == physical */
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index e38d61d6562e..c9f76fae902e 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,6 +37,24 @@
.text
.code16
+.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0
+ /*
+ * Make sure only one CPU fiddles with the realmode stack
+ */
+.Llock_rm\@:
+ .if \lock_pa
+ lock btsl $0, pa_tr_lock
+ .else
+ lock btsl $0, tr_lock
+ .endif
+ jnc 2f
+ pause
+ jmp .Llock_rm\@
+2:
+ # Setup stack
+ movl $rm_stack_end, %esp
+.endm
+
.balign PAGE_SIZE
SYM_CODE_START(trampoline_start)
cli # We should be safe anyway
@@ -49,8 +67,7 @@ SYM_CODE_START(trampoline_start)
mov %ax, %es
mov %ax, %ss
- # Setup stack
- movl $rm_stack_end, %esp
+ LOCK_AND_LOAD_REALMODE_ESP
call verify_cpu # Verify the cpu supports long mode
testl %eax, %eax # Check for return code
@@ -93,8 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start)
mov %ax, %es
mov %ax, %ss
- # Setup stack
- movl $rm_stack_end, %esp
+ LOCK_AND_LOAD_REALMODE_ESP
jmp .Lswitch_to_protected
SYM_CODE_END(sev_es_trampoline_start)
@@ -177,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat)
* In compatibility mode. Prep ESP and DX for startup_32, then disable
* paging and complete the switch to legacy 32-bit mode.
*/
- movl $rm_stack_end, %esp
+ LOCK_AND_LOAD_REALMODE_ESP lock_pa=1
movw $__KERNEL_DS, %dx
movl $(CR0_STATE & ~X86_CR0_PG), %eax
@@ -241,6 +257,7 @@ SYM_DATA_START(trampoline_header)
SYM_DATA(tr_efer, .space 8)
SYM_DATA(tr_cr4, .space 4)
SYM_DATA(tr_flags, .space 4)
+ SYM_DATA(tr_lock, .space 4)
SYM_DATA_END(trampoline_header)
#include "trampoline_common.S"
diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
index 9fd24846d094..9e9143085d19 100644
--- a/arch/x86/video/fbdev.c
+++ b/arch/x86/video/fbdev.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/vgaarb.h>
+#include <asm/fb.h>
int fb_is_primary_device(struct fb_info *info)
{
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 7d7ffb9c826a..863d0d6b3edc 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -16,6 +16,8 @@
#include <asm/setup.h>
#include <asm/xen/hypercall.h>
+#include "xen-ops.h"
+
static efi_char16_t vendor[100] __initdata;
static efi_system_table_t efi_systab_xen __initdata = {
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index c1cd28e915a3..a6820ca940bf 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -161,13 +161,12 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
int rc = 0;
/*
- * This can happen if CPU was offlined earlier and
- * offlining timed out in common_cpu_die().
+ * If a CPU was offlined earlier and offlining timed out then the
+ * lock mechanism is still initialized. Uninit it unconditionally
+ * as it's safe to call even if already uninited. Interrupts and
+ * timer have already been handled in xen_cpu_dead_hvm().
*/
- if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- }
+ xen_uninit_lock_cpu(cpu);
if (cpu_acpi_id(cpu) != U32_MAX)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 093b78c8bbec..93b658248d01 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -68,6 +68,7 @@
#include <asm/reboot.h>
#include <asm/hypervisor.h>
#include <asm/mach_traps.h>
+#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
@@ -119,6 +120,54 @@ static int __init parse_xen_msr_safe(char *str)
}
early_param("xen_msr_safe", parse_xen_msr_safe);
+/* Get MTRR settings from Xen and put them into mtrr_state. */
+static void __init xen_set_mtrr_data(void)
+{
+#ifdef CONFIG_MTRR
+ struct xen_platform_op op = {
+ .cmd = XENPF_read_memtype,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ };
+ unsigned int reg;
+ unsigned long mask;
+ uint32_t eax, width;
+ static struct mtrr_var_range var[MTRR_MAX_VAR_RANGES] __initdata;
+
+ /* Get physical address width (only 64-bit cpus supported). */
+ width = 36;
+ eax = cpuid_eax(0x80000000);
+ if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
+ eax = cpuid_eax(0x80000008);
+ width = eax & 0xff;
+ }
+
+ for (reg = 0; reg < MTRR_MAX_VAR_RANGES; reg++) {
+ op.u.read_memtype.reg = reg;
+ if (HYPERVISOR_platform_op(&op))
+ break;
+
+ /*
+ * Only called in dom0, which has all RAM PFNs mapped at
+ * RAM MFNs, and all PCI space etc. is identity mapped.
+ * This means we can treat MFN == PFN regarding MTRR settings.
+ */
+ var[reg].base_lo = op.u.read_memtype.type;
+ var[reg].base_lo |= op.u.read_memtype.mfn << PAGE_SHIFT;
+ var[reg].base_hi = op.u.read_memtype.mfn >> (32 - PAGE_SHIFT);
+ mask = ~((op.u.read_memtype.nr_mfns << PAGE_SHIFT) - 1);
+ mask &= (1UL << width) - 1;
+ if (mask)
+ mask |= MTRR_PHYSMASK_V;
+ var[reg].mask_lo = mask;
+ var[reg].mask_hi = mask >> 32;
+ }
+
+ /* Only overwrite MTRR state if any MTRR could be got from Xen. */
+ if (reg)
+ mtrr_overwrite_state(var, reg, MTRR_TYPE_UNCACHABLE);
+#endif
+}
+
static void __init xen_pv_init_platform(void)
{
/* PV guests can't operate virtio devices without grants. */
@@ -135,6 +184,11 @@ static void __init xen_pv_init_platform(void)
/* pvclock is in shared info area */
xen_init_time_ops();
+
+ if (xen_initial_domain())
+ xen_set_mtrr_data();
+ else
+ mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
static void __init xen_pv_guest_late_init(void)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index b3b8d289b9ab..e0a975165de7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -86,6 +86,22 @@
#include "mmu.h"
#include "debugfs.h"
+/*
+ * Prototypes for functions called via PV_CALLEE_SAVE_REGS_THUNK() in order
+ * to avoid warnings with "-Wmissing-prototypes".
+ */
+pteval_t xen_pte_val(pte_t pte);
+pgdval_t xen_pgd_val(pgd_t pgd);
+pmdval_t xen_pmd_val(pmd_t pmd);
+pudval_t xen_pud_val(pud_t pud);
+p4dval_t xen_p4d_val(p4d_t p4d);
+pte_t xen_make_pte(pteval_t pte);
+pgd_t xen_make_pgd(pgdval_t pgd);
+pmd_t xen_make_pmd(pmdval_t pmd);
+pud_t xen_make_pud(pudval_t pud);
+p4d_t xen_make_p4d(p4dval_t p4d);
+pte_t xen_make_pte_init(pteval_t pte);
+
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c2be3efb2ba0..8b5cf7bb1f47 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -6,6 +6,7 @@
*/
#include <linux/init.h>
+#include <linux/iscsi_ibft.h>
#include <linux/sched.h>
#include <linux/kstrtox.h>
#include <linux/mm.h>
@@ -764,17 +765,26 @@ char * __init xen_memory_setup(void)
BUG_ON(memmap.nr_entries == 0);
xen_e820_table.nr_entries = memmap.nr_entries;
- /*
- * Xen won't allow a 1:1 mapping to be created to UNUSABLE
- * regions, so if we're using the machine memory map leave the
- * region as RAM as it is in the pseudo-physical map.
- *
- * UNUSABLE regions in domUs are not handled and will need
- * a patch in the future.
- */
- if (xen_initial_domain())
+ if (xen_initial_domain()) {
+ /*
+ * Xen won't allow a 1:1 mapping to be created to UNUSABLE
+ * regions, so if we're using the machine memory map leave the
+ * region as RAM as it is in the pseudo-physical map.
+ *
+ * UNUSABLE regions in domUs are not handled and will need
+ * a patch in the future.
+ */
xen_ignore_unusable();
+#ifdef CONFIG_ISCSI_IBFT_FIND
+ /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */
+ xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START;
+ xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START;
+ xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED;
+ xen_e820_table.nr_entries++;
+#endif
+ }
+
/* Make sure the Xen-supplied memory map is well-ordered. */
e820__update_table(&xen_e820_table);
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 22fb982ff971..c20cbb14c82b 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -2,6 +2,10 @@
#ifndef _XEN_SMP_H
#ifdef CONFIG_SMP
+
+void asm_cpu_bringup_and_idle(void);
+asmlinkage void cpu_bringup_and_idle(void);
+
extern void xen_send_IPI_mask(const struct cpumask *mask,
int vector);
extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index b70afdff419c..ac95d1981cc0 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
}
#ifdef CONFIG_HOTPLUG_CPU
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
{
- if (common_cpu_die(cpu) == 0) {
- if (xen_have_vector_callback) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- xen_teardown_timer(cpu);
- }
+ if (xen_have_vector_callback) {
+ xen_smp_intr_free(cpu);
+ xen_uninit_lock_cpu(cpu);
+ xen_teardown_timer(cpu);
}
}
#else
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
{
BUG();
}
@@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void)
smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_cpus_done = xen_smp_cpus_done;
- smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;
if (!xen_have_vector_callback) {
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index a9cf8c8fa074..d5ae5de2daa2 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -55,13 +55,13 @@ static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);
-void asm_cpu_bringup_and_idle(void);
static void cpu_bringup(void)
{
int cpu;
cr4_init();
+ cpuhp_ap_sync_alive();
cpu_init();
touch_softlockup_watchdog();
@@ -83,7 +83,7 @@ static void cpu_bringup(void)
set_cpu_online(cpu, true);
- cpu_set_state_online(cpu); /* Implies full memory barrier. */
+ smp_mb();
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
@@ -254,15 +254,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
struct desc_struct *gdt;
unsigned long gdt_mfn;
- /* used to tell cpu_init() that it can proceed with initialization */
- cpumask_set_cpu(cpu, cpu_callout_mask);
if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt == NULL) {
cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
- cpumask_clear_cpu(cpu, cpu_callout_mask);
return -ENOMEM;
}
@@ -316,7 +313,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
return 0;
}
-static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
+static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle)
{
int rc;
@@ -326,14 +323,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
xen_setup_runstate_info(cpu);
- /*
- * PV VCPUs are always successfully taken down (see 'while' loop
- * in xen_cpu_die()), so -EBUSY is an error.
- */
- rc = cpu_check_up_prepare(cpu);
- if (rc)
- return rc;
-
/* make sure interrupts start blocked */
per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
@@ -343,15 +332,20 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
xen_pmu_init(cpu);
- rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL);
- BUG_ON(rc);
-
- while (cpu_report_state(cpu) != CPU_ONLINE)
- HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+ /*
+ * Why is this a BUG? If the hypercall fails then everything can be
+ * rolled back, no?
+ */
+ BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL));
return 0;
}
+static void xen_pv_poll_sync_state(void)
+{
+ HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
static int xen_pv_cpu_disable(void)
{
@@ -367,18 +361,18 @@ static int xen_pv_cpu_disable(void)
static void xen_pv_cpu_die(unsigned int cpu)
{
- while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
- xen_vcpu_nr(cpu), NULL)) {
+ while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
__set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/10);
}
+}
- if (common_cpu_die(cpu) == 0) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- xen_teardown_timer(cpu);
- xen_pmu_finish(cpu);
- }
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+ xen_smp_intr_free(cpu);
+ xen_uninit_lock_cpu(cpu);
+ xen_teardown_timer(cpu);
+ xen_pmu_finish(cpu);
}
static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -400,6 +394,11 @@ static void xen_pv_cpu_die(unsigned int cpu)
BUG();
}
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+ BUG();
+}
+
static void __noreturn xen_pv_play_dead(void)
{
BUG();
@@ -438,8 +437,10 @@ static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
.smp_cpus_done = xen_smp_cpus_done,
- .cpu_up = xen_pv_cpu_up,
+ .kick_ap_alive = xen_pv_kick_ap,
.cpu_die = xen_pv_cpu_die,
+ .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
+ .poll_sync_state = xen_pv_poll_sync_state,
.cpu_disable = xen_pv_cpu_disable,
.play_dead = xen_pv_play_dead,
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b74ac2562cfb..52fa5609b7f6 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -66,11 +66,10 @@ static noinstr u64 xen_sched_clock(void)
struct pvclock_vcpu_time_info *src;
u64 ret;
- preempt_disable_notrace();
src = &__this_cpu_read(xen_vcpu)->time;
ret = pvclock_clocksource_read_nowd(src);
ret -= xen_sched_clock_offset;
- preempt_enable_notrace();
+
return ret;
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a10903785a33..408a2aa66c69 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -72,8 +72,6 @@ void xen_restore_time_memory_area(void);
void xen_init_time_ops(void);
void xen_hvm_init_time_ops(void);
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
-
bool xen_vcpu_stolen(int vcpu);
void xen_vcpu_setup(int cpu);
@@ -148,9 +146,12 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
void xen_pin_vcpu(int cpu);
void xen_emergency_restart(void);
+void xen_force_evtchn_callback(void);
+
#ifdef CONFIG_XEN_PV
void xen_pv_pre_suspend(void);
void xen_pv_post_suspend(int suspend_cancelled);
+void xen_start_kernel(struct start_info *si);
#else
static inline void xen_pv_pre_suspend(void) {}
static inline void xen_pv_post_suspend(int suspend_cancelled) {}
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2d0d6440b979..2a51a466779f 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -16,7 +16,6 @@ config XTENSA
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
- select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
@@ -35,6 +34,7 @@ config XTENSA
select HAVE_ARCH_KCSAN
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ASM_MODVERSIONS
select HAVE_CONTEXT_TRACKING_USER
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
@@ -204,6 +204,18 @@ config XTENSA_UNALIGNED_USER
Say Y here to enable unaligned memory access in user space.
+config XTENSA_LOAD_STORE
+ bool "Load/store exception handler for memory only readable with l32"
+ help
+ The Xtensa architecture only allows reading memory attached to its
+ instruction bus with l32r and l32i instructions, all other
+ instructions raise an exception with the LoadStoreErrorCause code.
+ This makes it hard to use some configurations, e.g. store string
+ literals in FLASH memory attached to the instruction bus.
+
+ Say Y here to enable an exception handler that allows transparent
+ byte and 2-byte access to memory attached to the instruction bus.
+
config HAVE_SMP
bool "System Supports SMP (MX)"
depends on XTENSA_VARIANT_CUSTOM
diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug
index 83cc8d12fa0e..e84172a7763c 100644
--- a/arch/xtensa/Kconfig.debug
+++ b/arch/xtensa/Kconfig.debug
@@ -38,3 +38,11 @@ config PRINT_STACK_DEPTH
help
This option allows you to set the stack depth that the kernel
prints in stack traces.
+
+config PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION
+ bool "Dump user code around unhandled exception address"
+ help
+ Enable this option to display user code around PC of the unhandled
+ exception (starting at address aligned on 16 byte boundary).
+ This may simplify finding faulting code in the absence of other
+ debug facilities.
diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile
index 1d1d46215b1c..c0eef3f6f32d 100644
--- a/arch/xtensa/boot/boot-redboot/Makefile
+++ b/arch/xtensa/boot/boot-redboot/Makefile
@@ -6,16 +6,12 @@
OBJCOPY_ARGS := -O $(if $(CONFIG_CPU_BIG_ENDIAN),elf32-xtensa-be,elf32-xtensa-le)
-LD_ARGS = -T $(srctree)/$(obj)/boot.ld
-
boot-y := bootstrap.o
targets += $(boot-y)
OBJS := $(addprefix $(obj)/,$(boot-y))
LIBS := arch/xtensa/boot/lib/lib.a arch/xtensa/lib/lib.a
-LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
-
$(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS)
$(Q)$(OBJCOPY) $(OBJCOPY_ARGS) -R .comment \
--add-section image=$< \
@@ -23,7 +19,10 @@ $(obj)/zImage.o: $(obj)/../vmlinux.bin.gz $(OBJS)
$(OBJS) $@
$(obj)/zImage.elf: $(obj)/zImage.o $(LIBS)
- $(Q)$(LD) $(LD_ARGS) -o $@ $^ -L/xtensa-elf/lib $(LIBGCC)
+ $(Q)$(LD) $(KBUILD_LDFLAGS) \
+ -T $(srctree)/$(obj)/boot.ld \
+ --build-id=none \
+ -o $@ $^
$(obj)/../zImage.redboot: $(obj)/zImage.elf
$(Q)$(OBJCOPY) -S -O binary $< $@
diff --git a/arch/xtensa/include/asm/asm-prototypes.h b/arch/xtensa/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..b0da61812b85
--- /dev/null
+++ b/arch/xtensa/include/asm/asm-prototypes.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PROTOTYPES_H
+#define __ASM_PROTOTYPES_H
+
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+/*
+ * gcc internal math functions
+ */
+long long __ashrdi3(long long, int);
+long long __ashldi3(long long, int);
+long long __bswapdi2(long long);
+int __bswapsi2(int);
+long long __lshrdi3(long long, int);
+int __divsi3(int, int);
+int __modsi3(int, int);
+int __mulsi3(int, int);
+unsigned int __udivsi3(unsigned int, unsigned int);
+unsigned int __umodsi3(unsigned int, unsigned int);
+unsigned long long __umulsidi3(unsigned int, unsigned int);
+
+#endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index e3474ca411ff..01bf7d9dbb19 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -11,6 +11,7 @@
#ifndef _XTENSA_ASMMACRO_H
#define _XTENSA_ASMMACRO_H
+#include <asm-generic/export.h>
#include <asm/core.h>
/*
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 52da614f953c..7308b7f777d7 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -245,6 +245,11 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t * v) \
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
#undef ATOMIC_OPS
#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
@@ -252,12 +257,13 @@ ATOMIC_OPS(and)
ATOMIC_OPS(or)
ATOMIC_OPS(xor)
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
#endif /* _XTENSA_ATOMIC_H */
diff --git a/arch/xtensa/include/asm/bugs.h b/arch/xtensa/include/asm/bugs.h
deleted file mode 100644
index 69b29d198249..000000000000
--- a/arch/xtensa/include/asm/bugs.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * include/asm-xtensa/bugs.h
- *
- * This is included by init/main.c to check for architecture-dependent bugs.
- *
- * Xtensa processors don't have any bugs. :)
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of
- * this archive for more details.
- */
-
-#ifndef _XTENSA_BUGS_H
-#define _XTENSA_BUGS_H
-
-static void check_bugs(void) { }
-
-#endif /* _XTENSA_BUGS_H */
diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h
index f856d2bcb9f3..0e1bb6f019d6 100644
--- a/arch/xtensa/include/asm/core.h
+++ b/arch/xtensa/include/asm/core.h
@@ -26,6 +26,14 @@
#define XCHAL_SPANNING_WAY 0
#endif
+#ifndef XCHAL_HAVE_TRAX
+#define XCHAL_HAVE_TRAX 0
+#endif
+
+#ifndef XCHAL_NUM_PERF_COUNTERS
+#define XCHAL_NUM_PERF_COUNTERS 0
+#endif
+
#if XCHAL_HAVE_WINDOWED
#if defined(CONFIG_USER_ABI_DEFAULT) || defined(CONFIG_USER_ABI_CALL0_PROBE)
/* Whether windowed ABI is supported in userspace. */
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
index 6c6d9a9f185f..0ea4f84cd558 100644
--- a/arch/xtensa/include/asm/ftrace.h
+++ b/arch/xtensa/include/asm/ftrace.h
@@ -13,17 +13,8 @@
#include <asm/processor.h>
#ifndef __ASSEMBLY__
-#define ftrace_return_address0 ({ unsigned long a0, a1; \
- __asm__ __volatile__ ( \
- "mov %0, a0\n" \
- "mov %1, a1\n" \
- : "=r"(a0), "=r"(a1)); \
- MAKE_PC_FROM_RA(a0, a1); })
-
-#ifdef CONFIG_FRAME_POINTER
extern unsigned long return_address(unsigned level);
#define ftrace_return_address(n) return_address(n)
-#endif
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/xtensa/include/asm/platform.h b/arch/xtensa/include/asm/platform.h
index 354ca942de40..94f13fabf7cd 100644
--- a/arch/xtensa/include/asm/platform.h
+++ b/arch/xtensa/include/asm/platform.h
@@ -28,31 +28,11 @@ extern void platform_init(bp_tag_t*);
extern void platform_setup (char **);
/*
- * platform_restart is called to restart the system.
- */
-extern void platform_restart (void);
-
-/*
- * platform_halt is called to stop the system and halt.
- */
-extern void platform_halt (void);
-
-/*
- * platform_power_off is called to stop the system and power it off.
- */
-extern void platform_power_off (void);
-
-/*
* platform_idle is called from the idle function.
*/
extern void platform_idle (void);
/*
- * platform_heartbeat is called every HZ
- */
-extern void platform_heartbeat (void);
-
-/*
* platform_calibrate_ccount calibrates cpu clock freq (CONFIG_XTENSA_CALIBRATE)
*/
extern void platform_calibrate_ccount (void);
diff --git a/arch/xtensa/include/asm/string.h b/arch/xtensa/include/asm/string.h
index 89b51a0c752f..ffce43513fa2 100644
--- a/arch/xtensa/include/asm/string.h
+++ b/arch/xtensa/include/asm/string.h
@@ -118,9 +118,6 @@ extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
-/* Don't build bcopy at all ... */
-#define __HAVE_ARCH_BCOPY
-
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
/*
diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h
index 6f74ccc0c7ea..212c3b9ff407 100644
--- a/arch/xtensa/include/asm/traps.h
+++ b/arch/xtensa/include/asm/traps.h
@@ -47,6 +47,7 @@ __init trap_set_handler(int cause, xtensa_exception_handler *handler);
asmlinkage void fast_illegal_instruction_user(void);
asmlinkage void fast_syscall_user(void);
asmlinkage void fast_alloca(void);
+asmlinkage void fast_load_store(void);
asmlinkage void fast_unaligned(void);
asmlinkage void fast_second_level_miss(void);
asmlinkage void fast_store_prohibited(void);
@@ -64,8 +65,14 @@ void do_unhandled(struct pt_regs *regs);
static inline void __init early_trap_init(void)
{
static struct exc_table init_exc_table __initdata = {
+#ifdef CONFIG_XTENSA_LOAD_STORE
+ .fast_kernel_handler[EXCCAUSE_LOAD_STORE_ERROR] =
+ fast_load_store,
+#endif
+#ifdef CONFIG_MMU
.fast_kernel_handler[EXCCAUSE_DTLB_MISS] =
fast_second_level_miss,
+#endif
};
xtensa_set_sr(&init_exc_table, excsave1);
}
diff --git a/arch/xtensa/kernel/align.S b/arch/xtensa/kernel/align.S
index d062c732ef18..20d6b4961001 100644
--- a/arch/xtensa/kernel/align.S
+++ b/arch/xtensa/kernel/align.S
@@ -22,7 +22,17 @@
#include <asm/asmmacro.h>
#include <asm/processor.h>
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || defined CONFIG_XTENSA_LOAD_STORE
+#define LOAD_EXCEPTION_HANDLER
+#endif
+
+#if XCHAL_UNALIGNED_STORE_EXCEPTION || defined LOAD_EXCEPTION_HANDLER
+#define ANY_EXCEPTION_HANDLER
+#endif
+
+#if XCHAL_HAVE_WINDOWED
+#define UNALIGNED_USER_EXCEPTION
+#endif
/* First-level exception handler for unaligned exceptions.
*
@@ -58,10 +68,6 @@
* BE shift left / mask 0 0 X X
*/
-#if XCHAL_HAVE_WINDOWED
-#define UNALIGNED_USER_EXCEPTION
-#endif
-
#if XCHAL_HAVE_BE
#define HWORD_START 16
@@ -103,7 +109,7 @@
*
* 23 0
* -----------------------------
- * res 0000 0010
+ * L8UI xxxx xxxx 0000 ssss tttt 0010
* L16UI xxxx xxxx 0001 ssss tttt 0010
* L32I xxxx xxxx 0010 ssss tttt 0010
* XXX 0011 ssss tttt 0010
@@ -128,9 +134,11 @@
#define OP0_L32I_N 0x8 /* load immediate narrow */
#define OP0_S32I_N 0x9 /* store immediate narrow */
+#define OP0_LSAI 0x2 /* load/store */
#define OP1_SI_MASK 0x4 /* OP1 bit set for stores */
#define OP1_SI_BIT 2 /* OP1 bit number for stores */
+#define OP1_L8UI 0x0
#define OP1_L32I 0x2
#define OP1_L16UI 0x1
#define OP1_L16SI 0x9
@@ -155,60 +163,74 @@
*/
.literal_position
-ENTRY(fast_unaligned)
+#ifdef CONFIG_XTENSA_LOAD_STORE
+ENTRY(fast_load_store)
- /* Note: We don't expect the address to be aligned on a word
- * boundary. After all, the processor generated that exception
- * and it would be a hardware fault.
- */
+ call0 .Lsave_and_load_instruction
- /* Save some working register */
+ /* Analyze the instruction (load or store?). */
- s32i a4, a2, PT_AREG4
- s32i a5, a2, PT_AREG5
- s32i a6, a2, PT_AREG6
- s32i a7, a2, PT_AREG7
- s32i a8, a2, PT_AREG8
+ extui a0, a4, INSN_OP0, 4 # get insn.op0 nibble
- rsr a0, depc
- s32i a0, a2, PT_AREG2
- s32i a3, a2, PT_AREG3
+#if XCHAL_HAVE_DENSITY
+ _beqi a0, OP0_L32I_N, 1f # L32I.N, jump
+#endif
+ bnei a0, OP0_LSAI, .Linvalid_instruction
+ /* 'store indicator bit' set, jump */
+ bbsi.l a4, OP1_SI_BIT + INSN_OP1, .Linvalid_instruction
- rsr a3, excsave1
- movi a4, fast_unaligned_fixup
- s32i a4, a3, EXC_TABLE_FIXUP
+1:
+ movi a3, ~3
+ and a3, a3, a8 # align memory address
- /* Keep value of SAR in a0 */
+ __ssa8 a8
- rsr a0, sar
- rsr a8, excvaddr # load unaligned memory address
+#ifdef CONFIG_MMU
+ /* l32e can't be used here even when it's available. */
+ /* TODO access_ok(a3) could be used here */
+ j .Linvalid_instruction
+#endif
+ l32i a5, a3, 0
+ l32i a6, a3, 4
+ __src_b a3, a5, a6 # a3 has the data word
- /* Now, identify one of the following load/store instructions.
- *
- * The only possible danger of a double exception on the
- * following l32i instructions is kernel code in vmalloc
- * memory. The processor was just executing at the EPC_1
- * address, and indeed, already fetched the instruction. That
- * guarantees a TLB mapping, which hasn't been replaced by
- * this unaligned exception handler that uses only static TLB
- * mappings. However, high-level interrupt handlers might
- * modify TLB entries, so for the generic case, we register a
- * TABLE_FIXUP handler here, too.
- */
+#if XCHAL_HAVE_DENSITY
+ addi a7, a7, 2 # increment PC (assume 16-bit insn)
+ _beqi a0, OP0_L32I_N, .Lload_w# l32i.n: jump
+ addi a7, a7, 1
+#else
+ addi a7, a7, 3
+#endif
- /* a3...a6 saved on stack, a2 = SP */
+ extui a5, a4, INSN_OP1, 4
+ _beqi a5, OP1_L32I, .Lload_w
+ bnei a5, OP1_L8UI, .Lload16
+ extui a3, a3, 0, 8
+ j .Lload_w
- /* Extract the instruction that caused the unaligned access. */
+ENDPROC(fast_load_store)
+#endif
- rsr a7, epc1 # load exception address
- movi a3, ~3
- and a3, a3, a7 # mask lower bits
+/*
+ * Entry condition:
+ *
+ * a0: trashed, original value saved on stack (PT_AREG0)
+ * a1: a1
+ * a2: new stack pointer, original in DEPC
+ * a3: a3
+ * depc: a2, original value saved on stack (PT_DEPC)
+ * excsave_1: dispatch table
+ *
+ * PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
+ * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
+ */
- l32i a4, a3, 0 # load 2 words
- l32i a5, a3, 4
+#ifdef ANY_EXCEPTION_HANDLER
+ENTRY(fast_unaligned)
- __ssa8 a7
- __src_b a4, a4, a5 # a4 has the instruction
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+
+ call0 .Lsave_and_load_instruction
/* Analyze the instruction (load or store?). */
@@ -222,12 +244,17 @@ ENTRY(fast_unaligned)
/* 'store indicator bit' not set, jump */
_bbci.l a4, OP1_SI_BIT + INSN_OP1, .Lload
+#endif
+#if XCHAL_UNALIGNED_STORE_EXCEPTION
+
/* Store: Jump to table entry to get the value in the source register.*/
.Lstore:movi a5, .Lstore_table # table
extui a6, a4, INSN_T, 4 # get source register
addx8 a5, a6, a5
jx a5 # jump into table
+#endif
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION
/* Load: Load memory address. */
@@ -249,7 +276,7 @@ ENTRY(fast_unaligned)
addi a7, a7, 2 # increment PC (assume 16-bit insn)
extui a5, a4, INSN_OP0, 4
- _beqi a5, OP0_L32I_N, 1f # l32i.n: jump
+ _beqi a5, OP0_L32I_N, .Lload_w# l32i.n: jump
addi a7, a7, 1
#else
@@ -257,21 +284,26 @@ ENTRY(fast_unaligned)
#endif
extui a5, a4, INSN_OP1, 4
- _beqi a5, OP1_L32I, 1f # l32i: jump
-
+ _beqi a5, OP1_L32I, .Lload_w # l32i: jump
+#endif
+#ifdef LOAD_EXCEPTION_HANDLER
+.Lload16:
extui a3, a3, 0, 16 # extract lower 16 bits
- _beqi a5, OP1_L16UI, 1f
+ _beqi a5, OP1_L16UI, .Lload_w
addi a5, a5, -OP1_L16SI
- _bnez a5, .Linvalid_instruction_load
+ _bnez a5, .Linvalid_instruction
/* sign extend value */
-
+#if XCHAL_HAVE_SEXT
+ sext a3, a3, 15
+#else
slli a3, a3, 16
srai a3, a3, 16
+#endif
/* Set target register. */
-1:
+.Lload_w:
extui a4, a4, INSN_T, 4 # extract target register
movi a5, .Lload_table
addx8 a4, a4, a5
@@ -295,30 +327,32 @@ ENTRY(fast_unaligned)
mov a13, a3 ; _j .Lexit; .align 8
mov a14, a3 ; _j .Lexit; .align 8
mov a15, a3 ; _j .Lexit; .align 8
-
+#endif
+#if XCHAL_UNALIGNED_STORE_EXCEPTION
.Lstore_table:
- l32i a3, a2, PT_AREG0; _j 1f; .align 8
- mov a3, a1; _j 1f; .align 8 # fishy??
- l32i a3, a2, PT_AREG2; _j 1f; .align 8
- l32i a3, a2, PT_AREG3; _j 1f; .align 8
- l32i a3, a2, PT_AREG4; _j 1f; .align 8
- l32i a3, a2, PT_AREG5; _j 1f; .align 8
- l32i a3, a2, PT_AREG6; _j 1f; .align 8
- l32i a3, a2, PT_AREG7; _j 1f; .align 8
- l32i a3, a2, PT_AREG8; _j 1f; .align 8
- mov a3, a9 ; _j 1f; .align 8
- mov a3, a10 ; _j 1f; .align 8
- mov a3, a11 ; _j 1f; .align 8
- mov a3, a12 ; _j 1f; .align 8
- mov a3, a13 ; _j 1f; .align 8
- mov a3, a14 ; _j 1f; .align 8
- mov a3, a15 ; _j 1f; .align 8
+ l32i a3, a2, PT_AREG0; _j .Lstore_w; .align 8
+ mov a3, a1; _j .Lstore_w; .align 8 # fishy??
+ l32i a3, a2, PT_AREG2; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG3; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG4; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG5; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG6; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG7; _j .Lstore_w; .align 8
+ l32i a3, a2, PT_AREG8; _j .Lstore_w; .align 8
+ mov a3, a9 ; _j .Lstore_w; .align 8
+ mov a3, a10 ; _j .Lstore_w; .align 8
+ mov a3, a11 ; _j .Lstore_w; .align 8
+ mov a3, a12 ; _j .Lstore_w; .align 8
+ mov a3, a13 ; _j .Lstore_w; .align 8
+ mov a3, a14 ; _j .Lstore_w; .align 8
+ mov a3, a15 ; _j .Lstore_w; .align 8
+#endif
+#ifdef ANY_EXCEPTION_HANDLER
/* We cannot handle this exception. */
.extern _kernel_exception
-.Linvalid_instruction_load:
-.Linvalid_instruction_store:
+.Linvalid_instruction:
movi a4, 0
rsr a3, excsave1
@@ -326,6 +360,7 @@ ENTRY(fast_unaligned)
/* Restore a4...a8 and SAR, set SP, and jump to default exception. */
+ l32i a0, a2, PT_SAR
l32i a8, a2, PT_AREG8
l32i a7, a2, PT_AREG7
l32i a6, a2, PT_AREG6
@@ -342,9 +377,11 @@ ENTRY(fast_unaligned)
2: movi a0, _user_exception
jx a0
+#endif
+#if XCHAL_UNALIGNED_STORE_EXCEPTION
-1: # a7: instruction pointer, a4: instruction, a3: value
-
+ # a7: instruction pointer, a4: instruction, a3: value
+.Lstore_w:
movi a6, 0 # mask: ffffffff:00000000
#if XCHAL_HAVE_DENSITY
@@ -361,7 +398,7 @@ ENTRY(fast_unaligned)
extui a5, a4, INSN_OP1, 4 # extract OP1
_beqi a5, OP1_S32I, 1f # jump if 32 bit store
- _bnei a5, OP1_S16I, .Linvalid_instruction_store
+ _bnei a5, OP1_S16I, .Linvalid_instruction
movi a5, -1
__extl a3, a3 # get 16-bit value
@@ -406,7 +443,8 @@ ENTRY(fast_unaligned)
#else
s32i a6, a4, 4
#endif
-
+#endif
+#ifdef ANY_EXCEPTION_HANDLER
.Lexit:
#if XCHAL_HAVE_LOOPS
rsr a4, lend # check if we reached LEND
@@ -434,6 +472,7 @@ ENTRY(fast_unaligned)
/* Restore working register */
+ l32i a0, a2, PT_SAR
l32i a8, a2, PT_AREG8
l32i a7, a2, PT_AREG7
l32i a6, a2, PT_AREG6
@@ -448,6 +487,59 @@ ENTRY(fast_unaligned)
l32i a2, a2, PT_AREG2
rfe
+ .align 4
+.Lsave_and_load_instruction:
+
+ /* Save some working register */
+
+ s32i a3, a2, PT_AREG3
+ s32i a4, a2, PT_AREG4
+ s32i a5, a2, PT_AREG5
+ s32i a6, a2, PT_AREG6
+ s32i a7, a2, PT_AREG7
+ s32i a8, a2, PT_AREG8
+
+ rsr a4, depc
+ s32i a4, a2, PT_AREG2
+
+ rsr a5, sar
+ s32i a5, a2, PT_SAR
+
+ rsr a3, excsave1
+ movi a4, fast_unaligned_fixup
+ s32i a4, a3, EXC_TABLE_FIXUP
+
+ rsr a8, excvaddr # load unaligned memory address
+
+ /* Now, identify one of the following load/store instructions.
+ *
+ * The only possible danger of a double exception on the
+ * following l32i instructions is kernel code in vmalloc
+ * memory. The processor was just executing at the EPC_1
+ * address, and indeed, already fetched the instruction. That
+ * guarantees a TLB mapping, which hasn't been replaced by
+ * this unaligned exception handler that uses only static TLB
+ * mappings. However, high-level interrupt handlers might
+ * modify TLB entries, so for the generic case, we register a
+ * TABLE_FIXUP handler here, too.
+ */
+
+ /* a3...a6 saved on stack, a2 = SP */
+
+ /* Extract the instruction that caused the unaligned access. */
+
+ rsr a7, epc1 # load exception address
+ movi a3, ~3
+ and a3, a3, a7 # mask lower bits
+
+ l32i a4, a3, 0 # load 2 words
+ l32i a5, a3, 4
+
+ __ssa8 a7
+ __src_b a4, a4, a5 # a4 has the instruction
+
+ ret
+#endif
ENDPROC(fast_unaligned)
ENTRY(fast_unaligned_fixup)
@@ -459,10 +551,11 @@ ENTRY(fast_unaligned_fixup)
l32i a7, a2, PT_AREG7
l32i a6, a2, PT_AREG6
l32i a5, a2, PT_AREG5
- l32i a4, a2, PT_AREG4
+ l32i a4, a2, PT_SAR
l32i a0, a2, PT_AREG2
- xsr a0, depc # restore depc and a0
- wsr a0, sar
+ wsr a4, sar
+ wsr a0, depc # restore depc and a0
+ l32i a4, a2, PT_AREG4
rsr a0, exccause
s32i a0, a2, PT_DEPC # mark as a regular exception
@@ -483,5 +576,4 @@ ENTRY(fast_unaligned_fixup)
jx a0
ENDPROC(fast_unaligned_fixup)
-
-#endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION */
+#endif
diff --git a/arch/xtensa/kernel/mcount.S b/arch/xtensa/kernel/mcount.S
index 51daaf4e0b82..309b3298258f 100644
--- a/arch/xtensa/kernel/mcount.S
+++ b/arch/xtensa/kernel/mcount.S
@@ -78,6 +78,7 @@ ENTRY(_mcount)
#error Unsupported Xtensa ABI
#endif
ENDPROC(_mcount)
+EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_stub)
abi_entry_default
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index ac1e0e566995..926b8bf0f14c 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -17,27 +17,28 @@
#include <asm/platform.h>
#include <asm/timex.h>
-#define _F(r,f,a,b) \
- r __platform_##f a b; \
- r platform_##f a __attribute__((weak, alias("__platform_"#f)))
-
/*
* Default functions that are used if no platform specific function is defined.
- * (Please, refer to include/asm-xtensa/platform.h for more information)
+ * (Please, refer to arch/xtensa/include/asm/platform.h for more information)
*/
-_F(void, init, (bp_tag_t *first), { });
-_F(void, setup, (char** cmd), { });
-_F(void, restart, (void), { while(1); });
-_F(void, halt, (void), { while(1); });
-_F(void, power_off, (void), { while(1); });
-_F(void, idle, (void), { __asm__ __volatile__ ("waiti 0" ::: "memory"); });
-_F(void, heartbeat, (void), { });
+void __weak __init platform_init(bp_tag_t *first)
+{
+}
+
+void __weak __init platform_setup(char **cmd)
+{
+}
+
+void __weak platform_idle(void)
+{
+ __asm__ __volatile__ ("waiti 0" ::: "memory");
+}
#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
-_F(void, calibrate_ccount, (void),
+void __weak platform_calibrate_ccount(void)
{
pr_err("ERROR: Cannot calibrate cpu frequency! Assuming 10MHz.\n");
ccount_freq = 10 * 1000000UL;
-});
+}
#endif
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 9191738f9941..aba3ff4e60d8 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -22,6 +22,7 @@
#include <linux/screen_info.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
+#include <linux/reboot.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -46,6 +47,7 @@
#include <asm/smp.h>
#include <asm/sysmem.h>
#include <asm/timex.h>
+#include <asm/traps.h>
#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
struct screen_info screen_info = {
@@ -241,6 +243,12 @@ void __init early_init_devtree(void *params)
void __init init_arch(bp_tag_t *bp_start)
{
+ /* Initialize basic exception handling if configuration may need it */
+
+ if (IS_ENABLED(CONFIG_KASAN) ||
+ IS_ENABLED(CONFIG_XTENSA_LOAD_STORE))
+ early_trap_init();
+
/* Initialize MMU. */
init_mmu();
@@ -522,19 +530,30 @@ void cpu_reset(void)
void machine_restart(char * cmd)
{
- platform_restart();
+ local_irq_disable();
+ smp_send_stop();
+ do_kernel_restart(cmd);
+ pr_err("Reboot failed -- System halted\n");
+ while (1)
+ cpu_relax();
}
void machine_halt(void)
{
- platform_halt();
- while (1);
+ local_irq_disable();
+ smp_send_stop();
+ do_kernel_power_off();
+ while (1)
+ cpu_relax();
}
void machine_power_off(void)
{
- platform_power_off();
- while (1);
+ local_irq_disable();
+ smp_send_stop();
+ do_kernel_power_off();
+ while (1)
+ cpu_relax();
}
#ifdef CONFIG_PROC_FS
@@ -574,6 +593,12 @@ c_show(struct seq_file *f, void *slot)
# if XCHAL_HAVE_OCD
"ocd "
# endif
+#if XCHAL_HAVE_TRAX
+ "trax "
+#endif
+#if XCHAL_NUM_PERF_COUNTERS
+ "perf "
+#endif
#endif
#if XCHAL_HAVE_DENSITY
"density "
@@ -623,11 +648,13 @@ c_show(struct seq_file *f, void *slot)
seq_printf(f,"physical aregs\t: %d\n"
"misc regs\t: %d\n"
"ibreak\t\t: %d\n"
- "dbreak\t\t: %d\n",
+ "dbreak\t\t: %d\n"
+ "perf counters\t: %d\n",
XCHAL_NUM_AREGS,
XCHAL_NUM_MISC_REGS,
XCHAL_NUM_IBREAK,
- XCHAL_NUM_DBREAK);
+ XCHAL_NUM_DBREAK,
+ XCHAL_NUM_PERF_COUNTERS);
/* Interrupt. */
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7f7755cd28f0..f643ea5e36da 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -237,8 +237,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
#endif
-#ifdef CONFIG_FRAME_POINTER
-
struct return_addr_data {
unsigned long addr;
unsigned skip;
@@ -271,5 +269,3 @@ unsigned long return_address(unsigned level)
return r.addr;
}
EXPORT_SYMBOL(return_address);
-
-#endif
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 52c94ab5c205..2b69c3c035b6 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -421,3 +421,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 16b8a6273772..1c3dfea843ec 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -121,10 +121,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
set_linux_timer(get_linux_timer());
evt->event_handler(evt);
-
- /* Allow platform to do something useful (Wdog). */
- platform_heartbeat();
-
return IRQ_HANDLED;
}
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index f0a7d1c2641e..17eb180eff7c 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -54,9 +54,10 @@ static void do_interrupt(struct pt_regs *regs);
#if XTENSA_FAKE_NMI
static void do_nmi(struct pt_regs *regs);
#endif
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-static void do_unaligned_user(struct pt_regs *regs);
+#ifdef CONFIG_XTENSA_LOAD_STORE
+static void do_load_store(struct pt_regs *regs);
#endif
+static void do_unaligned_user(struct pt_regs *regs);
static void do_multihit(struct pt_regs *regs);
#if XTENSA_HAVE_COPROCESSORS
static void do_coprocessor(struct pt_regs *regs);
@@ -91,7 +92,10 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
{ EXCCAUSE_SYSTEM_CALL, USER, fast_syscall_user },
{ EXCCAUSE_SYSTEM_CALL, 0, system_call },
/* EXCCAUSE_INSTRUCTION_FETCH unhandled */
-/* EXCCAUSE_LOAD_STORE_ERROR unhandled*/
+#ifdef CONFIG_XTENSA_LOAD_STORE
+{ EXCCAUSE_LOAD_STORE_ERROR, USER|KRNL, fast_load_store },
+{ EXCCAUSE_LOAD_STORE_ERROR, 0, do_load_store },
+#endif
{ EXCCAUSE_LEVEL1_INTERRUPT, 0, do_interrupt },
#ifdef SUPPORT_WINDOWED
{ EXCCAUSE_ALLOCA, USER|KRNL, fast_alloca },
@@ -102,9 +106,9 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
#ifdef CONFIG_XTENSA_UNALIGNED_USER
{ EXCCAUSE_UNALIGNED, USER, fast_unaligned },
#endif
-{ EXCCAUSE_UNALIGNED, 0, do_unaligned_user },
{ EXCCAUSE_UNALIGNED, KRNL, fast_unaligned },
#endif
+{ EXCCAUSE_UNALIGNED, 0, do_unaligned_user },
#ifdef CONFIG_MMU
{ EXCCAUSE_ITLB_MISS, 0, do_page_fault },
{ EXCCAUSE_ITLB_MISS, USER|KRNL, fast_second_level_miss},
@@ -171,6 +175,23 @@ __die_if_kernel(const char *str, struct pt_regs *regs, long err)
die(str, regs, err);
}
+#ifdef CONFIG_PRINT_USER_CODE_ON_UNHANDLED_EXCEPTION
+static inline void dump_user_code(struct pt_regs *regs)
+{
+ char buf[32];
+
+ if (copy_from_user(buf, (void __user *)(regs->pc & -16), sizeof(buf)) == 0) {
+ print_hex_dump(KERN_INFO, " ", DUMP_PREFIX_NONE,
+ 32, 1, buf, sizeof(buf), false);
+
+ }
+}
+#else
+static inline void dump_user_code(struct pt_regs *regs)
+{
+}
+#endif
+
/*
* Unhandled Exceptions. Kill user task or panic if in kernel space.
*/
@@ -186,6 +207,7 @@ void do_unhandled(struct pt_regs *regs)
"\tEXCCAUSE is %ld\n",
current->comm, task_pid_nr(current), regs->pc,
regs->exccause);
+ dump_user_code(regs);
force_sig(SIGILL);
}
@@ -349,6 +371,19 @@ static void do_div0(struct pt_regs *regs)
force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->pc);
}
+#ifdef CONFIG_XTENSA_LOAD_STORE
+static void do_load_store(struct pt_regs *regs)
+{
+ __die_if_kernel("Unhandled load/store exception in kernel",
+ regs, SIGKILL);
+
+ pr_info_ratelimited("Load/store error to %08lx in '%s' (pid = %d, pc = %#010lx)\n",
+ regs->excvaddr, current->comm,
+ task_pid_nr(current), regs->pc);
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void *)regs->excvaddr);
+}
+#endif
+
/*
* Handle unaligned memory accesses from user space. Kill task.
*
@@ -356,7 +391,6 @@ static void do_div0(struct pt_regs *regs)
* accesses causes from user space.
*/
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
static void do_unaligned_user(struct pt_regs *regs)
{
__die_if_kernel("Unhandled unaligned exception in kernel",
@@ -368,7 +402,6 @@ static void do_unaligned_user(struct pt_regs *regs)
task_pid_nr(current), regs->pc);
force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr);
}
-#endif
#if XTENSA_HAVE_COPROCESSORS
static void do_coprocessor(struct pt_regs *regs)
@@ -534,31 +567,58 @@ static void show_trace(struct task_struct *task, unsigned long *sp,
}
#define STACK_DUMP_ENTRY_SIZE 4
-#define STACK_DUMP_LINE_SIZE 32
+#define STACK_DUMP_LINE_SIZE 16
static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
-void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+struct stack_fragment
{
- size_t len, off = 0;
-
- if (!sp)
- sp = stack_pointer(task);
+ size_t len;
+ size_t off;
+ u8 *sp;
+ const char *loglvl;
+};
- len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE),
- kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+static int show_stack_fragment_cb(struct stackframe *frame, void *data)
+{
+ struct stack_fragment *sf = data;
- printk("%sStack:\n", loglvl);
- while (off < len) {
+ while (sf->off < sf->len) {
u8 line[STACK_DUMP_LINE_SIZE];
- size_t line_len = len - off > STACK_DUMP_LINE_SIZE ?
- STACK_DUMP_LINE_SIZE : len - off;
+ size_t line_len = sf->len - sf->off > STACK_DUMP_LINE_SIZE ?
+ STACK_DUMP_LINE_SIZE : sf->len - sf->off;
+ bool arrow = sf->off == 0;
- __memcpy(line, (u8 *)sp + off, line_len);
- print_hex_dump(loglvl, " ", DUMP_PREFIX_NONE,
+ if (frame && frame->sp == (unsigned long)(sf->sp + sf->off))
+ arrow = true;
+
+ __memcpy(line, sf->sp + sf->off, line_len);
+ print_hex_dump(sf->loglvl, arrow ? "> " : " ", DUMP_PREFIX_NONE,
STACK_DUMP_LINE_SIZE, STACK_DUMP_ENTRY_SIZE,
line, line_len, false);
- off += STACK_DUMP_LINE_SIZE;
+ sf->off += STACK_DUMP_LINE_SIZE;
+ if (arrow)
+ return 0;
}
+ return 1;
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
+{
+ struct stack_fragment sf;
+
+ if (!sp)
+ sp = stack_pointer(task);
+
+ sf.len = min((-(size_t)sp) & (THREAD_SIZE - STACK_DUMP_ENTRY_SIZE),
+ kstack_depth_to_print * STACK_DUMP_ENTRY_SIZE);
+ sf.off = 0;
+ sf.sp = (u8 *)sp;
+ sf.loglvl = loglvl;
+
+ printk("%sStack:\n", loglvl);
+ walk_stackframe(sp, show_stack_fragment_cb, &sf);
+ while (sf.off < sf.len)
+ show_stack_fragment_cb(NULL, &sf);
show_trace(task, sp, loglvl);
}
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 17a7ef86fd0d..62d81e76e18e 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -13,71 +13,10 @@
*/
#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <asm/irq.h>
-#include <linux/in6.h>
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/checksum.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/ftrace.h>
-#ifdef CONFIG_BLK_DEV_FD
-#include <asm/floppy.h>
-#endif
-#ifdef CONFIG_NET
-#include <net/checksum.h>
-#endif /* CONFIG_NET */
-
-
-/*
- * String functions
- */
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(__memmove);
-#ifdef CONFIG_ARCH_HAS_STRNCPY_FROM_USER
-EXPORT_SYMBOL(__strncpy_user);
-#endif
-EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(copy_page);
+#include <asm/pgtable.h>
EXPORT_SYMBOL(empty_zero_page);
-/*
- * gcc internal math functions
- */
-extern long long __ashrdi3(long long, int);
-extern long long __ashldi3(long long, int);
-extern long long __bswapdi2(long long);
-extern int __bswapsi2(int);
-extern long long __lshrdi3(long long, int);
-extern int __divsi3(int, int);
-extern int __modsi3(int, int);
-extern int __mulsi3(int, int);
-extern unsigned int __udivsi3(unsigned int, unsigned int);
-extern unsigned int __umodsi3(unsigned int, unsigned int);
-extern unsigned long long __umulsidi3(unsigned int, unsigned int);
-
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__bswapdi2);
-EXPORT_SYMBOL(__bswapsi2);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__mulsi3);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__umodsi3);
-EXPORT_SYMBOL(__umulsidi3);
-
unsigned int __sync_fetch_and_and_4(volatile void *p, unsigned int v)
{
BUG();
@@ -89,35 +28,3 @@ unsigned int __sync_fetch_and_or_4(volatile void *p, unsigned int v)
BUG();
}
EXPORT_SYMBOL(__sync_fetch_and_or_4);
-
-/*
- * Networking support
- */
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-
-/*
- * Architecture-specific symbols
- */
-EXPORT_SYMBOL(__xtensa_copy_user);
-EXPORT_SYMBOL(__invalidate_icache_range);
-
-/*
- * Kernel hacking ...
- */
-
-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
-// FIXME EXPORT_SYMBOL(screen_info);
-#endif
-
-extern long common_exception_return;
-EXPORT_SYMBOL(common_exception_return);
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-
-EXPORT_SYMBOL(__invalidate_dcache_range);
-#if XCHAL_DCACHE_IS_WRITEBACK
-EXPORT_SYMBOL(__flush_dcache_range);
-#endif
diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile
index c9c2614188f7..6e5b2232668c 100644
--- a/arch/xtensa/lib/Makefile
+++ b/arch/xtensa/lib/Makefile
@@ -6,7 +6,8 @@
lib-y += memcopy.o memset.o checksum.o \
ashldi3.o ashrdi3.o bswapdi2.o bswapsi2.o lshrdi3.o \
divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \
- usercopy.o strncpy_user.o strnlen_user.o
+ usercopy.o strnlen_user.o
+lib-$(CONFIG_ARCH_HAS_STRNCPY_FROM_USER) += strncpy_user.o
lib-$(CONFIG_PCI) += pci-auto.o
lib-$(CONFIG_KCSAN) += kcsan-stubs.o
KCSAN_SANITIZE_kcsan-stubs.o := n
diff --git a/arch/xtensa/lib/ashldi3.S b/arch/xtensa/lib/ashldi3.S
index 67fb0da9e432..cd6b731215d3 100644
--- a/arch/xtensa/lib/ashldi3.S
+++ b/arch/xtensa/lib/ashldi3.S
@@ -26,3 +26,4 @@ ENTRY(__ashldi3)
abi_ret_default
ENDPROC(__ashldi3)
+EXPORT_SYMBOL(__ashldi3)
diff --git a/arch/xtensa/lib/ashrdi3.S b/arch/xtensa/lib/ashrdi3.S
index cbf052c512cc..07bc6e758020 100644
--- a/arch/xtensa/lib/ashrdi3.S
+++ b/arch/xtensa/lib/ashrdi3.S
@@ -26,3 +26,4 @@ ENTRY(__ashrdi3)
abi_ret_default
ENDPROC(__ashrdi3)
+EXPORT_SYMBOL(__ashrdi3)
diff --git a/arch/xtensa/lib/bswapdi2.S b/arch/xtensa/lib/bswapdi2.S
index d8e52e05eba6..5d94a9352887 100644
--- a/arch/xtensa/lib/bswapdi2.S
+++ b/arch/xtensa/lib/bswapdi2.S
@@ -19,3 +19,4 @@ ENTRY(__bswapdi2)
abi_ret_default
ENDPROC(__bswapdi2)
+EXPORT_SYMBOL(__bswapdi2)
diff --git a/arch/xtensa/lib/bswapsi2.S b/arch/xtensa/lib/bswapsi2.S
index 9c1de1344f79..fbfb8613d410 100644
--- a/arch/xtensa/lib/bswapsi2.S
+++ b/arch/xtensa/lib/bswapsi2.S
@@ -14,3 +14,4 @@ ENTRY(__bswapsi2)
abi_ret_default
ENDPROC(__bswapsi2)
+EXPORT_SYMBOL(__bswapsi2)
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index cf1bed1a5bd6..ffee6f94c8f8 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -169,6 +169,7 @@ ENTRY(csum_partial)
j 5b /* branch to handle the remaining byte */
ENDPROC(csum_partial)
+EXPORT_SYMBOL(csum_partial)
/*
* Copy from ds while checksumming, otherwise like csum_partial
@@ -346,6 +347,7 @@ EX(10f) s8i a8, a3, 1
j 4b /* process the possible trailing odd byte */
ENDPROC(csum_partial_copy_generic)
+EXPORT_SYMBOL(csum_partial_copy_generic)
# Exception handler:
diff --git a/arch/xtensa/lib/divsi3.S b/arch/xtensa/lib/divsi3.S
index b044b4744a8b..edb3c4ad971b 100644
--- a/arch/xtensa/lib/divsi3.S
+++ b/arch/xtensa/lib/divsi3.S
@@ -72,3 +72,4 @@ ENTRY(__divsi3)
abi_ret_default
ENDPROC(__divsi3)
+EXPORT_SYMBOL(__divsi3)
diff --git a/arch/xtensa/lib/lshrdi3.S b/arch/xtensa/lib/lshrdi3.S
index 129ef8d1725b..e432e1a40702 100644
--- a/arch/xtensa/lib/lshrdi3.S
+++ b/arch/xtensa/lib/lshrdi3.S
@@ -26,3 +26,4 @@ ENTRY(__lshrdi3)
abi_ret_default
ENDPROC(__lshrdi3)
+EXPORT_SYMBOL(__lshrdi3)
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index b20d206bcb71..f60760396cee 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -273,21 +273,8 @@ WEAK(memcpy)
abi_ret_default
ENDPROC(__memcpy)
-
-/*
- * void bcopy(const void *src, void *dest, size_t n);
- */
-
-ENTRY(bcopy)
-
- abi_entry_default
- # a2=src, a3=dst, a4=len
- mov a5, a3
- mov a3, a2
- mov a2, a5
- j .Lmovecommon # go to common code for memmove+bcopy
-
-ENDPROC(bcopy)
+EXPORT_SYMBOL(__memcpy)
+EXPORT_SYMBOL(memcpy)
/*
* void *memmove(void *dst, const void *src, size_t len);
@@ -551,3 +538,5 @@ WEAK(memmove)
abi_ret_default
ENDPROC(__memmove)
+EXPORT_SYMBOL(__memmove)
+EXPORT_SYMBOL(memmove)
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 59b1524fd601..262c3f39f945 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -142,6 +142,8 @@ EX(10f) s8i a3, a5, 0
abi_ret_default
ENDPROC(__memset)
+EXPORT_SYMBOL(__memset)
+EXPORT_SYMBOL(memset)
.section .fixup, "ax"
.align 4
diff --git a/arch/xtensa/lib/modsi3.S b/arch/xtensa/lib/modsi3.S
index d00e77181e20..c5f4295c6868 100644
--- a/arch/xtensa/lib/modsi3.S
+++ b/arch/xtensa/lib/modsi3.S
@@ -60,6 +60,7 @@ ENTRY(__modsi3)
abi_ret_default
ENDPROC(__modsi3)
+EXPORT_SYMBOL(__modsi3)
#if !XCHAL_HAVE_NSA
.section .rodata
diff --git a/arch/xtensa/lib/mulsi3.S b/arch/xtensa/lib/mulsi3.S
index 91a9d7c62f96..c6b4fd46bfa9 100644
--- a/arch/xtensa/lib/mulsi3.S
+++ b/arch/xtensa/lib/mulsi3.S
@@ -131,3 +131,4 @@ ENTRY(__mulsi3)
abi_ret_default
ENDPROC(__mulsi3)
+EXPORT_SYMBOL(__mulsi3)
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index 0731912227d3..9841d1694cdf 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -201,6 +201,7 @@ EX(10f) s8i a9, a11, 0
abi_ret_default
ENDPROC(__strncpy_user)
+EXPORT_SYMBOL(__strncpy_user)
.section .fixup, "ax"
.align 4
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 3d391dca3efb..cdcf57474164 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -133,6 +133,7 @@ EX(10f) l32i a9, a4, 0 # get word with first two bytes of string
abi_ret_default
ENDPROC(__strnlen_user)
+EXPORT_SYMBOL(__strnlen_user)
.section .fixup, "ax"
.align 4
diff --git a/arch/xtensa/lib/udivsi3.S b/arch/xtensa/lib/udivsi3.S
index d2477e0786cf..59ea2dfc3f72 100644
--- a/arch/xtensa/lib/udivsi3.S
+++ b/arch/xtensa/lib/udivsi3.S
@@ -66,3 +66,4 @@ ENTRY(__udivsi3)
abi_ret_default
ENDPROC(__udivsi3)
+EXPORT_SYMBOL(__udivsi3)
diff --git a/arch/xtensa/lib/umodsi3.S b/arch/xtensa/lib/umodsi3.S
index 5f031bfa0354..d39a7e56a971 100644
--- a/arch/xtensa/lib/umodsi3.S
+++ b/arch/xtensa/lib/umodsi3.S
@@ -55,3 +55,4 @@ ENTRY(__umodsi3)
abi_ret_default
ENDPROC(__umodsi3)
+EXPORT_SYMBOL(__umodsi3)
diff --git a/arch/xtensa/lib/umulsidi3.S b/arch/xtensa/lib/umulsidi3.S
index 136081647942..8c7a94a0c5d0 100644
--- a/arch/xtensa/lib/umulsidi3.S
+++ b/arch/xtensa/lib/umulsidi3.S
@@ -228,3 +228,4 @@ ENTRY(__umulsidi3)
#endif /* XCHAL_NO_MUL */
ENDPROC(__umulsidi3)
+EXPORT_SYMBOL(__umulsidi3)
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 16128c094c62..2c665c0b408e 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -283,6 +283,7 @@ EX(10f) s8i a6, a5, 0
abi_ret(STACK_SIZE)
ENDPROC(__xtensa_copy_user)
+EXPORT_SYMBOL(__xtensa_copy_user)
.section .fixup, "ax"
.align 4
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 1fef24db2ff6..f00d122aa806 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -14,7 +14,6 @@
#include <linux/kernel.h>
#include <asm/initialize_mmu.h>
#include <asm/tlbflush.h>
-#include <asm/traps.h>
void __init kasan_early_init(void)
{
@@ -31,7 +30,6 @@ void __init kasan_early_init(void)
BUG_ON(!pmd_none(*pmd));
set_pmd(pmd, __pmd((unsigned long)kasan_early_shadow_pte));
}
- early_trap_init();
}
static void __init populate(void *start, void *end)
diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S
index 0527bf6e3211..ec36f73c4765 100644
--- a/arch/xtensa/mm/misc.S
+++ b/arch/xtensa/mm/misc.S
@@ -47,6 +47,7 @@ ENTRY(clear_page)
abi_ret_default
ENDPROC(clear_page)
+EXPORT_SYMBOL(clear_page)
/*
* copy_page and copy_user_page are the same for non-cache-aliased configs.
@@ -89,6 +90,7 @@ ENTRY(copy_page)
abi_ret_default
ENDPROC(copy_page)
+EXPORT_SYMBOL(copy_page)
#ifdef CONFIG_MMU
/*
@@ -367,6 +369,7 @@ ENTRY(__invalidate_icache_range)
abi_ret_default
ENDPROC(__invalidate_icache_range)
+EXPORT_SYMBOL(__invalidate_icache_range)
/*
* void __flush_invalidate_dcache_range(ulong start, ulong size)
@@ -397,6 +400,7 @@ ENTRY(__flush_dcache_range)
abi_ret_default
ENDPROC(__flush_dcache_range)
+EXPORT_SYMBOL(__flush_dcache_range)
/*
* void _invalidate_dcache_range(ulong start, ulong size)
@@ -411,6 +415,7 @@ ENTRY(__invalidate_dcache_range)
abi_ret_default
ENDPROC(__invalidate_dcache_range)
+EXPORT_SYMBOL(__invalidate_dcache_range)
/*
* void _invalidate_icache_all(void)
diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
index 27a477dae232..0a11fc5f185b 100644
--- a/arch/xtensa/mm/tlb.c
+++ b/arch/xtensa/mm/tlb.c
@@ -179,6 +179,7 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ unsigned int pteval;
if (!mm)
mm = task->active_mm;
@@ -197,7 +198,9 @@ static unsigned get_pte_for_vaddr(unsigned vaddr)
pte = pte_offset_map(pmd, vaddr);
if (!pte)
return 0;
- return pte_val(*pte);
+ pteval = pte_val(*pte);
+ pte_unmap(pte);
+ return pteval;
}
enum {
diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c
index d3433e1bb94e..0f1fe132691e 100644
--- a/arch/xtensa/platforms/iss/setup.c
+++ b/arch/xtensa/platforms/iss/setup.c
@@ -16,6 +16,7 @@
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
#include <linux/printk.h>
+#include <linux/reboot.h>
#include <linux/string.h>
#include <asm/platform.h>
@@ -24,26 +25,27 @@
#include <platform/simcall.h>
-void platform_halt(void)
-{
- pr_info(" ** Called platform_halt() **\n");
- simc_exit(0);
-}
-
-void platform_power_off(void)
+static int iss_power_off(struct sys_off_data *unused)
{
pr_info(" ** Called platform_power_off() **\n");
simc_exit(0);
+ return NOTIFY_DONE;
}
-void platform_restart(void)
+static int iss_restart(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
/* Flush and reset the mmu, simulate a processor reset, and
* jump to the reset vector. */
cpu_reset();
- /* control never gets here */
+
+ return NOTIFY_DONE;
}
+static struct notifier_block iss_restart_block = {
+ .notifier_call = iss_restart,
+};
+
static int
iss_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
@@ -82,4 +84,8 @@ void __init platform_setup(char **p_cmdline)
}
atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block);
+ register_restart_handler(&iss_restart_block);
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_PLATFORM,
+ iss_power_off, NULL);
}
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index f50caaa1c249..178cf96ca10a 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -120,9 +120,9 @@ static void simdisk_submit_bio(struct bio *bio)
bio_endio(bio);
}
-static int simdisk_open(struct block_device *bdev, fmode_t mode)
+static int simdisk_open(struct gendisk *disk, blk_mode_t mode)
{
- struct simdisk *dev = bdev->bd_disk->private_data;
+ struct simdisk *dev = disk->private_data;
spin_lock(&dev->lock);
++dev->users;
@@ -130,7 +130,7 @@ static int simdisk_open(struct block_device *bdev, fmode_t mode)
return 0;
}
-static void simdisk_release(struct gendisk *disk, fmode_t mode)
+static void simdisk_release(struct gendisk *disk)
{
struct simdisk *dev = disk->private_data;
spin_lock(&dev->lock);
diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c
index 0dc22c371614..258e01a51fd8 100644
--- a/arch/xtensa/platforms/xt2000/setup.c
+++ b/arch/xtensa/platforms/xt2000/setup.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
+#include <linux/timer.h>
#include <asm/processor.h>
#include <asm/platform.h>
@@ -41,51 +42,46 @@ static void led_print (int f, char *s)
break;
}
-void platform_halt(void)
-{
- led_print (0, " HALT ");
- local_irq_disable();
- while (1);
-}
-
-void platform_power_off(void)
+static int xt2000_power_off(struct sys_off_data *unused)
{
led_print (0, "POWEROFF");
local_irq_disable();
while (1);
+ return NOTIFY_DONE;
}
-void platform_restart(void)
+static int xt2000_restart(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
/* Flush and reset the mmu, simulate a processor reset, and
* jump to the reset vector. */
cpu_reset();
- /* control never gets here */
+
+ return NOTIFY_DONE;
}
+static struct notifier_block xt2000_restart_block = {
+ .notifier_call = xt2000_restart,
+};
+
void __init platform_setup(char** cmdline)
{
led_print (0, "LINUX ");
}
-/* early initialization */
+/* Heartbeat. Let the LED blink. */
-void __init platform_init(bp_tag_t *first)
-{
-}
+static void xt2000_heartbeat(struct timer_list *unused);
-/* Heartbeat. Let the LED blink. */
+static DEFINE_TIMER(heartbeat_timer, xt2000_heartbeat);
-void platform_heartbeat(void)
+static void xt2000_heartbeat(struct timer_list *unused)
{
- static int i, t;
+ static int i;
- if (--t < 0)
- {
- t = 59;
- led_print(7, i ? ".": " ");
- i ^= 1;
- }
+ led_print(7, i ? "." : " ");
+ i ^= 1;
+ mod_timer(&heartbeat_timer, jiffies + HZ / 2);
}
//#define RS_TABLE_SIZE 2
@@ -143,7 +139,11 @@ static int __init xt2000_setup_devinit(void)
{
platform_device_register(&xt2000_serial8250_device);
platform_device_register(&xt2000_sonic_device);
-
+ mod_timer(&heartbeat_timer, jiffies + HZ / 2);
+ register_restart_handler(&xt2000_restart_block);
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ xt2000_power_off, NULL);
return 0;
}
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c
index c79c1d09ea86..a2432f081710 100644
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -33,23 +33,17 @@
#include <platform/lcd.h>
#include <platform/hardware.h>
-void platform_halt(void)
-{
- lcd_disp_at_pos(" HALT ", 0);
- local_irq_disable();
- while (1)
- cpu_relax();
-}
-
-void platform_power_off(void)
+static int xtfpga_power_off(struct sys_off_data *unused)
{
lcd_disp_at_pos("POWEROFF", 0);
local_irq_disable();
while (1)
cpu_relax();
+ return NOTIFY_DONE;
}
-void platform_restart(void)
+static int xtfpga_restart(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
/* Try software reset first. */
WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead);
@@ -58,9 +52,14 @@ void platform_restart(void)
* simulate a processor reset, and jump to the reset vector.
*/
cpu_reset();
- /* control never gets here */
+
+ return NOTIFY_DONE;
}
+static struct notifier_block xtfpga_restart_block = {
+ .notifier_call = xtfpga_restart,
+};
+
#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
void __init platform_calibrate_ccount(void)
@@ -70,6 +69,14 @@ void __init platform_calibrate_ccount(void)
#endif
+static void __init xtfpga_register_handlers(void)
+{
+ register_restart_handler(&xtfpga_restart_block);
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ xtfpga_power_off, NULL);
+}
+
#ifdef CONFIG_USE_OF
static void __init xtfpga_clk_setup(struct device_node *np)
@@ -134,6 +141,9 @@ static int __init machine_setup(void)
if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc")))
update_local_mac(eth);
of_node_put(eth);
+
+ xtfpga_register_handlers();
+
return 0;
}
arch_initcall(machine_setup);
@@ -281,6 +291,8 @@ static int __init xtavnet_init(void)
pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr);
ethoc_pdata.eth_clkfreq = *(long *)XTFPGA_CLKFRQ_VADDR;
+ xtfpga_register_handlers();
+
return 0;
}
diff --git a/block/Makefile b/block/Makefile
index b31b05390749..46ada9dc8bbf 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
- disk-events.o blk-ia-ranges.o
+ disk-events.o blk-ia-ranges.o early-lookup.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
diff --git a/block/bdev.c b/block/bdev.c
index 21c63bfef323..979e28a46b98 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(invalidate_bdev);
* Drop all buffers & page cache for given bdev range. This function bails
* with error if bdev has other exclusive owner (such as filesystem).
*/
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
+int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
loff_t lstart, loff_t lend)
{
/*
@@ -101,14 +101,14 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
* while we discard the buffer cache to avoid discarding buffers
* under live filesystem.
*/
- if (!(mode & FMODE_EXCL)) {
- int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
+ if (!(mode & BLK_OPEN_EXCL)) {
+ int err = bd_prepare_to_claim(bdev, truncate_bdev_range, NULL);
if (err)
goto invalidate;
}
truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
- if (!(mode & FMODE_EXCL))
+ if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, truncate_bdev_range);
return 0;
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(thaw_bdev);
* pseudo-fs
*/
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static __cacheline_aligned_in_smp DEFINE_MUTEX(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;
static struct inode *bdev_alloc_inode(struct super_block *sb)
@@ -415,6 +415,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
bdev = I_BDEV(inode);
mutex_init(&bdev->bd_fsfreeze_mutex);
spin_lock_init(&bdev->bd_size_lock);
+ mutex_init(&bdev->bd_holder_lock);
bdev->bd_partno = partno;
bdev->bd_inode = inode;
bdev->bd_queue = disk->queue;
@@ -463,39 +464,48 @@ long nr_blockdev_pages(void)
/**
* bd_may_claim - test whether a block device can be claimed
* @bdev: block device of interest
- * @whole: whole block device containing @bdev, may equal @bdev
* @holder: holder trying to claim @bdev
+ * @hops: holder ops
*
* Test whether @bdev can be claimed by @holder.
*
- * CONTEXT:
- * spin_lock(&bdev_lock).
- *
* RETURNS:
* %true if @bdev can be claimed, %false otherwise.
*/
-static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
+static bool bd_may_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
- if (bdev->bd_holder == holder)
- return true; /* already a holder */
- else if (bdev->bd_holder != NULL)
- return false; /* held by someone else */
- else if (whole == bdev)
- return true; /* is a whole device which isn't held */
-
- else if (whole->bd_holder == bd_may_claim)
- return true; /* is a partition of a device that is being partitioned */
- else if (whole->bd_holder != NULL)
- return false; /* is a partition of a held device */
- else
- return true; /* is a partition of an un-held device */
+ struct block_device *whole = bdev_whole(bdev);
+
+ lockdep_assert_held(&bdev_lock);
+
+ if (bdev->bd_holder) {
+ /*
+ * The same holder can always re-claim.
+ */
+ if (bdev->bd_holder == holder) {
+ if (WARN_ON_ONCE(bdev->bd_holder_ops != hops))
+ return false;
+ return true;
+ }
+ return false;
+ }
+
+ /*
+ * If the whole devices holder is set to bd_may_claim, a partition on
+ * the device is claimed, but not the whole device.
+ */
+ if (whole != bdev &&
+ whole->bd_holder && whole->bd_holder != bd_may_claim)
+ return false;
+ return true;
}
/**
* bd_prepare_to_claim - claim a block device
* @bdev: block device of interest
* @holder: holder trying to claim @bdev
+ * @hops: holder ops.
*
* Claim @bdev. This function fails if @bdev is already claimed by another
* holder and waits if another claiming is in progress. return, the caller
@@ -504,17 +514,18 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
* RETURNS:
* 0 if @bdev can be claimed, -EBUSY otherwise.
*/
-int bd_prepare_to_claim(struct block_device *bdev, void *holder)
+int bd_prepare_to_claim(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
struct block_device *whole = bdev_whole(bdev);
if (WARN_ON_ONCE(!holder))
return -EINVAL;
retry:
- spin_lock(&bdev_lock);
+ mutex_lock(&bdev_lock);
/* if someone else claimed, fail */
- if (!bd_may_claim(bdev, whole, holder)) {
- spin_unlock(&bdev_lock);
+ if (!bd_may_claim(bdev, holder, hops)) {
+ mutex_unlock(&bdev_lock);
return -EBUSY;
}
@@ -524,7 +535,7 @@ retry:
DEFINE_WAIT(wait);
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
schedule();
finish_wait(wq, &wait);
goto retry;
@@ -532,7 +543,7 @@ retry:
/* yay, all mine */
whole->bd_claiming = holder;
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
return 0;
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
@@ -550,16 +561,18 @@ static void bd_clear_claiming(struct block_device *whole, void *holder)
* bd_finish_claiming - finish claiming of a block device
* @bdev: block device of interest
* @holder: holder that has claimed @bdev
+ * @hops: block device holder operations
*
* Finish exclusive open of a block device. Mark the device as exlusively
* open by the holder and wake up all waiters for exclusive open to finish.
*/
-static void bd_finish_claiming(struct block_device *bdev, void *holder)
+static void bd_finish_claiming(struct block_device *bdev, void *holder,
+ const struct blk_holder_ops *hops)
{
struct block_device *whole = bdev_whole(bdev);
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
+ mutex_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, holder, hops));
/*
* Note that for a whole device bd_holders will be incremented twice,
* and bd_holder will be set to bd_may_claim before being set to holder
@@ -567,9 +580,12 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
whole->bd_holders++;
whole->bd_holder = bd_may_claim;
bdev->bd_holders++;
+ mutex_lock(&bdev->bd_holder_lock);
bdev->bd_holder = holder;
+ bdev->bd_holder_ops = hops;
+ mutex_unlock(&bdev->bd_holder_lock);
bd_clear_claiming(whole, holder);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
}
/**
@@ -583,12 +599,47 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
*/
void bd_abort_claiming(struct block_device *bdev, void *holder)
{
- spin_lock(&bdev_lock);
+ mutex_lock(&bdev_lock);
bd_clear_claiming(bdev_whole(bdev), holder);
- spin_unlock(&bdev_lock);
+ mutex_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);
+static void bd_end_claim(struct block_device *bdev, void *holder)
+{
+ struct block_device *whole = bdev_whole(bdev);
+ bool unblock = false;
+
+ /*
+ * Release a claim on the device. The holder fields are protected with
+ * bdev_lock. open_mutex is used to synchronize disk_holder unlinking.
+ */
+ mutex_lock(&bdev_lock);
+ WARN_ON_ONCE(bdev->bd_holder != holder);
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--whole->bd_holders < 0);
+ if (!bdev->bd_holders) {
+ mutex_lock(&bdev->bd_holder_lock);
+ bdev->bd_holder = NULL;
+ bdev->bd_holder_ops = NULL;
+ mutex_unlock(&bdev->bd_holder_lock);
+ if (bdev->bd_write_holder)
+ unblock = true;
+ }
+ if (!whole->bd_holders)
+ whole->bd_holder = NULL;
+ mutex_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, remove holder link and unblock evpoll if
+ * it was a write holder.
+ */
+ if (unblock) {
+ disk_unblock_events(bdev->bd_disk);
+ bdev->bd_write_holder = false;
+ }
+}
+
static void blkdev_flush_mapping(struct block_device *bdev)
{
WARN_ON_ONCE(bdev->bd_holders);
@@ -597,13 +648,13 @@ static void blkdev_flush_mapping(struct block_device *bdev)
bdev_write_inode(bdev);
}
-static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret;
if (disk->fops->open) {
- ret = disk->fops->open(bdev, mode);
+ ret = disk->fops->open(disk, mode);
if (ret) {
/* avoid ghost partitions on a removed medium */
if (ret == -ENOMEDIUM &&
@@ -621,22 +672,19 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
return 0;
}
-static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+static void blkdev_put_whole(struct block_device *bdev)
{
if (atomic_dec_and_test(&bdev->bd_openers))
blkdev_flush_mapping(bdev);
if (bdev->bd_disk->fops->release)
- bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+ bdev->bd_disk->fops->release(bdev->bd_disk);
}
-static int blkdev_get_part(struct block_device *part, fmode_t mode)
+static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
struct gendisk *disk = part->bd_disk;
int ret;
- if (atomic_read(&part->bd_openers))
- goto done;
-
ret = blkdev_get_whole(bdev_whole(part), mode);
if (ret)
return ret;
@@ -645,26 +693,27 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
if (!bdev_nr_sectors(part))
goto out_blkdev_put;
- disk->open_partitions++;
- set_init_blocksize(part);
-done:
+ if (!atomic_read(&part->bd_openers)) {
+ disk->open_partitions++;
+ set_init_blocksize(part);
+ }
atomic_inc(&part->bd_openers);
return 0;
out_blkdev_put:
- blkdev_put_whole(bdev_whole(part), mode);
+ blkdev_put_whole(bdev_whole(part));
return ret;
}
-static void blkdev_put_part(struct block_device *part, fmode_t mode)
+static void blkdev_put_part(struct block_device *part)
{
struct block_device *whole = bdev_whole(part);
- if (!atomic_dec_and_test(&part->bd_openers))
- return;
- blkdev_flush_mapping(part);
- whole->bd_disk->open_partitions--;
- blkdev_put_whole(whole, mode);
+ if (atomic_dec_and_test(&part->bd_openers)) {
+ blkdev_flush_mapping(part);
+ whole->bd_disk->open_partitions--;
+ }
+ blkdev_put_whole(whole);
}
struct block_device *blkdev_get_no_open(dev_t dev)
@@ -695,17 +744,17 @@ void blkdev_put_no_open(struct block_device *bdev)
{
put_device(&bdev->bd_device);
}
-
+
/**
* blkdev_get_by_dev - open a block device by device number
* @dev: device number of block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
+ * @hops: holder operations
*
- * Open the block device described by device number @dev. If @mode includes
- * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
- * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
- * the same @holder.
+ * Open the block device described by device number @dev. If @holder is not
+ * %NULL, the block device is opened with exclusive access. Exclusive opens may
+ * nest for the same @holder.
*
* Use this interface ONLY if you really do not have anything better - i.e. when
* you are behind a truly sucky interface and all you are given is a device
@@ -717,7 +766,8 @@ void blkdev_put_no_open(struct block_device *bdev)
* RETURNS:
* Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
+ const struct blk_holder_ops *hops)
{
bool unblock_events = true;
struct block_device *bdev;
@@ -726,8 +776,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
MAJOR(dev), MINOR(dev),
- ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
- ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
+ ((mode & BLK_OPEN_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & BLK_OPEN_WRITE) ? DEVCG_ACC_WRITE : 0));
if (ret)
return ERR_PTR(ret);
@@ -736,10 +786,16 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
return ERR_PTR(-ENXIO);
disk = bdev->bd_disk;
- if (mode & FMODE_EXCL) {
- ret = bd_prepare_to_claim(bdev, holder);
+ if (holder) {
+ mode |= BLK_OPEN_EXCL;
+ ret = bd_prepare_to_claim(bdev, holder, hops);
if (ret)
goto put_blkdev;
+ } else {
+ if (WARN_ON_ONCE(mode & BLK_OPEN_EXCL)) {
+ ret = -EIO;
+ goto put_blkdev;
+ }
}
disk_block_events(disk);
@@ -756,8 +812,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
ret = blkdev_get_whole(bdev, mode);
if (ret)
goto put_module;
- if (mode & FMODE_EXCL) {
- bd_finish_claiming(bdev, holder);
+ if (holder) {
+ bd_finish_claiming(bdev, holder, hops);
/*
* Block event polling for write claims if requested. Any write
@@ -766,7 +822,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
* writeable reference is too fragile given the way @mode is
* used in blkdev_get/put().
*/
- if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
(disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
unblock_events = false;
@@ -780,7 +836,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
put_module:
module_put(disk->fops->owner);
abort_claiming:
- if (mode & FMODE_EXCL)
+ if (holder)
bd_abort_claiming(bdev, holder);
mutex_unlock(&disk->open_mutex);
disk_unblock_events(disk);
@@ -793,13 +849,13 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
/**
* blkdev_get_by_path - open a block device by name
* @path: path to the block device to open
- * @mode: FMODE_* mask
+ * @mode: open mode (BLK_OPEN_*)
* @holder: exclusive holder identifier
+ * @hops: holder operations
*
- * Open the block device described by the device file at @path. If @mode
- * includes %FMODE_EXCL, the block device is opened with exclusive access.
- * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
- * nest for the same @holder.
+ * Open the block device described by the device file at @path. If @holder is
+ * not %NULL, the block device is opened with exclusive access. Exclusive opens
+ * may nest for the same @holder.
*
* CONTEXT:
* Might sleep.
@@ -807,8 +863,8 @@ EXPORT_SYMBOL(blkdev_get_by_dev);
* RETURNS:
* Reference to the block_device on success, ERR_PTR(-errno) on failure.
*/
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder)
+struct block_device *blkdev_get_by_path(const char *path, blk_mode_t mode,
+ void *holder, const struct blk_holder_ops *hops)
{
struct block_device *bdev;
dev_t dev;
@@ -818,9 +874,9 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
if (error)
return ERR_PTR(error);
- bdev = blkdev_get_by_dev(dev, mode, holder);
- if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- blkdev_put(bdev, mode);
+ bdev = blkdev_get_by_dev(dev, mode, holder, hops);
+ if (!IS_ERR(bdev) && (mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, holder);
return ERR_PTR(-EACCES);
}
@@ -828,7 +884,7 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
}
EXPORT_SYMBOL(blkdev_get_by_path);
-void blkdev_put(struct block_device *bdev, fmode_t mode)
+void blkdev_put(struct block_device *bdev, void *holder)
{
struct gendisk *disk = bdev->bd_disk;
@@ -843,36 +899,8 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
sync_blockdev(bdev);
mutex_lock(&disk->open_mutex);
- if (mode & FMODE_EXCL) {
- struct block_device *whole = bdev_whole(bdev);
- bool bdev_free;
-
- /*
- * Release a claim on the device. The holder fields
- * are protected with bdev_lock. open_mutex is to
- * synchronize disk_holder unlinking.
- */
- spin_lock(&bdev_lock);
-
- WARN_ON_ONCE(--bdev->bd_holders < 0);
- WARN_ON_ONCE(--whole->bd_holders < 0);
-
- if ((bdev_free = !bdev->bd_holders))
- bdev->bd_holder = NULL;
- if (!whole->bd_holders)
- whole->bd_holder = NULL;
-
- spin_unlock(&bdev_lock);
-
- /*
- * If this was the last claim, remove holder link and
- * unblock evpoll if it was a write holder.
- */
- if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(disk);
- bdev->bd_write_holder = false;
- }
- }
+ if (holder)
+ bd_end_claim(bdev, holder);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -882,9 +910,9 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
if (bdev_is_partition(bdev))
- blkdev_put_part(bdev, mode);
+ blkdev_put_part(bdev);
else
- blkdev_put_whole(bdev, mode);
+ blkdev_put_whole(bdev);
mutex_unlock(&disk->open_mutex);
module_put(disk->fops->owner);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3164e3177965..09bbbcf9e049 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5403,6 +5403,10 @@ void bfq_put_queue(struct bfq_queue *bfqq)
if (bfqq->bfqd->last_completed_rq_bfqq == bfqq)
bfqq->bfqd->last_completed_rq_bfqq = NULL;
+ WARN_ON_ONCE(!list_empty(&bfqq->fifo));
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&bfqq->sort_list));
+ WARN_ON_ONCE(bfqq->dispatched);
+
kmem_cache_free(bfq_pool, bfqq);
bfqg_and_blkg_put(bfqg);
}
@@ -7135,6 +7139,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
{
struct bfq_data *bfqd = e->elevator_data;
struct bfq_queue *bfqq, *n;
+ unsigned int actuator;
hrtimer_cancel(&bfqd->idle_slice_timer);
@@ -7143,6 +7148,10 @@ static void bfq_exit_queue(struct elevator_queue *e)
bfq_deactivate_bfqq(bfqd, bfqq, false, false);
spin_unlock_irq(&bfqd->lock);
+ for (actuator = 0; actuator < bfqd->num_actuators; actuator++)
+ WARN_ON_ONCE(bfqd->rq_in_driver[actuator]);
+ WARN_ON_ONCE(bfqd->tot_rq_in_driver);
+
hrtimer_cancel(&bfqd->idle_slice_timer);
/* release oom-queue reference to root group */
diff --git a/block/bio.c b/block/bio.c
index 043944fd46eb..8672179213b9 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1138,6 +1138,14 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
+void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
+ size_t off)
+{
+ WARN_ON_ONCE(len > UINT_MAX);
+ WARN_ON_ONCE(off > UINT_MAX);
+ __bio_add_page(bio, &folio->page, len, off);
+}
+
/**
* bio_add_folio - Attempt to add part of a folio to a bio.
* @bio: BIO to add to.
@@ -1169,7 +1177,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ bio_release_page(bio, bvec->bv_page);
}
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1191,7 +1199,6 @@ void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
bio->bi_iter.bi_size = size;
- bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
}
@@ -1206,7 +1213,7 @@ static int bio_iov_add_page(struct bio *bio, struct page *page,
}
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1220,7 +1227,7 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
queue_max_zone_append_sectors(q), &same_page) != len)
return -EINVAL;
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
return 0;
}
@@ -1231,10 +1238,10 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
- * For multi-segment *iter, this function only adds pages from the
- * next non-empty segment of the iov iterator.
+ * Extracts pages from *iter and appends them to @bio's bvec array. The pages
+ * will have to be cleaned up in the way indicated by the BIO_PAGE_PINNED flag.
+ * For a multi-segment *iter, this function only adds pages from the next
+ * non-empty segment of the iov iterator.
*/
static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
@@ -1266,9 +1273,9 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
*/
- size = iov_iter_get_pages(iter, pages,
- UINT_MAX - bio->bi_iter.bi_size,
- nr_pages, &offset, extraction_flags);
+ size = iov_iter_extract_pages(iter, &pages,
+ UINT_MAX - bio->bi_iter.bi_size,
+ nr_pages, extraction_flags, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
@@ -1301,7 +1308,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_revert(iter, left);
out:
while (i < nr_pages)
- put_page(pages[i++]);
+ bio_release_page(bio, pages[i++]);
return ret;
}
@@ -1336,6 +1343,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
return 0;
}
+ if (iov_iter_extract_will_pin(iter))
+ bio_set_flag(bio, BIO_PAGE_PINNED);
do {
ret = __bio_iov_iter_get_pages(bio, iter);
} while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
@@ -1489,8 +1498,8 @@ void bio_set_pages_dirty(struct bio *bio)
* the BIO and re-dirty the pages in process context.
*
* It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * here on. It will unpin each page and will run one bio_put() against the
+ * BIO.
*/
static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
index 842e5e1c0f3c..3ec21333f393 100644
--- a/block/blk-cgroup-fc-appid.c
+++ b/block/blk-cgroup-fc-appid.c
@@ -34,7 +34,7 @@ int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
* the vmid from the fabric.
* Adding the overhead of a lock is not necessary.
*/
- strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+ strscpy(blkcg->fc_app_id, app_id, app_id_len);
css_put(css);
out_cgrp_put:
cgroup_put(cgrp);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f0b5c9c41cde..aaf9903ad7b2 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -624,8 +624,13 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct blkg_iostat_set *bis =
per_cpu_ptr(blkg->iostat_cpu, cpu);
memset(bis, 0, sizeof(*bis));
+
+ /* Re-initialize the cleared blkg_iostat_set */
+ u64_stats_init(&bis->sync);
+ bis->blkg = blkg;
}
memset(&blkg->iostat, 0, sizeof(blkg->iostat));
+ u64_stats_init(&blkg->iostat.sync);
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -762,6 +767,13 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -ENODEV;
}
+ mutex_lock(&bdev->bd_queue->rq_qos_mutex);
+ if (!disk_live(bdev->bd_disk)) {
+ blkdev_put_no_open(bdev);
+ mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
+ return -ENODEV;
+ }
+
ctx->body = input;
ctx->bdev = bdev;
return 0;
@@ -906,6 +918,7 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
*/
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
+ __releases(&ctx->bdev->bd_queue->rq_qos_mutex)
{
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
@@ -913,6 +926,7 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
if (ctx->bdev) {
+ mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
blkdev_put_no_open(ctx->bdev);
ctx->body = NULL;
ctx->bdev = NULL;
@@ -970,6 +984,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
+ unsigned long flags;
rcu_read_lock();
@@ -983,7 +998,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
* When flushing from cgroup, cgroup_rstat_lock is always held, so
* this lock won't cause contention most of time.
*/
- raw_spin_lock(&blkg_stat_lock);
+ raw_spin_lock_irqsave(&blkg_stat_lock, flags);
/*
* Iterate only the iostat_cpu's queued in the lockless list.
@@ -1009,7 +1024,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
}
- raw_spin_unlock(&blkg_stat_lock);
+ raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
out:
rcu_read_unlock();
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 1da77e7d6289..3fc68b944479 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -420,6 +420,7 @@ struct request_queue *blk_alloc_queue(int node_id)
mutex_init(&q->debugfs_mutex);
mutex_init(&q->sysfs_lock);
mutex_init(&q->sysfs_dir_lock);
+ mutex_init(&q->rq_qos_mutex);
spin_lock_init(&q->queue_lock);
init_waitqueue_head(&q->mq_freeze_wq);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 04698ed9bcd4..dba392cf22be 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -188,7 +188,9 @@ static void blk_flush_complete_seq(struct request *rq,
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
- blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
+ spin_lock(&q->requeue_lock);
+ list_add_tail(&rq->queuelist, &q->flush_list);
+ spin_unlock(&q->requeue_lock);
blk_mq_kick_requeue_list(q);
break;
@@ -346,7 +348,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
smp_wmb();
req_ref_set(flush_rq, 1);
- blk_mq_add_to_requeue_list(flush_rq, 0);
+ spin_lock(&q->requeue_lock);
+ list_add_tail(&flush_rq->queuelist, &q->flush_list);
+ spin_unlock(&q->requeue_lock);
+
blk_mq_kick_requeue_list(q);
}
@@ -376,22 +381,29 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
return RQ_END_IO_NONE;
}
-/**
- * blk_insert_flush - insert a new PREFLUSH/FUA request
- * @rq: request to insert
- *
- * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
- * or __blk_mq_run_hw_queue() to dispatch request.
- * @rq is being submitted. Analyze what needs to be done and put it on the
- * right queue.
+static void blk_rq_init_flush(struct request *rq)
+{
+ rq->flush.seq = 0;
+ INIT_LIST_HEAD(&rq->flush.list);
+ rq->rq_flags |= RQF_FLUSH_SEQ;
+ rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
+ rq->end_io = mq_flush_data_end_io;
+}
+
+/*
+ * Insert a PREFLUSH/FUA request into the flush state machine.
+ * Returns true if the request has been consumed by the flush state machine,
+ * or false if the caller should continue to process it.
*/
-void blk_insert_flush(struct request *rq)
+bool blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
unsigned long fflags = q->queue_flags; /* may change, cache */
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ /* FLUSH/FUA request must never be merged */
+ WARN_ON_ONCE(rq->bio != rq->biotail);
/*
* @policy now records what operations need to be done. Adjust
@@ -408,45 +420,45 @@ void blk_insert_flush(struct request *rq)
*/
rq->cmd_flags |= REQ_SYNC;
- /*
- * An empty flush handed down from a stacking driver may
- * translate into nothing if the underlying device does not
- * advertise a write-back cache. In this case, simply
- * complete the request.
- */
- if (!policy) {
+ switch (policy) {
+ case 0:
+ /*
+ * An empty flush handed down from a stacking driver may
+ * translate into nothing if the underlying device does not
+ * advertise a write-back cache. In this case, simply
+ * complete the request.
+ */
blk_mq_end_request(rq, 0);
- return;
- }
-
- BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
-
- /*
- * If there's data but flush is not necessary, the request can be
- * processed directly without going through flush machinery. Queue
- * for normal execution.
- */
- if ((policy & REQ_FSEQ_DATA) &&
- !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
- blk_mq_request_bypass_insert(rq, 0);
- blk_mq_run_hw_queue(hctx, false);
- return;
+ return true;
+ case REQ_FSEQ_DATA:
+ /*
+ * If there's data, but no flush is necessary, the request can
+ * be processed directly without going through flush machinery.
+ * Queue for normal execution.
+ */
+ return false;
+ case REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH:
+ /*
+ * Initialize the flush fields and completion handler to trigger
+ * the post flush, and then just pass the command on.
+ */
+ blk_rq_init_flush(rq);
+ rq->flush.seq |= REQ_FSEQ_POSTFLUSH;
+ spin_lock_irq(&fq->mq_flush_lock);
+ list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
+ spin_unlock_irq(&fq->mq_flush_lock);
+ return false;
+ default:
+ /*
+ * Mark the request as part of a flush sequence and submit it
+ * for further processing to the flush state machine.
+ */
+ blk_rq_init_flush(rq);
+ spin_lock_irq(&fq->mq_flush_lock);
+ blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
+ spin_unlock_irq(&fq->mq_flush_lock);
+ return true;
}
-
- /*
- * @rq should go through flush machinery. Mark it part of flush
- * sequence and submit for further processing.
- */
- memset(&rq->flush, 0, sizeof(rq->flush));
- INIT_LIST_HEAD(&rq->flush.list);
- rq->rq_flags |= RQF_FLUSH_SEQ;
- rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
-
- rq->end_io = mq_flush_data_end_io;
-
- spin_lock_irq(&fq->mq_flush_lock);
- blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
- spin_unlock_irq(&fq->mq_flush_lock);
}
/**
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 63fc02042408..25dd4db11121 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -77,6 +77,10 @@ static void ioc_destroy_icq(struct io_cq *icq)
struct elevator_type *et = q->elevator->type;
lockdep_assert_held(&ioc->lock);
+ lockdep_assert_held(&q->queue_lock);
+
+ if (icq->flags & ICQ_DESTROYED)
+ return;
radix_tree_delete(&ioc->icq_tree, icq->q->id);
hlist_del_init(&icq->ioc_node);
@@ -128,12 +132,7 @@ static void ioc_release_fn(struct work_struct *work)
spin_lock(&q->queue_lock);
spin_lock(&ioc->lock);
- /*
- * The icq may have been destroyed when the ioc lock
- * was released.
- */
- if (!(icq->flags & ICQ_DESTROYED))
- ioc_destroy_icq(icq);
+ ioc_destroy_icq(icq);
spin_unlock(&q->queue_lock);
rcu_read_unlock();
@@ -171,23 +170,20 @@ static bool ioc_delay_free(struct io_context *ioc)
*/
void ioc_clear_queue(struct request_queue *q)
{
- LIST_HEAD(icq_list);
-
spin_lock_irq(&q->queue_lock);
- list_splice_init(&q->icq_list, &icq_list);
- spin_unlock_irq(&q->queue_lock);
-
- rcu_read_lock();
- while (!list_empty(&icq_list)) {
+ while (!list_empty(&q->icq_list)) {
struct io_cq *icq =
- list_entry(icq_list.next, struct io_cq, q_node);
-
- spin_lock_irq(&icq->ioc->lock);
- if (!(icq->flags & ICQ_DESTROYED))
- ioc_destroy_icq(icq);
- spin_unlock_irq(&icq->ioc->lock);
+ list_first_entry(&q->icq_list, struct io_cq, q_node);
+
+ /*
+ * Other context won't hold ioc lock to wait for queue_lock, see
+ * details in ioc_release_fn().
+ */
+ spin_lock(&icq->ioc->lock);
+ ioc_destroy_icq(icq);
+ spin_unlock(&icq->ioc->lock);
}
- rcu_read_unlock();
+ spin_unlock_irq(&q->queue_lock);
}
#else /* CONFIG_BLK_ICQ */
static inline void ioc_exit_icqs(struct io_context *ioc)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 285ced3467ab..6084a9519883 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2455,6 +2455,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
u32 hwi, adj_step;
s64 margin;
u64 cost, new_inuse;
+ unsigned long flags;
current_hweight(iocg, NULL, &hwi);
old_hwi = hwi;
@@ -2473,11 +2474,11 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
iocg->inuse == iocg->active)
return cost;
- spin_lock_irq(&ioc->lock);
+ spin_lock_irqsave(&ioc->lock, flags);
/* we own inuse only when @iocg is in the normal active state */
if (iocg->abs_vdebt || list_empty(&iocg->active_list)) {
- spin_unlock_irq(&ioc->lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
return cost;
}
@@ -2498,7 +2499,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
} while (time_after64(vtime + cost, now->vnow) &&
iocg->inuse != iocg->active);
- spin_unlock_irq(&ioc->lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
TRACE_IOCG_PATH(inuse_adjust, iocg, now,
old_inuse, iocg->inuse, old_hwi, hwi);
diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c
index 055529b9b92b..4051fada01f1 100644
--- a/block/blk-ioprio.c
+++ b/block/blk-ioprio.c
@@ -23,25 +23,28 @@
/**
* enum prio_policy - I/O priority class policy.
* @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
- * @POLICY_NONE_TO_RT: modify IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
+ * @POLICY_PROMOTE_TO_RT: modify no-IOPRIO_CLASS_RT to IOPRIO_CLASS_RT.
* @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
* IOPRIO_CLASS_BE.
* @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
+ * @POLICY_NONE_TO_RT: an alias for POLICY_PROMOTE_TO_RT.
*
* See also <linux/ioprio.h>.
*/
enum prio_policy {
POLICY_NO_CHANGE = 0,
- POLICY_NONE_TO_RT = 1,
+ POLICY_PROMOTE_TO_RT = 1,
POLICY_RESTRICT_TO_BE = 2,
POLICY_ALL_TO_IDLE = 3,
+ POLICY_NONE_TO_RT = 4,
};
static const char *policy_name[] = {
[POLICY_NO_CHANGE] = "no-change",
- [POLICY_NONE_TO_RT] = "none-to-rt",
+ [POLICY_PROMOTE_TO_RT] = "promote-to-rt",
[POLICY_RESTRICT_TO_BE] = "restrict-to-be",
[POLICY_ALL_TO_IDLE] = "idle",
+ [POLICY_NONE_TO_RT] = "none-to-rt",
};
static struct blkcg_policy ioprio_policy;
@@ -189,6 +192,20 @@ void blkcg_set_ioprio(struct bio *bio)
if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
return;
+ if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
+ blkcg->prio_policy == POLICY_NONE_TO_RT) {
+ /*
+ * For RT threads, the default priority level is 4 because
+ * task_nice is 0. By promoting non-RT io-priority to RT-class
+ * and default level 4, those requests that are already
+ * RT-class but need a higher io-priority can use ioprio_set()
+ * to achieve this.
+ */
+ if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) != IOPRIO_CLASS_RT)
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4);
+ return;
+ }
+
/*
* Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
* correspond to a lower priority. Hence, the max_t() below selects
diff --git a/block/blk-map.c b/block/blk-map.c
index 46eed2e627c3..44d74a30ddac 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -281,21 +281,21 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (blk_queue_pci_p2pdma(rq->q))
extraction_flags |= ITER_ALLOW_P2PDMA;
+ if (iov_iter_extract_will_pin(iter))
+ bio_set_flag(bio, BIO_PAGE_PINNED);
while (iov_iter_count(iter)) {
- struct page **pages, *stack_pages[UIO_FASTIOV];
+ struct page *stack_pages[UIO_FASTIOV];
+ struct page **pages = stack_pages;
ssize_t bytes;
size_t offs;
int npages;
- if (nr_vecs <= ARRAY_SIZE(stack_pages)) {
- pages = stack_pages;
- bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
- nr_vecs, &offs, extraction_flags);
- } else {
- bytes = iov_iter_get_pages_alloc(iter, &pages,
- LONG_MAX, &offs, extraction_flags);
- }
+ if (nr_vecs > ARRAY_SIZE(stack_pages))
+ pages = NULL;
+
+ bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
+ nr_vecs, extraction_flags, &offs);
if (unlikely(bytes <= 0)) {
ret = bytes ? bytes : -EFAULT;
goto out_unmap;
@@ -317,7 +317,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
max_sectors, &same_page)) {
if (same_page)
- put_page(page);
+ bio_release_page(bio, page);
break;
}
@@ -329,7 +329,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
* release the pages we didn't map into the bio, if any
*/
while (j < npages)
- put_page(pages[j++]);
+ bio_release_page(bio, pages[j++]);
if (pages != stack_pages)
kvfree(pages);
/* couldn't stuff something into bio? */
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index d23a8554ec4a..c3b5930106b2 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -88,6 +88,7 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(IO_STAT),
QUEUE_FLAG_NAME(NOXMERGES),
QUEUE_FLAG_NAME(ADD_RANDOM),
+ QUEUE_FLAG_NAME(SYNCHRONOUS),
QUEUE_FLAG_NAME(SAME_FORCE),
QUEUE_FLAG_NAME(INIT_DONE),
QUEUE_FLAG_NAME(STABLE_WRITES),
@@ -103,6 +104,8 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
QUEUE_FLAG_NAME(HCTX_ACTIVE),
QUEUE_FLAG_NAME(NOWAIT),
+ QUEUE_FLAG_NAME(SQ_SCHED),
+ QUEUE_FLAG_NAME(SKIP_TAGSET_QUIESCE),
};
#undef QUEUE_FLAG_NAME
@@ -241,14 +244,14 @@ static const char *const cmd_flag_name[] = {
#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
static const char *const rqf_name[] = {
RQF_NAME(STARTED),
- RQF_NAME(SOFTBARRIER),
RQF_NAME(FLUSH_SEQ),
RQF_NAME(MIXED_MERGE),
RQF_NAME(MQ_INFLIGHT),
RQF_NAME(DONTPREP),
+ RQF_NAME(SCHED_TAGS),
+ RQF_NAME(USE_SCHED),
RQF_NAME(FAILED),
RQF_NAME(QUIET),
- RQF_NAME(ELVPRIV),
RQF_NAME(IO_STAT),
RQF_NAME(PM),
RQF_NAME(HASHED),
@@ -256,7 +259,6 @@ static const char *const rqf_name[] = {
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(TIMED_OUT),
- RQF_NAME(ELV),
RQF_NAME(RESV),
};
#undef RQF_NAME
@@ -399,7 +401,7 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
seq_printf(m, "active_queues=%d\n",
- atomic_read(&tags->active_queues));
+ READ_ONCE(tags->active_queues));
seq_puts(m, "\nbitmap_tags:\n");
sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 7c3cbad17f30..1326526bb733 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -37,7 +37,7 @@ static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = q->elevator;
if (e->type->ops.allow_merge)
@@ -48,7 +48,7 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
{
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = rq->q->elevator;
if (e->type->ops.completed_request)
@@ -58,11 +58,11 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request)
+ if (e->type->ops.requeue_request)
e->type->ops.requeue_request(rq);
}
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index dfd81cab5788..cc57e2dd9a0b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
unsigned int users;
+ struct blk_mq_tags *tags = hctx->tags;
/*
* calling test_bit() prior to test_and_set_bit() is intentional,
@@ -55,9 +56,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
return;
}
- users = atomic_inc_return(&hctx->tags->active_queues);
-
- blk_mq_update_wake_batch(hctx->tags, users);
+ spin_lock_irq(&tags->lock);
+ users = tags->active_queues + 1;
+ WRITE_ONCE(tags->active_queues, users);
+ blk_mq_update_wake_batch(tags, users);
+ spin_unlock_irq(&tags->lock);
}
/*
@@ -90,9 +93,11 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
return;
}
- users = atomic_dec_return(&tags->active_queues);
-
+ spin_lock_irq(&tags->lock);
+ users = tags->active_queues - 1;
+ WRITE_ONCE(tags->active_queues, users);
blk_mq_update_wake_batch(tags, users);
+ spin_unlock_irq(&tags->lock);
blk_mq_tag_wakeup_all(tags, false);
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 850bfb844ed2..decb6ab2d508 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -45,6 +45,8 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
+static void blk_mq_request_bypass_insert(struct request *rq,
+ blk_insert_t flags);
static void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
@@ -354,12 +356,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
data->rq_flags |= RQF_IO_STAT;
rq->rq_flags = data->rq_flags;
- if (!(data->rq_flags & RQF_ELV)) {
- rq->tag = tag;
- rq->internal_tag = BLK_MQ_NO_TAG;
- } else {
+ if (data->rq_flags & RQF_SCHED_TAGS) {
rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag;
+ } else {
+ rq->tag = tag;
+ rq->internal_tag = BLK_MQ_NO_TAG;
}
rq->timeout = 0;
@@ -386,17 +388,14 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
WRITE_ONCE(rq->deadline, 0);
req_ref_set(rq, 1);
- if (rq->rq_flags & RQF_ELV) {
+ if (rq->rq_flags & RQF_USE_SCHED) {
struct elevator_queue *e = data->q->elevator;
INIT_HLIST_NODE(&rq->hash);
RB_CLEAR_NODE(&rq->rb_node);
- if (!op_is_flush(data->cmd_flags) &&
- e->type->ops.prepare_request) {
+ if (e->type->ops.prepare_request)
e->type->ops.prepare_request(rq);
- rq->rq_flags |= RQF_ELVPRIV;
- }
}
return rq;
@@ -449,26 +448,32 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
data->flags |= BLK_MQ_REQ_NOWAIT;
if (q->elevator) {
- struct elevator_queue *e = q->elevator;
-
- data->rq_flags |= RQF_ELV;
+ /*
+ * All requests use scheduler tags when an I/O scheduler is
+ * enabled for the queue.
+ */
+ data->rq_flags |= RQF_SCHED_TAGS;
/*
* Flush/passthrough requests are special and go directly to the
- * dispatch list. Don't include reserved tags in the
- * limiting, as it isn't useful.
+ * dispatch list.
*/
- if (!op_is_flush(data->cmd_flags) &&
- !blk_op_is_passthrough(data->cmd_flags) &&
- e->type->ops.limit_depth &&
- !(data->flags & BLK_MQ_REQ_RESERVED))
- e->type->ops.limit_depth(data->cmd_flags, data);
+ if ((data->cmd_flags & REQ_OP_MASK) != REQ_OP_FLUSH &&
+ !blk_op_is_passthrough(data->cmd_flags)) {
+ struct elevator_mq_ops *ops = &q->elevator->type->ops;
+
+ WARN_ON_ONCE(data->flags & BLK_MQ_REQ_RESERVED);
+
+ data->rq_flags |= RQF_USE_SCHED;
+ if (ops->limit_depth)
+ ops->limit_depth(data->cmd_flags, data);
+ }
}
retry:
data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!(data->rq_flags & RQF_ELV))
+ if (!(data->rq_flags & RQF_SCHED_TAGS))
blk_mq_tag_busy(data->hctx);
if (data->flags & BLK_MQ_REQ_RESERVED)
@@ -648,10 +653,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
- if (!q->elevator)
- blk_mq_tag_busy(data.hctx);
+ if (q->elevator)
+ data.rq_flags |= RQF_SCHED_TAGS;
else
- data.rq_flags |= RQF_ELV;
+ blk_mq_tag_busy(data.hctx);
if (flags & BLK_MQ_REQ_RESERVED)
data.rq_flags |= RQF_RESV;
@@ -699,7 +704,7 @@ void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- if ((rq->rq_flags & RQF_ELVPRIV) &&
+ if ((rq->rq_flags & RQF_USE_SCHED) &&
q->elevator->type->ops.finish_request)
q->elevator->type->ops.finish_request(rq);
@@ -957,6 +962,8 @@ EXPORT_SYMBOL_GPL(blk_update_request);
static inline void blk_account_io_done(struct request *req, u64 now)
{
+ trace_block_io_done(req);
+
/*
* Account IO completion. flush_rq isn't accounted as a
* normal IO on queueing nor completion. Accounting the
@@ -976,6 +983,8 @@ static inline void blk_account_io_done(struct request *req, u64 now)
static inline void blk_account_io_start(struct request *req)
{
+ trace_block_io_start(req);
+
if (blk_do_io_stat(req)) {
/*
* All non-passthrough requests are created from a bio with one
@@ -1176,8 +1185,9 @@ bool blk_mq_complete_request_remote(struct request *rq)
* or a polled request, always complete locally,
* it's pointless to redirect the completion.
*/
- if (rq->mq_hctx->nr_ctx == 1 ||
- rq->cmd_flags & REQ_POLLED)
+ if ((rq->mq_hctx->nr_ctx == 1 &&
+ rq->mq_ctx->cpu == raw_smp_processor_id()) ||
+ rq->cmd_flags & REQ_POLLED)
return false;
if (blk_mq_complete_need_ipi(rq)) {
@@ -1270,7 +1280,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
if (!plug->multiple_queues && last && last->q != rq->q)
plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ if (!plug->has_elevator && (rq->rq_flags & RQF_USE_SCHED))
plug->has_elevator = true;
rq->rq_next = NULL;
rq_list_add(&plug->mq_list, rq);
@@ -1411,13 +1421,16 @@ static void __blk_mq_requeue_request(struct request *rq)
void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
{
struct request_queue *q = rq->q;
+ unsigned long flags;
__blk_mq_requeue_request(rq);
/* this request will be re-inserted to io scheduler queue */
blk_mq_sched_requeue_request(rq);
- blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
+ spin_lock_irqsave(&q->requeue_lock, flags);
+ list_add_tail(&rq->queuelist, &q->requeue_list);
+ spin_unlock_irqrestore(&q->requeue_lock, flags);
if (kick_requeue_list)
blk_mq_kick_requeue_list(q);
@@ -1429,13 +1442,16 @@ static void blk_mq_requeue_work(struct work_struct *work)
struct request_queue *q =
container_of(work, struct request_queue, requeue_work.work);
LIST_HEAD(rq_list);
- struct request *rq, *next;
+ LIST_HEAD(flush_list);
+ struct request *rq;
spin_lock_irq(&q->requeue_lock);
list_splice_init(&q->requeue_list, &rq_list);
+ list_splice_init(&q->flush_list, &flush_list);
spin_unlock_irq(&q->requeue_lock);
- list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+ while (!list_empty(&rq_list)) {
+ rq = list_entry(rq_list.next, struct request, queuelist);
/*
* If RQF_DONTPREP ist set, the request has been started by the
* driver already and might have driver-specific data allocated
@@ -1443,18 +1459,16 @@ static void blk_mq_requeue_work(struct work_struct *work)
* block layer merges for the request.
*/
if (rq->rq_flags & RQF_DONTPREP) {
- rq->rq_flags &= ~RQF_SOFTBARRIER;
list_del_init(&rq->queuelist);
blk_mq_request_bypass_insert(rq, 0);
- } else if (rq->rq_flags & RQF_SOFTBARRIER) {
- rq->rq_flags &= ~RQF_SOFTBARRIER;
+ } else {
list_del_init(&rq->queuelist);
blk_mq_insert_request(rq, BLK_MQ_INSERT_AT_HEAD);
}
}
- while (!list_empty(&rq_list)) {
- rq = list_entry(rq_list.next, struct request, queuelist);
+ while (!list_empty(&flush_list)) {
+ rq = list_entry(flush_list.next, struct request, queuelist);
list_del_init(&rq->queuelist);
blk_mq_insert_request(rq, 0);
}
@@ -1462,27 +1476,6 @@ static void blk_mq_requeue_work(struct work_struct *work)
blk_mq_run_hw_queues(q, false);
}
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags)
-{
- struct request_queue *q = rq->q;
- unsigned long flags;
-
- /*
- * We abuse this flag that is otherwise used by the I/O scheduler to
- * request head insertion from the workqueue.
- */
- BUG_ON(rq->rq_flags & RQF_SOFTBARRIER);
-
- spin_lock_irqsave(&q->requeue_lock, flags);
- if (insert_flags & BLK_MQ_INSERT_AT_HEAD) {
- rq->rq_flags |= RQF_SOFTBARRIER;
- list_add(&rq->queuelist, &q->requeue_list);
- } else {
- list_add_tail(&rq->queuelist, &q->requeue_list);
- }
- spin_unlock_irqrestore(&q->requeue_lock, flags);
-}
-
void blk_mq_kick_requeue_list(struct request_queue *q)
{
kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, 0);
@@ -2427,7 +2420,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
-void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags)
+static void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags)
{
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
@@ -2492,7 +2485,7 @@ static void blk_mq_insert_request(struct request *rq, blk_insert_t flags)
* dispatch it given we prioritize requests in hctx->dispatch.
*/
blk_mq_request_bypass_insert(rq, flags);
- } else if (rq->rq_flags & RQF_FLUSH_SEQ) {
+ } else if (req_op(rq) == REQ_OP_FLUSH) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(
@@ -2622,7 +2615,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
return;
}
- if ((rq->rq_flags & RQF_ELV) || !blk_mq_get_budget_and_tag(rq)) {
+ if ((rq->rq_flags & RQF_USE_SCHED) || !blk_mq_get_budget_and_tag(rq)) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, false);
return;
@@ -2711,6 +2704,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
struct request *requeue_list = NULL;
struct request **requeue_lastp = &requeue_list;
unsigned int depth = 0;
+ bool is_passthrough = false;
LIST_HEAD(list);
do {
@@ -2719,7 +2713,9 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
if (!this_hctx) {
this_hctx = rq->mq_hctx;
this_ctx = rq->mq_ctx;
- } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+ is_passthrough = blk_rq_is_passthrough(rq);
+ } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx ||
+ is_passthrough != blk_rq_is_passthrough(rq)) {
rq_list_add_tail(&requeue_lastp, rq);
continue;
}
@@ -2731,7 +2727,13 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
trace_block_unplug(this_hctx->queue, depth, !from_sched);
percpu_ref_get(&this_hctx->queue->q_usage_counter);
- if (this_hctx->queue->elevator) {
+ /* passthrough requests should never be issued to the I/O scheduler */
+ if (is_passthrough) {
+ spin_lock(&this_hctx->lock);
+ list_splice_tail_init(&list, &this_hctx->dispatch);
+ spin_unlock(&this_hctx->lock);
+ blk_mq_run_hw_queue(this_hctx, from_sched);
+ } else if (this_hctx->queue->elevator) {
this_hctx->queue->elevator->type->ops.insert_requests(this_hctx,
&list, 0);
blk_mq_run_hw_queue(this_hctx, from_sched);
@@ -2970,10 +2972,8 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
- if (op_is_flush(bio->bi_opf)) {
- blk_insert_flush(rq);
+ if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
return;
- }
if (plug) {
blk_add_rq_to_plug(plug, rq);
@@ -2981,7 +2981,7 @@ void blk_mq_submit_bio(struct bio *bio)
}
hctx = rq->mq_hctx;
- if ((rq->rq_flags & RQF_ELV) ||
+ if ((rq->rq_flags & RQF_USE_SCHED) ||
(hctx->dispatch_busy && (q->nr_hw_queues == 1 || !is_sync))) {
blk_mq_insert_request(rq, 0);
blk_mq_run_hw_queue(hctx, true);
@@ -4232,6 +4232,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_mq_update_poll_flag(q);
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
+ INIT_LIST_HEAD(&q->flush_list);
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
@@ -4608,9 +4609,6 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
{
struct blk_mq_qe_pair *qe;
- if (!q->elevator)
- return true;
-
qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
if (!qe)
return false;
@@ -4618,6 +4616,12 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
/* q->elevator needs protection from ->sysfs_lock */
mutex_lock(&q->sysfs_lock);
+ /* the check has to be done with holding sysfs_lock */
+ if (!q->elevator) {
+ kfree(qe);
+ goto unlock;
+ }
+
INIT_LIST_HEAD(&qe->node);
qe->q = q;
qe->type = q->elevator->type;
@@ -4625,6 +4629,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
__elevator_get(qe->type);
list_add(&qe->node, head);
elevator_disable(q);
+unlock:
mutex_unlock(&q->sysfs_lock);
return true;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e876584d3516..1743857e0b01 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -47,7 +47,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
unsigned int);
-void blk_mq_add_to_requeue_list(struct request *rq, blk_insert_t insert_flags);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
@@ -64,10 +63,6 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
struct blk_mq_tags *tags,
unsigned int hctx_idx);
-/*
- * Internal helpers for request insertion into sw queues
- */
-void blk_mq_request_bypass_insert(struct request *rq, blk_insert_t flags);
/*
* CPU -> queue mappings
@@ -226,9 +221,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
- if (!(data->rq_flags & RQF_ELV))
- return data->hctx->tags;
- return data->hctx->sched_tags;
+ if (data->rq_flags & RQF_SCHED_TAGS)
+ return data->hctx->sched_tags;
+ return data->hctx->tags;
}
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
@@ -417,8 +412,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return true;
}
- users = atomic_read(&hctx->tags->active_queues);
-
+ users = READ_ONCE(hctx->tags->active_queues);
if (!users)
return true;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index d8cc820a365e..167be74df4ee 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -288,11 +288,13 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
void rq_qos_exit(struct request_queue *q)
{
+ mutex_lock(&q->rq_qos_mutex);
while (q->rq_qos) {
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
rqos->ops->exit(rqos);
}
+ mutex_unlock(&q->rq_qos_mutex);
}
int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
@@ -300,6 +302,8 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
{
struct request_queue *q = disk->queue;
+ lockdep_assert_held(&q->rq_qos_mutex);
+
rqos->disk = disk;
rqos->id = id;
rqos->ops = ops;
@@ -307,18 +311,13 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
/*
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
- *
- * Reuse ->queue_lock for protecting against other concurrent
- * rq_qos adding/deleting
*/
blk_mq_freeze_queue(q);
- spin_lock_irq(&q->queue_lock);
if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
- spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
@@ -330,7 +329,6 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
return 0;
ebusy:
- spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
return -EBUSY;
}
@@ -340,21 +338,15 @@ void rq_qos_del(struct rq_qos *rqos)
struct request_queue *q = rqos->disk->queue;
struct rq_qos **cur;
- /*
- * See comment in rq_qos_add() about freezing queue & using
- * ->queue_lock.
- */
- blk_mq_freeze_queue(q);
+ lockdep_assert_held(&q->rq_qos_mutex);
- spin_lock_irq(&q->queue_lock);
+ blk_mq_freeze_queue(q);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
- spin_unlock_irq(&q->queue_lock);
-
blk_mq_unfreeze_queue(q);
mutex_lock(&q->debugfs_mutex);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 9ec2a2f1eda3..7a87506ff8e1 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -944,7 +944,9 @@ int wbt_init(struct gendisk *disk)
/*
* Assign rwb and add the stats callback.
*/
+ mutex_lock(&q->rq_qos_mutex);
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
+ mutex_unlock(&q->rq_qos_mutex);
if (ret)
goto err_free;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index fce9082384d6..0f9f97cdddd9 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -57,16 +57,10 @@ EXPORT_SYMBOL_GPL(blk_zone_cond_str);
*/
bool blk_req_needs_zone_write_lock(struct request *rq)
{
- if (blk_rq_is_passthrough(rq))
- return false;
-
if (!rq->q->disk->seq_zones_wlock)
return false;
- if (bdev_op_is_zoned_write(rq->q->disk->part0, req_op(rq)))
- return blk_rq_zone_is_seq(rq);
-
- return false;
+ return blk_rq_is_seq_zoned_write(rq);
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
@@ -329,8 +323,8 @@ static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
* BLKREPORTZONE ioctl processing.
* Called from blkdev_ioctl.
*/
-int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
+ unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct zone_report_args args;
@@ -362,8 +356,8 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
return 0;
}
-static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
- const struct blk_zone_range *zrange)
+static int blkdev_truncate_zone_range(struct block_device *bdev,
+ blk_mode_t mode, const struct blk_zone_range *zrange)
{
loff_t start, end;
@@ -382,7 +376,7 @@ static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
* BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
*/
-int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
@@ -396,7 +390,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
if (!bdev_is_zoned(bdev))
return -ENOTTY;
- if (!(mode & FMODE_WRITE))
+ if (!(mode & BLK_OPEN_WRITE))
return -EBADF;
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
diff --git a/block/blk.h b/block/blk.h
index 45547bcf1119..608c5dcc516b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -269,7 +269,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
*/
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
-void blk_insert_flush(struct request *rq);
+bool blk_insert_flush(struct request *rq);
int elevator_switch(struct request_queue *q, struct elevator_type *new_e);
void elevator_disable(struct request_queue *q);
@@ -394,10 +394,27 @@ static inline struct bio *blk_queue_bounce(struct bio *bio,
#ifdef CONFIG_BLK_DEV_ZONED
void disk_free_zone_bitmaps(struct gendisk *disk);
void disk_clear_zone_settings(struct gendisk *disk);
-#else
+int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
+ unsigned long arg);
+int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, unsigned long arg);
+#else /* CONFIG_BLK_DEV_ZONED */
static inline void disk_free_zone_bitmaps(struct gendisk *disk) {}
static inline void disk_clear_zone_settings(struct gendisk *disk) {}
-#endif
+static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
+ unsigned int cmd, unsigned long arg)
+{
+ return -ENOTTY;
+}
+static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
+ blk_mode_t mode, unsigned int cmd, unsigned long arg)
+{
+ return -ENOTTY;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
+struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
+void bdev_add(struct block_device *bdev, dev_t dev);
int blk_alloc_ext_minor(void);
void blk_free_ext_minor(unsigned int minor);
@@ -409,7 +426,7 @@ int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
int bdev_del_partition(struct gendisk *disk, int partno);
int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length);
-void blk_drop_partitions(struct gendisk *disk);
+void drop_partition(struct block_device *part);
void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
@@ -420,9 +437,19 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
+/*
+ * Clean up a page appropriately, where the page may be pinned, may have a
+ * ref taken on it or neither.
+ */
+static inline void bio_release_page(struct bio *bio, struct page *page)
+{
+ if (bio_flagged(bio, BIO_PAGE_PINNED))
+ unpin_user_page(page);
+}
+
struct request_queue *blk_alloc_queue(int node_id);
-int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
+int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode);
int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
@@ -437,6 +464,9 @@ extern struct device_attribute dev_attr_events_poll_msecs;
extern struct attribute_group blk_trace_attr_group;
+blk_mode_t file_to_blk_mode(struct file *file);
+int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
+ loff_t lstart, loff_t lend);
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 435c32373cd6..b3acdbdb6e7e 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -26,7 +26,7 @@ struct bsg_set {
};
static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
- fmode_t mode, unsigned int timeout)
+ bool open_for_write, unsigned int timeout)
{
struct bsg_job *job;
struct request *rq;
diff --git a/block/bsg.c b/block/bsg.c
index 7eca43f33d7f..1a9396a3b7d7 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -39,7 +39,7 @@ static inline struct bsg_device *to_bsg_device(struct inode *inode)
#define BSG_MAX_DEVS 32768
static DEFINE_IDA(bsg_minor_ida);
-static struct class *bsg_class;
+static const struct class bsg_class;
static int bsg_major;
static unsigned int bsg_timeout(struct bsg_device *bd, struct sg_io_v4 *hdr)
@@ -54,7 +54,8 @@ static unsigned int bsg_timeout(struct bsg_device *bd, struct sg_io_v4 *hdr)
return max_t(unsigned int, timeout, BLK_MIN_SG_TIMEOUT);
}
-static int bsg_sg_io(struct bsg_device *bd, fmode_t mode, void __user *uarg)
+static int bsg_sg_io(struct bsg_device *bd, bool open_for_write,
+ void __user *uarg)
{
struct sg_io_v4 hdr;
int ret;
@@ -63,7 +64,8 @@ static int bsg_sg_io(struct bsg_device *bd, fmode_t mode, void __user *uarg)
return -EFAULT;
if (hdr.guard != 'Q')
return -EINVAL;
- ret = bd->sg_io_fn(bd->queue, &hdr, mode, bsg_timeout(bd, &hdr));
+ ret = bd->sg_io_fn(bd->queue, &hdr, open_for_write,
+ bsg_timeout(bd, &hdr));
if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr)))
return -EFAULT;
return ret;
@@ -146,7 +148,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case SG_EMULATED_HOST:
return put_user(1, intp);
case SG_IO:
- return bsg_sg_io(bd, file->f_mode, uarg);
+ return bsg_sg_io(bd, file->f_mode & FMODE_WRITE, uarg);
case SCSI_IOCTL_SEND_COMMAND:
pr_warn_ratelimited("%s: calling unsupported SCSI_IOCTL_SEND_COMMAND\n",
current->comm);
@@ -206,7 +208,7 @@ struct bsg_device *bsg_register_queue(struct request_queue *q,
return ERR_PTR(ret);
}
bd->device.devt = MKDEV(bsg_major, ret);
- bd->device.class = bsg_class;
+ bd->device.class = &bsg_class;
bd->device.parent = parent;
bd->device.release = bsg_device_release;
dev_set_name(&bd->device, "%s", name);
@@ -240,15 +242,19 @@ static char *bsg_devnode(const struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
}
+static const struct class bsg_class = {
+ .name = "bsg",
+ .devnode = bsg_devnode,
+};
+
static int __init bsg_init(void)
{
dev_t devid;
int ret;
- bsg_class = class_create("bsg");
- if (IS_ERR(bsg_class))
- return PTR_ERR(bsg_class);
- bsg_class->devnode = bsg_devnode;
+ ret = class_register(&bsg_class);
+ if (ret)
+ return ret;
ret = alloc_chrdev_region(&devid, 0, BSG_MAX_DEVS, "bsg");
if (ret)
@@ -260,7 +266,7 @@ static int __init bsg_init(void)
return 0;
destroy_bsg_class:
- class_destroy(bsg_class);
+ class_unregister(&bsg_class);
return ret;
}
diff --git a/block/disk-events.c b/block/disk-events.c
index aee25a7e1ab7..0cfac464e6d1 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -263,31 +263,31 @@ static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
}
/**
- * bdev_check_media_change - check if a removable media has been changed
- * @bdev: block device to check
+ * disk_check_media_change - check if a removable media has been changed
+ * @disk: gendisk to check
*
* Check whether a removable media has been changed, and attempt to free all
* dentries and inodes and invalidates all block device page cache entries in
* that case.
*
- * Returns %true if the block device changed, or %false if not.
+ * Returns %true if the media has changed, or %false if not.
*/
-bool bdev_check_media_change(struct block_device *bdev)
+bool disk_check_media_change(struct gendisk *disk)
{
unsigned int events;
- events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE |
+ events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
DISK_EVENT_EJECT_REQUEST);
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return false;
- if (__invalidate_device(bdev, true))
+ if (__invalidate_device(disk->part0, true))
pr_warn("VFS: busy inodes on changed media %s\n",
- bdev->bd_disk->disk_name);
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ disk->disk_name);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
return true;
}
-EXPORT_SYMBOL(bdev_check_media_change);
+EXPORT_SYMBOL(disk_check_media_change);
/**
* disk_force_media_change - force a media change event
@@ -307,6 +307,7 @@ bool disk_force_media_change(struct gendisk *disk, unsigned int events)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return false;
+ inc_diskseq(disk);
if (__invalidate_device(disk->part0, true))
pr_warn("VFS: busy inodes on changed media %s\n",
disk->disk_name);
diff --git a/block/early-lookup.c b/block/early-lookup.c
new file mode 100644
index 000000000000..3effbd0d35e9
--- /dev/null
+++ b/block/early-lookup.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code for looking up block devices in the early boot code before mounting the
+ * root file system.
+ */
+#include <linux/blkdev.h>
+#include <linux/ctype.h>
+
+struct uuidcmp {
+ const char *uuid;
+ int len;
+};
+
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the desired struct uuidcmp to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int __init match_dev_by_uuid(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const struct uuidcmp *cmp = data;
+
+ if (!bdev->bd_meta_info ||
+ strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len))
+ return 0;
+ return 1;
+}
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid_str: char array containing ascii UUID
+ * @devt: dev_t result
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * If @uuid_str is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int __init devt_from_partuuid(const char *uuid_str, dev_t *devt)
+{
+ struct uuidcmp cmp;
+ struct device *dev = NULL;
+ int offset = 0;
+ char *slash;
+
+ cmp.uuid = uuid_str;
+
+ slash = strchr(uuid_str, '/');
+ /* Check for optional partition number offset attributes. */
+ if (slash) {
+ char c = 0;
+
+ /* Explicitly fail on poor PARTUUID syntax. */
+ if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1)
+ goto out_invalid;
+ cmp.len = slash - uuid_str;
+ } else {
+ cmp.len = strlen(uuid_str);
+ }
+
+ if (!cmp.len)
+ goto out_invalid;
+
+ dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid);
+ if (!dev)
+ return -ENODEV;
+
+ if (offset) {
+ /*
+ * Attempt to find the requested partition by adding an offset
+ * to the partition number found by UUID.
+ */
+ *devt = part_devt(dev_to_disk(dev),
+ dev_to_bdev(dev)->bd_partno + offset);
+ } else {
+ *devt = dev->devt;
+ }
+
+ put_device(dev);
+ return 0;
+
+out_invalid:
+ pr_err("VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ return -EINVAL;
+}
+
+/**
+ * match_dev_by_label - callback for finding a partition using its label
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to the label to match
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int __init match_dev_by_label(struct device *dev, const void *data)
+{
+ struct block_device *bdev = dev_to_bdev(dev);
+ const char *label = data;
+
+ if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname))
+ return 0;
+ return 1;
+}
+
+static int __init devt_from_partlabel(const char *label, dev_t *devt)
+{
+ struct device *dev;
+
+ dev = class_find_device(&block_class, NULL, label, &match_dev_by_label);
+ if (!dev)
+ return -ENODEV;
+ *devt = dev->devt;
+ put_device(dev);
+ return 0;
+}
+
+static dev_t __init blk_lookup_devt(const char *name, int partno)
+{
+ dev_t devt = MKDEV(0, 0);
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (strcmp(dev_name(dev), name))
+ continue;
+
+ if (partno < disk->minors) {
+ /* We need to return the right devno, even
+ * if the partition doesn't exist yet.
+ */
+ devt = MKDEV(MAJOR(dev->devt),
+ MINOR(dev->devt) + partno);
+ } else {
+ devt = part_devt(disk, partno);
+ if (devt)
+ break;
+ }
+ }
+ class_dev_iter_exit(&iter);
+ return devt;
+}
+
+static int __init devt_from_devname(const char *name, dev_t *devt)
+{
+ int part;
+ char s[32];
+ char *p;
+
+ if (strlen(name) > 31)
+ return -EINVAL;
+ strcpy(s, name);
+ for (p = s; *p; p++) {
+ if (*p == '/')
+ *p = '!';
+ }
+
+ *devt = blk_lookup_devt(s, 0);
+ if (*devt)
+ return 0;
+
+ /*
+ * Try non-existent, but valid partition, which may only exist after
+ * opening the device, like partitioned md devices.
+ */
+ while (p > s && isdigit(p[-1]))
+ p--;
+ if (p == s || !*p || *p == '0')
+ return -ENODEV;
+
+ /* try disk name without <part number> */
+ part = simple_strtoul(p, NULL, 10);
+ *p = '\0';
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;
+
+ /* try disk name without p<part number> */
+ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+ return -ENODEV;
+ p[-1] = '\0';
+ *devt = blk_lookup_devt(s, part);
+ if (*devt)
+ return 0;
+ return -ENODEV;
+}
+
+static int __init devt_from_devnum(const char *name, dev_t *devt)
+{
+ unsigned maj, min, offset;
+ char *p, dummy;
+
+ if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
+ sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) {
+ *devt = MKDEV(maj, min);
+ if (maj != MAJOR(*devt) || min != MINOR(*devt))
+ return -EINVAL;
+ } else {
+ *devt = new_decode_dev(simple_strtoul(name, &p, 16));
+ if (*p)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Convert a name into device number. We accept the following variants:
+ *
+ * 1) <hex_major><hex_minor> device number in hexadecimal represents itself
+ * no leading 0x, for example b302.
+ * 3) /dev/<disk_name> represents the device number of disk
+ * 4) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ * partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ * filled hex representation of the 32-bit "NT disk signature", and PP
+ * is a zero-filled hex representation of the 1-based partition number.
+ * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ * 8) <major>:<minor> major and minor number of the device separated by
+ * a colon.
+ * 9) PARTLABEL=<name> with name being the GPT partition label.
+ * MSDOS partitions do not support labels!
+ *
+ * If name doesn't fall into the categories above, we return (0,0).
+ * block_class is used to check if something is a disk name. If the disk
+ * name contains slashes, the device name has them replaced with
+ * bangs.
+ */
+int __init early_lookup_bdev(const char *name, dev_t *devt)
+{
+ if (strncmp(name, "PARTUUID=", 9) == 0)
+ return devt_from_partuuid(name + 9, devt);
+ if (strncmp(name, "PARTLABEL=", 10) == 0)
+ return devt_from_partlabel(name + 10, devt);
+ if (strncmp(name, "/dev/", 5) == 0)
+ return devt_from_devname(name + 5, devt);
+ return devt_from_devnum(name, devt);
+}
+
+static char __init *bdevt_str(dev_t devt, char *buf)
+{
+ if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
+ char tbuf[BDEVT_SIZE];
+ snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
+ snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
+ } else
+ snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
+
+ return buf;
+}
+
+/*
+ * print a full list of all partitions - intended for places where the root
+ * filesystem can't be mounted and thus to give the victim some idea of what
+ * went wrong
+ */
+void __init printk_all_partitions(void)
+{
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+ struct block_device *part;
+ char devt_buf[BDEVT_SIZE];
+ unsigned long idx;
+
+ /*
+ * Don't show empty devices or things that have been
+ * suppressed
+ */
+ if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
+ continue;
+
+ /*
+ * Note, unlike /proc/partitions, I am showing the numbers in
+ * hex - the same format as the root= option takes.
+ */
+ rcu_read_lock();
+ xa_for_each(&disk->part_tbl, idx, part) {
+ if (!bdev_nr_sectors(part))
+ continue;
+ printk("%s%s %10llu %pg %s",
+ bdev_is_partition(part) ? " " : "",
+ bdevt_str(part->bd_dev, devt_buf),
+ bdev_nr_sectors(part) >> 1, part,
+ part->bd_meta_info ?
+ part->bd_meta_info->uuid : "");
+ if (bdev_is_partition(part))
+ printk("\n");
+ else if (dev->parent && dev->parent->driver)
+ printk(" driver: %s\n",
+ dev->parent->driver->name);
+ else
+ printk(" (driver?)\n");
+ }
+ rcu_read_unlock();
+ }
+ class_dev_iter_exit(&iter);
+}
diff --git a/block/elevator.c b/block/elevator.c
index 24909069f872..8400e303fbcb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -751,7 +751,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
if (!elv_support_iosched(q))
return count;
- strlcpy(elevator_name, buf, sizeof(elevator_name));
+ strscpy(elevator_name, buf, sizeof(elevator_name));
ret = elevator_change(q, strstrip(elevator_name));
if (!ret)
return count;
diff --git a/block/fops.c b/block/fops.c
index 58d0aebc7313..a286bf3325c5 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -54,7 +54,7 @@ static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages)
{
- struct block_device *bdev = iocb->ki_filp->private_data;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
@@ -170,7 +170,7 @@ static void blkdev_bio_end_io(struct bio *bio)
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
unsigned int nr_pages)
{
- struct block_device *bdev = iocb->ki_filp->private_data;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
@@ -310,7 +310,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
struct iov_iter *iter,
unsigned int nr_pages)
{
- struct block_device *bdev = iocb->ki_filp->private_data;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
bool is_read = iov_iter_rw(iter) == READ;
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
struct blkdev_dio *dio;
@@ -451,7 +451,7 @@ static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
- struct block_device *bdev = filp->private_data;
+ struct block_device *bdev = I_BDEV(filp->f_mapping->host);
int error;
error = file_write_and_wait_range(filp, start, end);
@@ -470,6 +470,30 @@ static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
return error;
}
+blk_mode_t file_to_blk_mode(struct file *file)
+{
+ blk_mode_t mode = 0;
+
+ if (file->f_mode & FMODE_READ)
+ mode |= BLK_OPEN_READ;
+ if (file->f_mode & FMODE_WRITE)
+ mode |= BLK_OPEN_WRITE;
+ if (file->private_data)
+ mode |= BLK_OPEN_EXCL;
+ if (file->f_flags & O_NDELAY)
+ mode |= BLK_OPEN_NDELAY;
+
+ /*
+ * If all bits in O_ACCMODE are set (aka O_RDWR | O_WRONLY), the floppy
+ * driver has historically allowed ioctls as if the file was opened for
+ * writing, but does not allow any actual reads or writes.
+ */
+ if ((file->f_flags & O_ACCMODE) == (O_RDWR | O_WRONLY))
+ mode |= BLK_OPEN_WRITE_IOCTL;
+
+ return mode;
+}
+
static int blkdev_open(struct inode *inode, struct file *filp)
{
struct block_device *bdev;
@@ -481,30 +505,31 @@ static int blkdev_open(struct inode *inode, struct file *filp)
* during an unstable branch.
*/
filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_BUF_RASYNC;
- if (filp->f_flags & O_NDELAY)
- filp->f_mode |= FMODE_NDELAY;
+ /*
+ * Use the file private data to store the holder for exclusive opens.
+ * file_to_blk_mode relies on it being present to set BLK_OPEN_EXCL.
+ */
if (filp->f_flags & O_EXCL)
- filp->f_mode |= FMODE_EXCL;
- if ((filp->f_flags & O_ACCMODE) == 3)
- filp->f_mode |= FMODE_WRITE_IOCTL;
+ filp->private_data = filp;
- bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ bdev = blkdev_get_by_dev(inode->i_rdev, file_to_blk_mode(filp),
+ filp->private_data, NULL);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- filp->private_data = bdev;
+ if (bdev_nowait(bdev))
+ filp->f_mode |= FMODE_NOWAIT;
+
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
return 0;
}
-static int blkdev_close(struct inode *inode, struct file *filp)
+static int blkdev_release(struct inode *inode, struct file *filp)
{
- struct block_device *bdev = filp->private_data;
-
- blkdev_put(bdev, filp->f_mode);
+ blkdev_put(I_BDEV(filp->f_mapping->host), filp->private_data);
return 0;
}
@@ -517,10 +542,9 @@ static int blkdev_close(struct inode *inode, struct file *filp)
*/
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct block_device *bdev = iocb->ki_filp->private_data;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct inode *bd_inode = bdev->bd_inode;
loff_t size = bdev_nr_bytes(bdev);
- struct blk_plug plug;
size_t shorted = 0;
ssize_t ret;
@@ -545,18 +569,16 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
iov_iter_truncate(from, size);
}
- blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
iov_iter_reexpand(from, iov_iter_count(from) + shorted);
- blk_finish_plug(&plug);
return ret;
}
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct block_device *bdev = iocb->ki_filp->private_data;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
loff_t size = bdev_nr_bytes(bdev);
loff_t pos = iocb->ki_pos;
size_t shorted = 0;
@@ -576,21 +598,9 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand; /* skip atime */
if (iocb->ki_flags & IOCB_DIRECT) {
- struct address_space *mapping = iocb->ki_filp->f_mapping;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, pos,
- pos + count - 1)) {
- ret = -EAGAIN;
- goto reexpand;
- }
- } else {
- ret = filemap_write_and_wait_range(mapping, pos,
- pos + count - 1);
- if (ret < 0)
- goto reexpand;
- }
-
+ ret = kiocb_write_and_wait(iocb, count);
+ if (ret < 0)
+ goto reexpand;
file_accessed(iocb->ki_filp);
ret = blkdev_direct_IO(iocb, to);
@@ -649,7 +659,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
filemap_invalidate_lock(inode->i_mapping);
/* Invalidate the page cache, including dirty pages. */
- error = truncate_bdev_range(bdev, file->f_mode, start, end);
+ error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
if (error)
goto fail;
@@ -690,7 +700,7 @@ static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
const struct file_operations def_blk_fops = {
.open = blkdev_open,
- .release = blkdev_close,
+ .release = blkdev_release,
.llseek = blkdev_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
@@ -701,7 +711,7 @@ const struct file_operations def_blk_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .splice_read = generic_file_splice_read,
+ .splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
};
diff --git a/block/genhd.c b/block/genhd.c
index 1cb489b927d5..3d287b32d50d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -25,8 +25,9 @@
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/part_stat.h>
-#include "blk-throttle.h"
+#include <linux/blktrace_api.h>
+#include "blk-throttle.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
@@ -253,7 +254,7 @@ int __register_blkdev(unsigned int major, const char *name,
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
p->probe = probe;
#endif
- strlcpy(p->name, name, sizeof(p->name));
+ strscpy(p->name, name, sizeof(p->name));
p->next = NULL;
index = major_to_index(major);
@@ -318,18 +319,6 @@ void blk_free_ext_minor(unsigned int minor)
ida_free(&ext_devt_ida, minor);
}
-static char *bdevt_str(dev_t devt, char *buf)
-{
- if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
- char tbuf[BDEVT_SIZE];
- snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
- snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
- } else
- snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
-
- return buf;
-}
-
void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
struct block_device *part;
@@ -351,7 +340,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
}
EXPORT_SYMBOL_GPL(disk_uevent);
-int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
+int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
{
struct block_device *bdev;
int ret = 0;
@@ -369,18 +358,20 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
* synchronize with other exclusive openers and other partition
* scanners.
*/
- if (!(mode & FMODE_EXCL)) {
- ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+ if (!(mode & BLK_OPEN_EXCL)) {
+ ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions,
+ NULL);
if (ret)
return ret;
}
set_bit(GD_NEED_PART_SCAN, &disk->state);
- bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
+ bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL,
+ NULL);
if (IS_ERR(bdev))
ret = PTR_ERR(bdev);
else
- blkdev_put(bdev, mode & ~FMODE_EXCL);
+ blkdev_put(bdev, NULL);
/*
* If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
@@ -388,7 +379,7 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
* creat partition for underlying disk.
*/
clear_bit(GD_NEED_PART_SCAN, &disk->state);
- if (!(mode & FMODE_EXCL))
+ if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(disk->part0, disk_scan_partitions);
return ret;
}
@@ -516,7 +507,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
- disk_scan_partitions(disk, FMODE_READ);
+ disk_scan_partitions(disk, BLK_OPEN_READ);
/*
* Announce the disk and partitions after all partitions are
@@ -563,6 +554,28 @@ out_exit_elevator:
}
EXPORT_SYMBOL(device_add_disk);
+static void blk_report_disk_dead(struct gendisk *disk)
+{
+ struct block_device *bdev;
+ unsigned long idx;
+
+ rcu_read_lock();
+ xa_for_each(&disk->part_tbl, idx, bdev) {
+ if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+ continue;
+ rcu_read_unlock();
+
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+ bdev->bd_holder_ops->mark_dead(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
+
+ put_device(&bdev->bd_device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+}
+
/**
* blk_mark_disk_dead - mark a disk as dead
* @disk: disk to mark as dead
@@ -572,13 +585,26 @@ EXPORT_SYMBOL(device_add_disk);
*/
void blk_mark_disk_dead(struct gendisk *disk)
{
- set_bit(GD_DEAD, &disk->state);
- blk_queue_start_drain(disk->queue);
+ /*
+ * Fail any new I/O.
+ */
+ if (test_and_set_bit(GD_DEAD, &disk->state))
+ return;
+
+ if (test_bit(GD_OWNS_QUEUE, &disk->state))
+ blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
/*
* Stop buffered writers from dirtying pages that can't be written out.
*/
- set_capacity_and_notify(disk, 0);
+ set_capacity(disk, 0);
+
+ /*
+ * Prevent new I/O from crossing bio_queue_enter().
+ */
+ blk_queue_start_drain(disk->queue);
+
+ blk_report_disk_dead(disk);
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
@@ -604,6 +630,8 @@ EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
void del_gendisk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
+ struct block_device *part;
+ unsigned long idx;
might_sleep();
@@ -612,26 +640,27 @@ void del_gendisk(struct gendisk *disk)
disk_del_events(disk);
+ /*
+ * Prevent new openers by unlinking the bdev inode, and write out
+ * dirty data before marking the disk dead and stopping all I/O.
+ */
mutex_lock(&disk->open_mutex);
- remove_inode_hash(disk->part0->bd_inode);
- blk_drop_partitions(disk);
+ xa_for_each(&disk->part_tbl, idx, part) {
+ remove_inode_hash(part->bd_inode);
+ fsync_bdev(part);
+ __invalidate_device(part, true);
+ }
mutex_unlock(&disk->open_mutex);
- fsync_bdev(disk->part0);
- __invalidate_device(disk->part0, true);
+ blk_mark_disk_dead(disk);
/*
- * Fail any new I/O.
+ * Drop all partitions now that the disk is marked dead.
*/
- set_bit(GD_DEAD, &disk->state);
- if (test_bit(GD_OWNS_QUEUE, &disk->state))
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- set_capacity(disk, 0);
-
- /*
- * Prevent new I/O from crossing bio_queue_enter().
- */
- blk_queue_start_drain(q);
+ mutex_lock(&disk->open_mutex);
+ xa_for_each_start(&disk->part_tbl, idx, part, 1)
+ drop_partition(part);
+ mutex_unlock(&disk->open_mutex);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -755,57 +784,6 @@ void blk_request_module(dev_t devt)
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
-/*
- * print a full list of all partitions - intended for places where the root
- * filesystem can't be mounted and thus to give the victim some idea of what
- * went wrong
- */
-void __init printk_all_partitions(void)
-{
- struct class_dev_iter iter;
- struct device *dev;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while ((dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
- struct block_device *part;
- char devt_buf[BDEVT_SIZE];
- unsigned long idx;
-
- /*
- * Don't show empty devices or things that have been
- * suppressed
- */
- if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
- continue;
-
- /*
- * Note, unlike /proc/partitions, I am showing the numbers in
- * hex - the same format as the root= option takes.
- */
- rcu_read_lock();
- xa_for_each(&disk->part_tbl, idx, part) {
- if (!bdev_nr_sectors(part))
- continue;
- printk("%s%s %10llu %pg %s",
- bdev_is_partition(part) ? " " : "",
- bdevt_str(part->bd_dev, devt_buf),
- bdev_nr_sectors(part) >> 1, part,
- part->bd_meta_info ?
- part->bd_meta_info->uuid : "");
- if (bdev_is_partition(part))
- printk("\n");
- else if (dev->parent && dev->parent->driver)
- printk(" driver: %s\n",
- dev->parent->driver->name);
- else
- printk(" (driver?)\n");
- }
- rcu_read_unlock();
- }
- class_dev_iter_exit(&iter);
-}
-
#ifdef CONFIG_PROC_FS
/* iterator */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
@@ -1171,6 +1149,8 @@ static void disk_release(struct device *dev)
might_sleep();
WARN_ON_ONCE(disk_live(disk));
+ blk_trace_remove(disk->queue);
+
/*
* To undo the all initialization from blk_mq_init_allocated_queue in
* case of a probe failure where add_disk is never called we have to
@@ -1339,35 +1319,6 @@ dev_t part_devt(struct gendisk *disk, u8 partno)
return devt;
}
-dev_t blk_lookup_devt(const char *name, int partno)
-{
- dev_t devt = MKDEV(0, 0);
- struct class_dev_iter iter;
- struct device *dev;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while ((dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
-
- if (strcmp(dev_name(dev), name))
- continue;
-
- if (partno < disk->minors) {
- /* We need to return the right devno, even
- * if the partition doesn't exist yet.
- */
- devt = MKDEV(MAJOR(dev->devt),
- MINOR(dev->devt) + partno);
- } else {
- devt = part_devt(disk, partno);
- if (devt)
- break;
- }
- }
- class_dev_iter_exit(&iter);
- return devt;
-}
-
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
struct lock_class_key *lkclass)
{
diff --git a/block/ioctl.c b/block/ioctl.c
index 9c5f637ff153..3be11941fb2d 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -82,7 +82,7 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
}
#endif
-static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
+static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
uint64_t range[2];
@@ -90,7 +90,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
struct inode *inode = bdev->bd_inode;
int err;
- if (!(mode & FMODE_WRITE))
+ if (!(mode & BLK_OPEN_WRITE))
return -EBADF;
if (!bdev_max_discard_sectors(bdev))
@@ -120,14 +120,14 @@ fail:
return err;
}
-static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
+static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
void __user *argp)
{
uint64_t start, len;
uint64_t range[2];
int err;
- if (!(mode & FMODE_WRITE))
+ if (!(mode & BLK_OPEN_WRITE))
return -EBADF;
if (!bdev_max_secure_erase_sectors(bdev))
return -EOPNOTSUPP;
@@ -151,7 +151,7 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, fmode_t mode,
}
-static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
+static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
uint64_t range[2];
@@ -159,7 +159,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
struct inode *inode = bdev->bd_inode;
int err;
- if (!(mode & FMODE_WRITE))
+ if (!(mode & BLK_OPEN_WRITE))
return -EBADF;
if (copy_from_user(range, (void __user *)arg, sizeof(range)))
@@ -240,7 +240,7 @@ static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val)
* drivers that implement only commands that are completely compatible
* between 32-bit and 64-bit user space
*/
-int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode,
+int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned cmd, unsigned long arg)
{
struct gendisk *disk = bdev->bd_disk;
@@ -254,13 +254,28 @@ int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode,
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif
-static int blkdev_pr_register(struct block_device *bdev,
+static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode)
+{
+ /* no sense to make reservations for partitions */
+ if (bdev_is_partition(bdev))
+ return false;
+
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+ /*
+ * Only allow unprivileged reservations if the file descriptor is open
+ * for writing.
+ */
+ return mode & BLK_OPEN_WRITE;
+}
+
+static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
struct pr_registration __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;
- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
@@ -272,13 +287,13 @@ static int blkdev_pr_register(struct block_device *bdev,
return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
}
-static int blkdev_pr_reserve(struct block_device *bdev,
+static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_reserve)
return -EOPNOTSUPP;
@@ -290,13 +305,13 @@ static int blkdev_pr_reserve(struct block_device *bdev,
return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
}
-static int blkdev_pr_release(struct block_device *bdev,
+static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;
- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_release)
return -EOPNOTSUPP;
@@ -308,13 +323,13 @@ static int blkdev_pr_release(struct block_device *bdev,
return ops->pr_release(bdev, rsv.key, rsv.type);
}
-static int blkdev_pr_preempt(struct block_device *bdev,
+static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode,
struct pr_preempt __user *arg, bool abort)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_preempt p;
- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_preempt)
return -EOPNOTSUPP;
@@ -326,13 +341,13 @@ static int blkdev_pr_preempt(struct block_device *bdev,
return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
}
-static int blkdev_pr_clear(struct block_device *bdev,
+static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode,
struct pr_clear __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_clear c;
- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_clear)
return -EOPNOTSUPP;
@@ -344,8 +359,8 @@ static int blkdev_pr_clear(struct block_device *bdev,
return ops->pr_clear(bdev, c.key);
}
-static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg)
+static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
+ unsigned long arg)
{
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -354,8 +369,8 @@ static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
return 0;
}
-static int blkdev_roset(struct block_device *bdev, fmode_t mode,
- unsigned cmd, unsigned long arg)
+static int blkdev_roset(struct block_device *bdev, unsigned cmd,
+ unsigned long arg)
{
int ret, n;
@@ -439,7 +454,7 @@ static int compat_hdio_getgeo(struct block_device *bdev,
#endif
/* set the logical block size */
-static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
+static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode,
int __user *argp)
{
int ret, n;
@@ -451,13 +466,13 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
if (get_user(n, argp))
return -EFAULT;
- if (mode & FMODE_EXCL)
+ if (mode & BLK_OPEN_EXCL)
return set_blocksize(bdev, n);
- if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode | FMODE_EXCL, &bdev)))
+ if (IS_ERR(blkdev_get_by_dev(bdev->bd_dev, mode, &bdev, NULL)))
return -EBUSY;
ret = set_blocksize(bdev, n);
- blkdev_put(bdev, mode | FMODE_EXCL);
+ blkdev_put(bdev, &bdev);
return ret;
}
@@ -467,7 +482,7 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
* user space. Note the separate arg/argp parameters that are needed
* to deal with the compat_ptr() conversion.
*/
-static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
+static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg,
void __user *argp)
{
@@ -475,9 +490,9 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) {
case BLKFLSBUF:
- return blkdev_flushbuf(bdev, mode, cmd, arg);
+ return blkdev_flushbuf(bdev, cmd, arg);
case BLKROSET:
- return blkdev_roset(bdev, mode, cmd, arg);
+ return blkdev_roset(bdev, cmd, arg);
case BLKDISCARD:
return blk_ioctl_discard(bdev, mode, arg);
case BLKSECDISCARD:
@@ -487,7 +502,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKGETDISKSEQ:
return put_u64(argp, bdev->bd_disk->diskseq);
case BLKREPORTZONE:
- return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
+ return blkdev_report_zones_ioctl(bdev, cmd, arg);
case BLKRESETZONE:
case BLKOPENZONE:
case BLKCLOSEZONE:
@@ -534,17 +549,17 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
case IOC_PR_REGISTER:
- return blkdev_pr_register(bdev, argp);
+ return blkdev_pr_register(bdev, mode, argp);
case IOC_PR_RESERVE:
- return blkdev_pr_reserve(bdev, argp);
+ return blkdev_pr_reserve(bdev, mode, argp);
case IOC_PR_RELEASE:
- return blkdev_pr_release(bdev, argp);
+ return blkdev_pr_release(bdev, mode, argp);
case IOC_PR_PREEMPT:
- return blkdev_pr_preempt(bdev, argp, false);
+ return blkdev_pr_preempt(bdev, mode, argp, false);
case IOC_PR_PREEMPT_ABORT:
- return blkdev_pr_preempt(bdev, argp, true);
+ return blkdev_pr_preempt(bdev, mode, argp, true);
case IOC_PR_CLEAR:
- return blkdev_pr_clear(bdev, argp);
+ return blkdev_pr_clear(bdev, mode, argp);
default:
return -ENOIOCTLCMD;
}
@@ -560,18 +575,9 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct block_device *bdev = I_BDEV(file->f_mapping->host);
void __user *argp = (void __user *)arg;
- fmode_t mode = file->f_mode;
+ blk_mode_t mode = file_to_blk_mode(file);
int ret;
- /*
- * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
- * to updated it before every ioctl.
- */
- if (file->f_flags & O_NDELAY)
- mode |= FMODE_NDELAY;
- else
- mode &= ~FMODE_NDELAY;
-
switch (cmd) {
/* These need separate implementations for the data structure */
case HDIO_GETGEO:
@@ -630,16 +636,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
void __user *argp = compat_ptr(arg);
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct gendisk *disk = bdev->bd_disk;
- fmode_t mode = file->f_mode;
-
- /*
- * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
- * to updated it before every ioctl.
- */
- if (file->f_flags & O_NDELAY)
- mode |= FMODE_NDELAY;
- else
- mode &= ~FMODE_NDELAY;
+ blk_mode_t mode = file_to_blk_mode(file);
switch (cmd) {
/* These need separate implementations for the data structure */
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 5839a027e0f0..6aa5daf7ae32 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -74,8 +74,8 @@ struct dd_per_prio {
struct list_head dispatch;
struct rb_root sort_list[DD_DIR_COUNT];
struct list_head fifo_list[DD_DIR_COUNT];
- /* Next request in FIFO order. Read, write or both are NULL. */
- struct request *next_rq[DD_DIR_COUNT];
+ /* Position of the most recently dispatched request. */
+ sector_t latest_pos[DD_DIR_COUNT];
struct io_stats_per_prio stats;
};
@@ -156,6 +156,40 @@ deadline_latter_request(struct request *rq)
return NULL;
}
+/*
+ * Return the first request for which blk_rq_pos() >= @pos. For zoned devices,
+ * return the first request after the start of the zone containing @pos.
+ */
+static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
+				enum dd_data_dir data_dir, sector_t pos)
+{
+	struct rb_node *node = per_prio->sort_list[data_dir].rb_node;
+	struct request *rq, *res = NULL;
+
+	if (!node)
+		return NULL;
+
+	rq = rb_entry_rq(node);
+	/*
+	 * A zoned write may have been requeued with a starting position that
+	 * is below that of the most recently dispatched request. Hence, for
+	 * zoned writes, start searching from the start of a zone.
+	 */
+	if (blk_rq_is_seq_zoned_write(rq))
+		pos = round_down(pos, rq->q->limits.chunk_sectors);
+	/* Lower-bound descent: record each match, then look left for a smaller one. */
+	while (node) {
+		rq = rb_entry_rq(node);
+		if (blk_rq_pos(rq) >= pos) {
+			res = rq;
+			node = node->rb_left;
+		} else {
+			node = node->rb_right;
+		}
+	}
+	return res;
+}
+
static void
deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
@@ -167,11 +201,6 @@ deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
static inline void
deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq)
{
- const enum dd_data_dir data_dir = rq_data_dir(rq);
-
- if (per_prio->next_rq[data_dir] == rq)
- per_prio->next_rq[data_dir] = deadline_latter_request(rq);
-
elv_rb_del(deadline_rb_root(per_prio, rq), rq);
}
@@ -251,10 +280,6 @@ static void
deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
struct request *rq)
{
- const enum dd_data_dir data_dir = rq_data_dir(rq);
-
- per_prio->next_rq[data_dir] = deadline_latter_request(rq);
-
/*
* take it off the sort and fifo list
*/
@@ -272,21 +297,15 @@ static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
}
/*
- * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
- * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
+ * deadline_check_fifo returns true if and only if there are expired requests
+ * in the FIFO list. Requires !list_empty(&dd->fifo_list[data_dir]).
*/
-static inline int deadline_check_fifo(struct dd_per_prio *per_prio,
- enum dd_data_dir data_dir)
+static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
+ enum dd_data_dir data_dir)
{
struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
- /*
- * rq is expired!
- */
- if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
- return 1;
-
- return 0;
+ return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
}
/*
@@ -310,14 +329,11 @@ static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
struct request *rq)
{
sector_t pos = blk_rq_pos(rq);
- sector_t skipped_sectors = 0;
- while (rq) {
- if (blk_rq_pos(rq) != pos + skipped_sectors)
- break;
- skipped_sectors += blk_rq_sectors(rq);
+ do {
+ pos += blk_rq_sectors(rq);
rq = deadline_latter_request(rq);
- }
+ } while (rq && blk_rq_pos(rq) == pos);
return rq;
}
@@ -330,7 +346,7 @@ static struct request *
deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
enum dd_data_dir data_dir)
{
- struct request *rq;
+ struct request *rq, *rb_rq, *next;
unsigned long flags;
if (list_empty(&per_prio->fifo_list[data_dir]))
@@ -348,7 +364,12 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
* zones and these zones are unlocked.
*/
spin_lock_irqsave(&dd->zone_lock, flags);
- list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
+ list_for_each_entry_safe(rq, next, &per_prio->fifo_list[DD_WRITE],
+ queuelist) {
+ /* Check whether a prior request exists for the same zone. */
+ rb_rq = deadline_from_pos(per_prio, data_dir, blk_rq_pos(rq));
+ if (rb_rq && blk_rq_pos(rb_rq) < blk_rq_pos(rq))
+ rq = rb_rq;
if (blk_req_can_dispatch_to_zone(rq) &&
(blk_queue_nonrot(rq->q) ||
!deadline_is_seq_write(dd, rq)))
@@ -372,7 +393,8 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
struct request *rq;
unsigned long flags;
- rq = per_prio->next_rq[data_dir];
+ rq = deadline_from_pos(per_prio, data_dir,
+ per_prio->latest_pos[data_dir]);
if (!rq)
return NULL;
@@ -435,6 +457,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (started_after(dd, rq, latest_start))
return NULL;
list_del_init(&rq->queuelist);
+ data_dir = rq_data_dir(rq);
goto done;
}
@@ -442,9 +465,11 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
* batches are currently reads XOR writes
*/
rq = deadline_next_request(dd, per_prio, dd->last_dir);
- if (rq && dd->batching < dd->fifo_batch)
- /* we have a next request are still entitled to batch */
+ if (rq && dd->batching < dd->fifo_batch) {
+ /* we have a next request and are still entitled to batch */
+ data_dir = rq_data_dir(rq);
goto dispatch_request;
+ }
/*
* at this point we are not running a batch. select the appropriate
@@ -522,6 +547,7 @@ dispatch_request:
done:
ioprio_class = dd_rq_ioclass(rq);
prio = ioprio_class_to_prio[ioprio_class];
+ dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
dd->per_prio[prio].stats.dispatched++;
/*
* If the request needs its target zone locked, do it.
@@ -766,7 +792,7 @@ static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
* add rq to rbtree and fifo
*/
static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
- blk_insert_t flags)
+ blk_insert_t flags, struct list_head *free)
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
@@ -775,7 +801,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
struct dd_per_prio *per_prio;
enum dd_prio prio;
- LIST_HEAD(free);
lockdep_assert_held(&dd->lock);
@@ -792,10 +817,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
rq->elv.priv[0] = (void *)(uintptr_t)1;
}
- if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
- blk_mq_free_requests(&free);
+ if (blk_mq_sched_try_insert_merge(q, rq, free))
return;
- }
trace_block_rq_insert(rq);
@@ -803,6 +826,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
list_add(&rq->queuelist, &per_prio->dispatch);
rq->fifo_time = jiffies;
} else {
+ struct list_head *insert_before;
+
deadline_add_rq_rb(per_prio, rq);
if (rq_mergeable(rq)) {
@@ -815,7 +840,20 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
* set expire time and add to fifo list
*/
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
- list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]);
+ insert_before = &per_prio->fifo_list[data_dir];
+#ifdef CONFIG_BLK_DEV_ZONED
+ /*
+ * Insert zoned writes such that requests are sorted by
+ * position per zone.
+ */
+ if (blk_rq_is_seq_zoned_write(rq)) {
+ struct request *rq2 = deadline_latter_request(rq);
+
+ if (rq2 && blk_rq_zone_no(rq2) == blk_rq_zone_no(rq))
+ insert_before = &rq2->queuelist;
+ }
+#endif
+ list_add_tail(&rq->queuelist, insert_before);
}
}
@@ -828,6 +866,7 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
+ LIST_HEAD(free);
spin_lock(&dd->lock);
while (!list_empty(list)) {
@@ -835,9 +874,11 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist);
- dd_insert_request(hctx, rq, flags);
+ dd_insert_request(hctx, rq, flags, &free);
}
spin_unlock(&dd->lock);
+
+ blk_mq_free_requests(&free);
}
/* Callback from inside blk_mq_rq_ctx_init(). */
@@ -1035,8 +1076,10 @@ static int deadline_##name##_next_rq_show(void *data, \
struct request_queue *q = data; \
struct deadline_data *dd = q->elevator->elevator_data; \
struct dd_per_prio *per_prio = &dd->per_prio[prio]; \
- struct request *rq = per_prio->next_rq[data_dir]; \
+ struct request *rq; \
\
+ rq = deadline_from_pos(per_prio, data_dir, \
+ per_prio->latest_pos[data_dir]); \
if (rq) \
__blk_mq_debugfs_rq_show(m, rq); \
return 0; \
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 5c8624e26a54..ed222b9c901b 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -11,10 +11,18 @@
#define pr_fmt(fmt) fmt
#include <linux/types.h>
+#include <linux/mm_types.h>
+#include <linux/overflow.h>
#include <linux/affs_hardblocks.h>
#include "check.h"
+/* magic offsets in partition DosEnvVec */
+#define NR_HD 3
+#define NR_SECT 5
+#define LO_CYL 9
+#define HI_CYL 10
+
static __inline__ u32
checksum_block(__be32 *m, int size)
{
@@ -31,8 +39,12 @@ int amiga_partition(struct parsed_partitions *state)
unsigned char *data;
struct RigidDiskBlock *rdb;
struct PartitionBlock *pb;
- int start_sect, nr_sects, blk, part, res = 0;
- int blksize = 1; /* Multiplier for disk block size */
+ u64 start_sect, nr_sects;
+ sector_t blk, end_sect;
+ u32 cylblk; /* rdb_CylBlocks = nr_heads*sect_per_track */
+ u32 nr_hd, nr_sect, lo_cyl, hi_cyl;
+ int part, res = 0;
+ unsigned int blksize = 1; /* Multiplier for disk block size */
int slot = 1;
for (blk = 0; ; blk++, put_dev_sector(sect)) {
@@ -40,7 +52,7 @@ int amiga_partition(struct parsed_partitions *state)
goto rdb_done;
data = read_part_sector(state, blk, &sect);
if (!data) {
- pr_err("Dev %s: unable to read RDB block %d\n",
+ pr_err("Dev %s: unable to read RDB block %llu\n",
state->disk->disk_name, blk);
res = -1;
goto rdb_done;
@@ -57,12 +69,12 @@ int amiga_partition(struct parsed_partitions *state)
*(__be32 *)(data+0xdc) = 0;
if (checksum_block((__be32 *)data,
be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
- pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n",
+ pr_err("Trashed word at 0xd0 in block %llu ignored in checksum calculation\n",
blk);
break;
}
- pr_err("Dev %s: RDB in block %d has bad checksum\n",
+ pr_err("Dev %s: RDB in block %llu has bad checksum\n",
state->disk->disk_name, blk);
}
@@ -79,10 +91,15 @@ int amiga_partition(struct parsed_partitions *state)
blk = be32_to_cpu(rdb->rdb_PartitionList);
put_dev_sector(sect);
for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
- blk *= blksize; /* Read in terms partition table understands */
+ /* Read in terms partition table understands */
+ if (check_mul_overflow(blk, (sector_t) blksize, &blk)) {
+ pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n",
+ state->disk->disk_name, blk, part);
+ break;
+ }
data = read_part_sector(state, blk, &sect);
if (!data) {
- pr_err("Dev %s: unable to read partition block %d\n",
+ pr_err("Dev %s: unable to read partition block %llu\n",
state->disk->disk_name, blk);
res = -1;
goto rdb_done;
@@ -94,19 +111,70 @@ int amiga_partition(struct parsed_partitions *state)
if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
continue;
- /* Tell Kernel about it */
+ /* RDB gives us more than enough rope to hang ourselves with,
+ * many times over (2^128 bytes if all fields max out).
+ * Some careful checks are in order, so check for potential
+ * overflows.
+ * We are multiplying four 32 bit numbers to one sector_t!
+ */
+
+ nr_hd = be32_to_cpu(pb->pb_Environment[NR_HD]);
+ nr_sect = be32_to_cpu(pb->pb_Environment[NR_SECT]);
+
+ /* CylBlocks is total number of blocks per cylinder */
+ if (check_mul_overflow(nr_hd, nr_sect, &cylblk)) {
+ pr_err("Dev %s: heads*sects %u overflows u32, skipping partition!\n",
+ state->disk->disk_name, cylblk);
+ continue;
+ }
+
+ /* check for consistency with RDB defined CylBlocks */
+ if (cylblk > be32_to_cpu(rdb->rdb_CylBlocks)) {
+ pr_warn("Dev %s: cylblk %u > rdb_CylBlocks %u!\n",
+ state->disk->disk_name, cylblk,
+ be32_to_cpu(rdb->rdb_CylBlocks));
+ }
+
+ /* RDB allows for variable logical block size -
+ * normalize to 512 byte blocks and check result.
+ */
+
+ if (check_mul_overflow(cylblk, blksize, &cylblk)) {
+ pr_err("Dev %s: partition %u bytes per cyl. overflows u32, skipping partition!\n",
+ state->disk->disk_name, part);
+ continue;
+ }
+
+ /* Calculate partition start and end. Limit of 32 bit on cylblk
+ * guarantees no overflow occurs if LBD support is enabled.
+ */
+
+ lo_cyl = be32_to_cpu(pb->pb_Environment[LO_CYL]);
+ start_sect = ((u64) lo_cyl * cylblk);
+
+ hi_cyl = be32_to_cpu(pb->pb_Environment[HI_CYL]);
+ nr_sects = (((u64) hi_cyl - lo_cyl + 1) * cylblk);
- nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
- be32_to_cpu(pb->pb_Environment[9])) *
- be32_to_cpu(pb->pb_Environment[3]) *
- be32_to_cpu(pb->pb_Environment[5]) *
- blksize;
if (!nr_sects)
continue;
- start_sect = be32_to_cpu(pb->pb_Environment[9]) *
- be32_to_cpu(pb->pb_Environment[3]) *
- be32_to_cpu(pb->pb_Environment[5]) *
- blksize;
+
+ /* Warn user if partition end overflows u32 (AmigaDOS limit) */
+
+ if ((start_sect + nr_sects) > UINT_MAX) {
+ pr_warn("Dev %s: partition %u (%llu-%llu) needs 64 bit device support!\n",
+ state->disk->disk_name, part,
+ start_sect, start_sect + nr_sects);
+ }
+
+ if (check_add_overflow(start_sect, nr_sects, &end_sect)) {
+ pr_err("Dev %s: partition %u (%llu-%llu) needs LBD device support, skipping partition!\n",
+ state->disk->disk_name, part,
+ start_sect, end_sect);
+ continue;
+ }
+
+ /* Tell Kernel about it */
+
put_partition(state,slot++,start_sect,nr_sects);
{
/* Be even more informative to aid mounting */
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 49e0496ff23c..13a7341299a9 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -12,7 +12,7 @@
#include <linux/raid/detect.h>
#include "check.h"
-static int (*check_part[])(struct parsed_partitions *) = {
+static int (*const check_part[])(struct parsed_partitions *) = {
/*
* Probe partition formats with tables at disk address 0
* that also have an ADFS boot block at 0xdc0.
@@ -228,7 +228,7 @@ static struct attribute *part_attrs[] = {
NULL
};
-static struct attribute_group part_attr_group = {
+static const struct attribute_group part_attr_group = {
.attrs = part_attrs,
};
@@ -256,31 +256,36 @@ static int part_uevent(const struct device *dev, struct kobj_uevent_env *env)
return 0;
}
-struct device_type part_type = {
+const struct device_type part_type = {
.name = "partition",
.groups = part_attr_groups,
.release = part_release,
.uevent = part_uevent,
};
-static void delete_partition(struct block_device *part)
+void drop_partition(struct block_device *part)
{
lockdep_assert_held(&part->bd_disk->open_mutex);
- fsync_bdev(part);
- __invalidate_device(part, true);
-
xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
kobject_put(part->bd_holder_dir);
+
device_del(&part->bd_device);
+ put_device(&part->bd_device);
+}
+static void delete_partition(struct block_device *part)
+{
/*
* Remove the block device from the inode hash, so that it cannot be
* looked up any more even when openers still hold references.
*/
remove_inode_hash(part->bd_inode);
- put_device(&part->bd_device);
+ fsync_bdev(part);
+ __invalidate_device(part, true);
+
+ drop_partition(part);
}
static ssize_t whole_disk_show(struct device *dev,
@@ -288,7 +293,7 @@ static ssize_t whole_disk_show(struct device *dev,
{
return 0;
}
-static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
+static const DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
/*
* Must be called either with open_mutex held, before a disk can be opened or
@@ -436,10 +441,21 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start,
int bdev_add_partition(struct gendisk *disk, int partno, sector_t start,
sector_t length)
{
+ sector_t capacity = get_capacity(disk), end;
struct block_device *part;
int ret;
mutex_lock(&disk->open_mutex);
+ if (check_add_overflow(start, length, &end)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (start >= capacity || end > capacity) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!disk_live(disk)) {
ret = -ENXIO;
goto out;
@@ -519,17 +535,6 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
return true;
}
-void blk_drop_partitions(struct gendisk *disk)
-{
- struct block_device *part;
- unsigned long idx;
-
- lockdep_assert_held(&disk->open_mutex);
-
- xa_for_each_start(&disk->part_tbl, idx, part, 1)
- delete_partition(part);
-}
-
static bool blk_add_partition(struct gendisk *disk,
struct parsed_partitions *state, int p)
{
@@ -646,6 +651,8 @@ out_free_state:
int bdev_disk_changed(struct gendisk *disk, bool invalidate)
{
+ struct block_device *part;
+ unsigned long idx;
int ret = 0;
lockdep_assert_held(&disk->open_mutex);
@@ -658,8 +665,9 @@ rescan:
return -EBUSY;
sync_blockdev(disk->part0);
invalidate_bdev(disk->part0);
- blk_drop_partitions(disk);
+ xa_for_each_start(&disk->part_tbl, idx, part, 1)
+ delete_partition(part);
clear_bit(GD_NEED_PART_SCAN, &disk->state);
/*
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 5f7252a5b7b4..6218c773d71c 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -482,7 +482,6 @@ static const struct proto_ops alg_proto_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
@@ -531,50 +530,25 @@ static const struct net_proto_family alg_family = {
.owner = THIS_MODULE,
};
-int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
-{
- size_t off;
- ssize_t n;
- int npages, i;
-
- n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
- if (n < 0)
- return n;
-
- npages = DIV_ROUND_UP(off + n, PAGE_SIZE);
- if (WARN_ON(npages == 0))
- return -EINVAL;
- /* Add one extra for linking */
- sg_init_table(sgl->sg, npages + 1);
-
- for (i = 0, len = n; i < npages; i++) {
- int plen = min_t(int, len, PAGE_SIZE - off);
-
- sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
-
- off = 0;
- len -= plen;
- }
- sg_mark_end(sgl->sg + npages - 1);
- sgl->npages = npages;
-
- return n;
-}
-EXPORT_SYMBOL_GPL(af_alg_make_sg);
-
static void af_alg_link_sg(struct af_alg_sgl *sgl_prev,
struct af_alg_sgl *sgl_new)
{
- sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
- sg_chain(sgl_prev->sg, sgl_prev->npages + 1, sgl_new->sg);
+ sg_unmark_end(sgl_prev->sgt.sgl + sgl_prev->sgt.nents - 1);
+ sg_chain(sgl_prev->sgt.sgl, sgl_prev->sgt.nents + 1, sgl_new->sgt.sgl);
}
void af_alg_free_sg(struct af_alg_sgl *sgl)
{
int i;
- for (i = 0; i < sgl->npages; i++)
- put_page(sgl->pages[i]);
+ if (sgl->sgt.sgl) {
+ if (sgl->need_unpin)
+ for (i = 0; i < sgl->sgt.nents; i++)
+ unpin_user_page(sg_page(&sgl->sgt.sgl[i]));
+ if (sgl->sgt.sgl != sgl->sgl)
+ kvfree(sgl->sgt.sgl);
+ sgl->sgt.sgl = NULL;
+ }
}
EXPORT_SYMBOL_GPL(af_alg_free_sg);
@@ -1015,7 +989,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
while (size) {
struct scatterlist *sg;
size_t len = size;
- size_t plen;
+ ssize_t plen;
/* use the existing memory in an allocated page */
if (ctx->merge) {
@@ -1060,35 +1034,58 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (sgl->cur)
sg_unmark_end(sg + sgl->cur - 1);
- do {
- struct page *pg;
- unsigned int i = sgl->cur;
-
- plen = min_t(size_t, len, PAGE_SIZE);
-
- pg = alloc_page(GFP_KERNEL);
- if (!pg) {
- err = -ENOMEM;
+ if (msg->msg_flags & MSG_SPLICE_PAGES) {
+ struct sg_table sgtable = {
+ .sgl = sg,
+ .nents = sgl->cur,
+ .orig_nents = sgl->cur,
+ };
+
+ plen = extract_iter_to_sg(&msg->msg_iter, len, &sgtable,
+ MAX_SGL_ENTS - sgl->cur, 0);
+ if (plen < 0) {
+ err = plen;
goto unlock;
}
- sg_assign_page(sg + i, pg);
-
- err = memcpy_from_msg(page_address(sg_page(sg + i)),
- msg, plen);
- if (err) {
- __free_page(sg_page(sg + i));
- sg_assign_page(sg + i, NULL);
- goto unlock;
- }
-
- sg[i].length = plen;
+ for (; sgl->cur < sgtable.nents; sgl->cur++)
+ get_page(sg_page(&sg[sgl->cur]));
len -= plen;
ctx->used += plen;
copied += plen;
size -= plen;
- sgl->cur++;
- } while (len && sgl->cur < MAX_SGL_ENTS);
+ } else {
+ do {
+ struct page *pg;
+ unsigned int i = sgl->cur;
+
+ plen = min_t(size_t, len, PAGE_SIZE);
+
+ pg = alloc_page(GFP_KERNEL);
+ if (!pg) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ sg_assign_page(sg + i, pg);
+
+ err = memcpy_from_msg(
+ page_address(sg_page(sg + i)),
+ msg, plen);
+ if (err) {
+ __free_page(sg_page(sg + i));
+ sg_assign_page(sg + i, NULL);
+ goto unlock;
+ }
+
+ sg[i].length = plen;
+ len -= plen;
+ ctx->used += plen;
+ copied += plen;
+ size -= plen;
+ sgl->cur++;
+ } while (len && sgl->cur < MAX_SGL_ENTS);
+ }
if (!size)
sg_mark_end(sg + sgl->cur - 1);
@@ -1109,69 +1106,6 @@ unlock:
EXPORT_SYMBOL_GPL(af_alg_sendmsg);
/**
- * af_alg_sendpage - sendpage system call handler
- * @sock: socket of connection to user space to write to
- * @page: data to send
- * @offset: offset into page to begin sending
- * @size: length of data
- * @flags: message send/receive flags
- *
- * This is a generic implementation of sendpage to fill ctx->tsgl_list.
- */
-ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-{
- struct sock *sk = sock->sk;
- struct alg_sock *ask = alg_sk(sk);
- struct af_alg_ctx *ctx = ask->private;
- struct af_alg_tsgl *sgl;
- int err = -EINVAL;
-
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
-
- lock_sock(sk);
- if (!ctx->more && ctx->used)
- goto unlock;
-
- if (!size)
- goto done;
-
- if (!af_alg_writable(sk)) {
- err = af_alg_wait_for_wmem(sk, flags);
- if (err)
- goto unlock;
- }
-
- err = af_alg_alloc_tsgl(sk);
- if (err)
- goto unlock;
-
- ctx->merge = 0;
- sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
-
- if (sgl->cur)
- sg_unmark_end(sgl->sg + sgl->cur - 1);
-
- sg_mark_end(sgl->sg + sgl->cur);
-
- get_page(page);
- sg_set_page(sgl->sg + sgl->cur, page, size, offset);
- sgl->cur++;
- ctx->used += size;
-
-done:
- ctx->more = flags & MSG_MORE;
-
-unlock:
- af_alg_data_wakeup(sk);
- release_sock(sk);
-
- return err ?: size;
-}
-EXPORT_SYMBOL_GPL(af_alg_sendpage);
-
-/**
* af_alg_free_resources - release resources required for crypto request
* @areq: Request holding the TX and RX SGL
*/
@@ -1288,8 +1222,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
while (maxsize > len && msg_data_left(msg)) {
struct af_alg_rsgl *rsgl;
+ ssize_t err;
size_t seglen;
- int err;
/* limit the amount of readable buffers */
if (!af_alg_readable(sk))
@@ -1306,16 +1240,23 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
return -ENOMEM;
}
- rsgl->sgl.npages = 0;
+ rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
+ rsgl->sgl.sgt.nents = 0;
+ rsgl->sgl.sgt.orig_nents = 0;
list_add_tail(&rsgl->list, &areq->rsgl_list);
- /* make one iovec available as scatterlist */
- err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+ sg_init_table(rsgl->sgl.sgt.sgl, ALG_MAX_PAGES);
+ err = extract_iter_to_sg(&msg->msg_iter, seglen, &rsgl->sgl.sgt,
+ ALG_MAX_PAGES, 0);
if (err < 0) {
rsgl->sg_num_bytes = 0;
return err;
}
+ sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
+ rsgl->sgl.need_unpin =
+ iov_iter_extract_will_pin(&msg->msg_iter);
+
/* chain the new scatterlist with previous one */
if (areq->last_rsgl)
af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 42493b4d8ce4..7d58cbbce4af 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -9,10 +9,10 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
- * filled by user space with the data submitted via sendpage/sendmsg. Filling
- * up the TX SGL does not cause a crypto operation -- the data will only be
- * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
- * provide a buffer which is tracked with the RX SGL.
+ * filled by user space with the data submitted via sendmsg (maybe with
+ * MSG_SPLICE_PAGES). Filling up the TX SGL does not cause a crypto operation
+ * -- the data will only be tracked by the kernel. Upon receipt of one recvmsg
+ * call, the caller must provide a buffer which is tracked with the RX SGL.
*
* During the processing of the recvmsg operation, the cipher request is
* allocated and prepared. As part of the recvmsg operation, the processed
@@ -113,19 +113,19 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
}
/*
- * Data length provided by caller via sendmsg/sendpage that has not
- * yet been processed.
+ * Data length provided by caller via sendmsg that has not yet been
+ * processed.
*/
used = ctx->used;
/*
- * Make sure sufficient data is present -- note, the same check is
- * also present in sendmsg/sendpage. The checks in sendpage/sendmsg
- * shall provide an information to the data sender that something is
- * wrong, but they are irrelevant to maintain the kernel integrity.
- * We need this check here too in case user space decides to not honor
- * the error message in sendmsg/sendpage and still call recvmsg. This
- * check here protects the kernel integrity.
+ * Make sure sufficient data is present -- note, the same check is also
+ * present in sendmsg. The checks in sendmsg shall provide an
+ * information to the data sender that something is wrong, but they are
+ * irrelevant to maintain the kernel integrity. We need this check
+ * here too in case user space decides to not honor the error message
+ * in sendmsg and still call recvmsg. This check here protects the
+ * kernel integrity.
*/
if (!aead_sufficient_data(sk))
return -EINVAL;
@@ -210,7 +210,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
*/
/* Use the RX SGL as source (and destination) for crypto op. */
- rsgl_src = areq->first_rsgl.sgl.sg;
+ rsgl_src = areq->first_rsgl.sgl.sgt.sgl;
if (ctx->enc) {
/*
@@ -224,7 +224,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
* RX SGL: AAD || PT || Tag
*/
err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
- areq->first_rsgl.sgl.sg, processed);
+ areq->first_rsgl.sgl.sgt.sgl,
+ processed);
if (err)
goto free;
af_alg_pull_tsgl(sk, processed, NULL, 0);
@@ -242,7 +243,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
/* Copy AAD || CT to RX SGL buffer for in-place operation. */
err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
- areq->first_rsgl.sgl.sg, outlen);
+ areq->first_rsgl.sgl.sgt.sgl,
+ outlen);
if (err)
goto free;
@@ -267,10 +269,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
if (usedpages) {
/* RX SGL present */
struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
+ struct scatterlist *sg = sgl_prev->sgt.sgl;
- sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
- sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
- areq->tsgl);
+ sg_unmark_end(sg + sgl_prev->sgt.nents - 1);
+ sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl);
} else
/* no RX SGL present (e.g. authentication only) */
rsgl_src = areq->tsgl;
@@ -278,7 +280,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
/* Initialize the crypto operation */
aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src,
- areq->first_rsgl.sgl.sg, used, ctx->iv);
+ areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv);
aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
@@ -368,7 +370,6 @@ static struct proto_ops algif_aead_ops = {
.release = af_alg_release,
.sendmsg = aead_sendmsg,
- .sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
.poll = af_alg_poll,
};
@@ -420,18 +421,6 @@ static int aead_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return aead_sendmsg(sock, msg, size);
}
-static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-{
- int err;
-
- err = aead_check_key(sock);
- if (err)
- return err;
-
- return af_alg_sendpage(sock, page, offset, size, flags);
-}
-
static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -459,7 +448,6 @@ static struct proto_ops algif_aead_ops_nokey = {
.release = af_alg_release,
.sendmsg = aead_sendmsg_nokey,
- .sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
.poll = af_alg_poll,
};
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 63af72e19fa8..0ab43e149f0e 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -63,122 +63,114 @@ static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
size_t ignored)
{
- int limit = ALG_MAX_PAGES * PAGE_SIZE;
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
- long copied = 0;
+ ssize_t copied = 0;
+ size_t len, max_pages, npages;
+ bool continuing = ctx->more, need_init = false;
int err;
- if (limit > sk->sk_sndbuf)
- limit = sk->sk_sndbuf;
+ max_pages = min_t(size_t, ALG_MAX_PAGES,
+ DIV_ROUND_UP(sk->sk_sndbuf, PAGE_SIZE));
lock_sock(sk);
- if (!ctx->more) {
- if ((msg->msg_flags & MSG_MORE))
- hash_free_result(sk, ctx);
-
- err = crypto_wait_req(crypto_ahash_init(&ctx->req), &ctx->wait);
- if (err)
- goto unlock;
- }
-
- ctx->more = false;
-
- while (msg_data_left(msg)) {
- int len = msg_data_left(msg);
-
- if (len > limit)
- len = limit;
-
- len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
- if (len < 0) {
- err = copied ? 0 : len;
- goto unlock;
- }
-
- ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len);
-
- err = crypto_wait_req(crypto_ahash_update(&ctx->req),
- &ctx->wait);
- af_alg_free_sg(&ctx->sgl);
- if (err) {
- iov_iter_revert(&msg->msg_iter, len);
- goto unlock;
+ if (!continuing) {
+ /* Discard a previous request that wasn't marked MSG_MORE. */
+ hash_free_result(sk, ctx);
+ if (!msg_data_left(msg))
+ goto done; /* Zero-length; don't start new req */
+ need_init = true;
+ } else if (!msg_data_left(msg)) {
+ /*
+ * No data - finalise the prev req if MSG_MORE so any error
+ * comes out here.
+ */
+ if (!(msg->msg_flags & MSG_MORE)) {
+ err = hash_alloc_result(sk, ctx);
+ if (err)
+ goto unlock_free;
+ ahash_request_set_crypt(&ctx->req, NULL,
+ ctx->result, 0);
+ err = crypto_wait_req(crypto_ahash_final(&ctx->req),
+ &ctx->wait);
+ if (err)
+ goto unlock_free;
}
-
- copied += len;
+ goto done_more;
}
- err = 0;
+ while (msg_data_left(msg)) {
+ ctx->sgl.sgt.sgl = ctx->sgl.sgl;
+ ctx->sgl.sgt.nents = 0;
+ ctx->sgl.sgt.orig_nents = 0;
- ctx->more = msg->msg_flags & MSG_MORE;
- if (!ctx->more) {
- err = hash_alloc_result(sk, ctx);
- if (err)
- goto unlock;
+ err = -EIO;
+ npages = iov_iter_npages(&msg->msg_iter, max_pages);
+ if (npages == 0)
+ goto unlock_free;
- ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
- err = crypto_wait_req(crypto_ahash_final(&ctx->req),
- &ctx->wait);
- }
+ sg_init_table(ctx->sgl.sgl, npages);
-unlock:
- release_sock(sk);
+ ctx->sgl.need_unpin = iov_iter_extract_will_pin(&msg->msg_iter);
- return err ?: copied;
-}
+ err = extract_iter_to_sg(&msg->msg_iter, LONG_MAX,
+ &ctx->sgl.sgt, npages, 0);
+ if (err < 0)
+ goto unlock_free;
+ len = err;
+ sg_mark_end(ctx->sgl.sgt.sgl + ctx->sgl.sgt.nents - 1);
-static ssize_t hash_sendpage(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-{
- struct sock *sk = sock->sk;
- struct alg_sock *ask = alg_sk(sk);
- struct hash_ctx *ctx = ask->private;
- int err;
-
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
-
- lock_sock(sk);
- sg_init_table(ctx->sgl.sg, 1);
- sg_set_page(ctx->sgl.sg, page, size, offset);
-
- if (!(flags & MSG_MORE)) {
- err = hash_alloc_result(sk, ctx);
- if (err)
- goto unlock;
- } else if (!ctx->more)
- hash_free_result(sk, ctx);
+ if (!msg_data_left(msg)) {
+ err = hash_alloc_result(sk, ctx);
+ if (err)
+ goto unlock_free;
+ }
- ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
+ ahash_request_set_crypt(&ctx->req, ctx->sgl.sgt.sgl,
+ ctx->result, len);
- if (!(flags & MSG_MORE)) {
- if (ctx->more)
- err = crypto_ahash_finup(&ctx->req);
- else
+ if (!msg_data_left(msg) && !continuing &&
+ !(msg->msg_flags & MSG_MORE)) {
err = crypto_ahash_digest(&ctx->req);
- } else {
- if (!ctx->more) {
- err = crypto_ahash_init(&ctx->req);
- err = crypto_wait_req(err, &ctx->wait);
- if (err)
- goto unlock;
+ } else {
+ if (need_init) {
+ err = crypto_wait_req(
+ crypto_ahash_init(&ctx->req),
+ &ctx->wait);
+ if (err)
+ goto unlock_free;
+ need_init = false;
+ }
+
+ if (msg_data_left(msg) || (msg->msg_flags & MSG_MORE))
+ err = crypto_ahash_update(&ctx->req);
+ else
+ err = crypto_ahash_finup(&ctx->req);
+ continuing = true;
}
- err = crypto_ahash_update(&ctx->req);
- }
-
- err = crypto_wait_req(err, &ctx->wait);
- if (err)
- goto unlock;
+ err = crypto_wait_req(err, &ctx->wait);
+ if (err)
+ goto unlock_free;
- ctx->more = flags & MSG_MORE;
+ copied += len;
+ af_alg_free_sg(&ctx->sgl);
+ }
+done_more:
+ ctx->more = msg->msg_flags & MSG_MORE;
+done:
+ err = 0;
unlock:
release_sock(sk);
+ return copied ?: err;
- return err ?: size;
+unlock_free:
+ af_alg_free_sg(&ctx->sgl);
+ hash_free_result(sk, ctx);
+ ctx->more = false;
+ goto unlock;
}
static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
@@ -296,7 +288,6 @@ static struct proto_ops algif_hash_ops = {
.release = af_alg_release,
.sendmsg = hash_sendmsg,
- .sendpage = hash_sendpage,
.recvmsg = hash_recvmsg,
.accept = hash_accept,
};
@@ -348,18 +339,6 @@ static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return hash_sendmsg(sock, msg, size);
}
-static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-{
- int err;
-
- err = hash_check_key(sock);
- if (err)
- return err;
-
- return hash_sendpage(sock, page, offset, size, flags);
-}
-
static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -398,7 +377,6 @@ static struct proto_ops algif_hash_ops_nokey = {
.release = af_alg_release,
.sendmsg = hash_sendmsg_nokey,
- .sendpage = hash_sendpage_nokey,
.recvmsg = hash_recvmsg_nokey,
.accept = hash_accept_nokey,
};
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 407408c43730..10c41adac3b1 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -174,7 +174,6 @@ static struct proto_ops algif_rng_ops = {
.bind = sock_no_bind,
.accept = sock_no_accept,
.sendmsg = sock_no_sendmsg,
- .sendpage = sock_no_sendpage,
.release = af_alg_release,
.recvmsg = rng_recvmsg,
@@ -192,7 +191,6 @@ static struct proto_ops __maybe_unused algif_rng_test_ops = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.accept = sock_no_accept,
- .sendpage = sock_no_sendpage,
.release = af_alg_release,
.recvmsg = rng_test_recvmsg,
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index ee8890ee8f33..9ada9b741af8 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -9,10 +9,10 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
- * filled by user space with the data submitted via sendpage/sendmsg. Filling
- * up the TX SGL does not cause a crypto operation -- the data will only be
- * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
- * provide a buffer which is tracked with the RX SGL.
+ * filled by user space with the data submitted via sendmsg. Filling up the TX
+ * SGL does not cause a crypto operation -- the data will only be tracked by
+ * the kernel. Upon receipt of one recvmsg call, the caller must provide a
+ * buffer which is tracked with the RX SGL.
*
* During the processing of the recvmsg operation, the cipher request is
* allocated and prepared. As part of the recvmsg operation, the processed
@@ -105,7 +105,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
/* Initialize the crypto operation */
skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
- areq->first_rsgl.sgl.sg, len, ctx->iv);
+ areq->first_rsgl.sgl.sgt.sgl, len, ctx->iv);
if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
/* AIO operation */
@@ -194,7 +194,6 @@ static struct proto_ops algif_skcipher_ops = {
.release = af_alg_release,
.sendmsg = skcipher_sendmsg,
- .sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
.poll = af_alg_poll,
};
@@ -246,18 +245,6 @@ static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
return skcipher_sendmsg(sock, msg, size);
}
-static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-{
- int err;
-
- err = skcipher_check_key(sock);
- if (err)
- return err;
-
- return af_alg_sendpage(sock, page, offset, size, flags);
-}
-
static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
size_t ignored, int flags)
{
@@ -285,7 +272,6 @@ static struct proto_ops algif_skcipher_ops_nokey = {
.release = af_alg_release,
.sendmsg = skcipher_sendmsg_nokey,
- .sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
.poll = af_alg_poll,
};
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index e42c1f9ffff8..e9a1cb779b30 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -23,6 +23,7 @@
#include <linux/wait.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
+#include <drm/drm_prime.h>
#include <drm/drm_print.h>
#include <uapi/drm/qaic_accel.h>
@@ -616,8 +617,7 @@ static void qaic_free_object(struct drm_gem_object *obj)
if (obj->import_attach) {
/* DMABUF/PRIME Path */
- dma_buf_detach(obj->import_attach->dmabuf, obj->import_attach);
- dma_buf_put(obj->import_attach->dmabuf);
+ drm_prime_gem_destroy(obj, NULL);
} else {
/* Private buffer allocation path */
qaic_free_sgt(bo->sgt);
diff --git a/drivers/acpi/acpi_ffh.c b/drivers/acpi/acpi_ffh.c
index 19aff808bbb8..8d5126963dc7 100644
--- a/drivers/acpi/acpi_ffh.c
+++ b/drivers/acpi/acpi_ffh.c
@@ -9,8 +9,6 @@
#include <linux/idr.h>
#include <linux/io.h>
-#include <linux/arm-smccc.h>
-
static struct acpi_ffh_info ffh_ctx;
int __weak acpi_ffh_address_space_arch_setup(void *handler_ctxt,
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 77186f084d3a..539e700de4d2 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -201,11 +201,19 @@ static void byt_i2c_setup(struct lpss_private_data *pdata)
writel(0, pdata->mmio_base + LPSS_I2C_ENABLE);
}
-/* BSW PWM used for backlight control by the i915 driver */
+/*
+ * BSW PWM1 is used for backlight control by the i915 driver
+ * BSW PWM2 is used for backlight control for fixed (etched into the glass)
+ * touch controls on some models. These touch-controls have specialized
+ * drivers which know they need the "pwm_soc_lpss_2" con-id.
+ */
static struct pwm_lookup bsw_pwm_lookup[] = {
PWM_LOOKUP_WITH_MODULE("80862288:00", 0, "0000:00:02.0",
"pwm_soc_backlight", 0, PWM_POLARITY_NORMAL,
"pwm-lpss-platform"),
+ PWM_LOOKUP_WITH_MODULE("80862289:00", 0, NULL,
+ "pwm_soc_lpss_2", 0, PWM_POLARITY_NORMAL,
+ "pwm-lpss-platform"),
};
static void bsw_pwm_setup(struct lpss_private_data *pdata)
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index 02f1a1b1143c..7a453c5ff303 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -66,6 +66,7 @@ static void power_saving_mwait_init(void)
case X86_VENDOR_AMD:
case X86_VENDOR_INTEL:
case X86_VENDOR_ZHAOXIN:
+ case X86_VENDOR_CENTAUR:
/*
* AMD Fam10h TSC will tick in all
* C/P/S0/S1 states when this bit is set.
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index ebf8fd373cf7..79bbfe00d241 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -101,8 +101,6 @@ acpi_status
acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
acpi_event_status *event_status);
-acpi_status acpi_hw_disable_all_gpes(void);
-
acpi_status acpi_hw_enable_all_runtime_gpes(void);
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c
index 7514e38d5640..5427e49e646b 100644
--- a/drivers/acpi/apei/bert.c
+++ b/drivers/acpi/apei/bert.c
@@ -34,7 +34,7 @@
#define ACPI_BERT_PRINT_MAX_RECORDS 5
#define ACPI_BERT_PRINT_MAX_LEN 1024
-static int bert_disable;
+static int bert_disable __initdata;
/*
* Print "all" the error records in the BERT table, but avoid huge spam to
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 34ad071a64e9..ef59d6ea16da 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -152,7 +152,6 @@ struct ghes_vendor_record_entry {
};
static struct gen_pool *ghes_estatus_pool;
-static unsigned long ghes_estatus_pool_size_request;
static struct ghes_estatus_cache __rcu *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
@@ -191,7 +190,6 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
- ghes_estatus_pool_size_request = PAGE_ALIGN(len);
addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
if (!addr)
goto err_pool_alloc;
@@ -1544,6 +1542,8 @@ struct list_head *ghes_get_devices(void)
pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n");
}
+ } else if (list_empty(&ghes_devs)) {
+ return NULL;
}
return &ghes_devs;
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
index e21a9e84e394..f81fe24894b2 100644
--- a/drivers/acpi/arm64/Makefile
+++ b/drivers/acpi/arm64/Makefile
@@ -3,4 +3,4 @@ obj-$(CONFIG_ACPI_AGDI) += agdi.o
obj-$(CONFIG_ACPI_IORT) += iort.o
obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-$(CONFIG_ACPI_APMT) += apmt.o
-obj-y += dma.o
+obj-y += dma.o init.o
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c
index f605302395c3..8b3c7d42b41b 100644
--- a/drivers/acpi/arm64/agdi.c
+++ b/drivers/acpi/arm64/agdi.c
@@ -9,11 +9,11 @@
#define pr_fmt(fmt) "ACPI: AGDI: " fmt
#include <linux/acpi.h>
-#include <linux/acpi_agdi.h>
#include <linux/arm_sdei.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include "init.h"
struct agdi_data {
int sdei_event;
diff --git a/drivers/acpi/arm64/apmt.c b/drivers/acpi/arm64/apmt.c
index 8cab69fa5d59..bb010f6164e5 100644
--- a/drivers/acpi/arm64/apmt.c
+++ b/drivers/acpi/arm64/apmt.c
@@ -10,10 +10,10 @@
#define pr_fmt(fmt) "ACPI: APMT: " fmt
#include <linux/acpi.h>
-#include <linux/acpi_apmt.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include "init.h"
#define DEV_NAME "arm-cs-arch-pmu"
@@ -35,11 +35,13 @@ static int __init apmt_init_resources(struct resource *res,
num_res++;
- res[num_res].start = node->base_address1;
- res[num_res].end = node->base_address1 + SZ_4K - 1;
- res[num_res].flags = IORESOURCE_MEM;
+ if (node->flags & ACPI_APMT_FLAGS_DUAL_PAGE) {
+ res[num_res].start = node->base_address1;
+ res[num_res].end = node->base_address1 + SZ_4K - 1;
+ res[num_res].flags = IORESOURCE_MEM;
- num_res++;
+ num_res++;
+ }
if (node->ovflw_irq != 0) {
trigger = (node->ovflw_irq_flags & ACPI_APMT_OVFLW_IRQ_FLAGS_MODE);
diff --git a/drivers/acpi/arm64/init.c b/drivers/acpi/arm64/init.c
new file mode 100644
index 000000000000..d3ce53dda122
--- /dev/null
+++ b/drivers/acpi/arm64/init.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/acpi.h>
+#include "init.h"
+
+void __init acpi_arm_init(void)
+{
+ if (IS_ENABLED(CONFIG_ACPI_AGDI))
+ acpi_agdi_init();
+ if (IS_ENABLED(CONFIG_ACPI_APMT))
+ acpi_apmt_init();
+ if (IS_ENABLED(CONFIG_ACPI_IORT))
+ acpi_iort_init();
+}
diff --git a/drivers/acpi/arm64/init.h b/drivers/acpi/arm64/init.h
new file mode 100644
index 000000000000..a1715a2a34e9
--- /dev/null
+++ b/drivers/acpi/arm64/init.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/init.h>
+
+void __init acpi_agdi_init(void);
+void __init acpi_apmt_init(void);
+void __init acpi_iort_init(void);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 38fb84974f35..3631230a61c8 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -19,6 +19,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/dma-map-ops.h>
+#include "init.h"
#define IORT_TYPE_MASK(type) (1 << (type))
#define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP)
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index d161ff707de4..e3e0bd0c5a50 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -26,9 +26,6 @@
#include <asm/mpspec.h>
#include <linux/dmi.h>
#endif
-#include <linux/acpi_agdi.h>
-#include <linux/acpi_apmt.h>
-#include <linux/acpi_iort.h>
#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
@@ -530,65 +527,30 @@ static void acpi_notify_device(acpi_handle handle, u32 event, void *data)
acpi_drv->ops.notify(device, event);
}
-static void acpi_notify_device_fixed(void *data)
-{
- struct acpi_device *device = data;
-
- /* Fixed hardware devices have no handles */
- acpi_notify_device(NULL, ACPI_FIXED_HARDWARE_EVENT, device);
-}
-
-static u32 acpi_device_fixed_event(void *data)
-{
- acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_notify_device_fixed, data);
- return ACPI_INTERRUPT_HANDLED;
-}
-
static int acpi_device_install_notify_handler(struct acpi_device *device,
struct acpi_driver *acpi_drv)
{
- acpi_status status;
-
- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
- status =
- acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
- acpi_device_fixed_event,
- device);
- } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
- status =
- acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
- acpi_device_fixed_event,
- device);
- } else {
- u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
+ acpi_status status;
- status = acpi_install_notify_handler(device->handle, type,
- acpi_notify_device,
- device);
- }
-
+ status = acpi_install_notify_handler(device->handle, type,
+ acpi_notify_device, device);
if (ACPI_FAILURE(status))
return -EINVAL;
+
return 0;
}
static void acpi_device_remove_notify_handler(struct acpi_device *device,
struct acpi_driver *acpi_drv)
{
- if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
- acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
- acpi_device_fixed_event);
- } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
- acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
- acpi_device_fixed_event);
- } else {
- u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+ u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
- acpi_remove_notify_handler(device->handle, type,
- acpi_notify_device);
- }
+ acpi_remove_notify_handler(device->handle, type,
+ acpi_notify_device);
+
acpi_os_wait_events_complete();
}
@@ -1408,7 +1370,7 @@ static int __init acpi_init(void)
acpi_init_ffh();
pci_mmcfg_late_init();
- acpi_iort_init();
+ acpi_arm_init();
acpi_viot_early_init();
acpi_hest_init();
acpi_ghes_init();
@@ -1420,8 +1382,6 @@ static int __init acpi_init(void)
acpi_debugger_init();
acpi_setup_sb_notify_handler();
acpi_viot_init();
- acpi_agdi_init();
- acpi_apmt_init();
return 0;
}
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 475e1eddfa3b..1e76a64cce0a 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -78,6 +78,15 @@ static const struct dmi_system_id dmi_lid_quirks[] = {
.driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED,
},
{
+ /* Nextbook Ares 8A tablet, _LID device always reports lid closed */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"),
+ DMI_MATCH(DMI_BIOS_VERSION, "M882"),
+ },
+ .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED,
+ },
+ {
/*
* Lenovo Yoga 9 14ITL5, initial notification of the LID device
* never happens.
@@ -126,7 +135,6 @@ static const struct dmi_system_id dmi_lid_quirks[] = {
static int acpi_button_add(struct acpi_device *device);
static void acpi_button_remove(struct acpi_device *device);
-static void acpi_button_notify(struct acpi_device *device, u32 event);
#ifdef CONFIG_PM_SLEEP
static int acpi_button_suspend(struct device *dev);
@@ -144,7 +152,6 @@ static struct acpi_driver acpi_button_driver = {
.ops = {
.add = acpi_button_add,
.remove = acpi_button_remove,
- .notify = acpi_button_notify,
},
.drv.pm = &acpi_button_pm,
};
@@ -400,45 +407,65 @@ static void acpi_lid_initialize_state(struct acpi_device *device)
button->lid_state_initialized = true;
}
-static void acpi_button_notify(struct acpi_device *device, u32 event)
+static void acpi_lid_notify(acpi_handle handle, u32 event, void *data)
{
- struct acpi_button *button = acpi_driver_data(device);
+ struct acpi_device *device = data;
+ struct acpi_button *button;
+
+ if (event != ACPI_BUTTON_NOTIFY_STATUS) {
+ acpi_handle_debug(device->handle, "Unsupported event [0x%x]\n",
+ event);
+ return;
+ }
+
+ button = acpi_driver_data(device);
+ if (!button->lid_state_initialized)
+ return;
+
+ acpi_lid_update_state(device, true);
+}
+
+static void acpi_button_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct acpi_device *device = data;
+ struct acpi_button *button;
struct input_dev *input;
+ int keycode;
- switch (event) {
- case ACPI_FIXED_HARDWARE_EVENT:
- event = ACPI_BUTTON_NOTIFY_STATUS;
- fallthrough;
- case ACPI_BUTTON_NOTIFY_STATUS:
- input = button->input;
- if (button->type == ACPI_BUTTON_TYPE_LID) {
- if (button->lid_state_initialized)
- acpi_lid_update_state(device, true);
- } else {
- int keycode;
-
- acpi_pm_wakeup_event(&device->dev);
- if (button->suspended)
- break;
-
- keycode = test_bit(KEY_SLEEP, input->keybit) ?
- KEY_SLEEP : KEY_POWER;
- input_report_key(input, keycode, 1);
- input_sync(input);
- input_report_key(input, keycode, 0);
- input_sync(input);
-
- acpi_bus_generate_netlink_event(
- device->pnp.device_class,
- dev_name(&device->dev),
- event, ++button->pushed);
- }
- break;
- default:
+ if (event != ACPI_BUTTON_NOTIFY_STATUS) {
acpi_handle_debug(device->handle, "Unsupported event [0x%x]\n",
event);
- break;
+ return;
}
+
+ acpi_pm_wakeup_event(&device->dev);
+
+ button = acpi_driver_data(device);
+ if (button->suspended)
+ return;
+
+ input = button->input;
+ keycode = test_bit(KEY_SLEEP, input->keybit) ? KEY_SLEEP : KEY_POWER;
+
+ input_report_key(input, keycode, 1);
+ input_sync(input);
+ input_report_key(input, keycode, 0);
+ input_sync(input);
+
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
+ dev_name(&device->dev),
+ event, ++button->pushed);
+}
+
+static void acpi_button_notify_run(void *data)
+{
+ acpi_button_notify(NULL, ACPI_BUTTON_NOTIFY_STATUS, data);
+}
+
+static u32 acpi_button_event(void *data)
+{
+ acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_button_notify_run, data);
+ return ACPI_INTERRUPT_HANDLED;
}
#ifdef CONFIG_PM_SLEEP
@@ -480,11 +507,13 @@ static int acpi_lid_input_open(struct input_dev *input)
static int acpi_button_add(struct acpi_device *device)
{
+ acpi_notify_handler handler;
struct acpi_button *button;
struct input_dev *input;
const char *hid = acpi_device_hid(device);
+ acpi_status status;
char *name, *class;
- int error;
+ int error = 0;
if (!strcmp(hid, ACPI_BUTTON_HID_LID) &&
lid_init_state == ACPI_BUTTON_LID_INIT_DISABLED)
@@ -508,17 +537,20 @@ static int acpi_button_add(struct acpi_device *device)
if (!strcmp(hid, ACPI_BUTTON_HID_POWER) ||
!strcmp(hid, ACPI_BUTTON_HID_POWERF)) {
button->type = ACPI_BUTTON_TYPE_POWER;
+ handler = acpi_button_notify;
strcpy(name, ACPI_BUTTON_DEVICE_NAME_POWER);
sprintf(class, "%s/%s",
ACPI_BUTTON_CLASS, ACPI_BUTTON_SUBCLASS_POWER);
} else if (!strcmp(hid, ACPI_BUTTON_HID_SLEEP) ||
!strcmp(hid, ACPI_BUTTON_HID_SLEEPF)) {
button->type = ACPI_BUTTON_TYPE_SLEEP;
+ handler = acpi_button_notify;
strcpy(name, ACPI_BUTTON_DEVICE_NAME_SLEEP);
sprintf(class, "%s/%s",
ACPI_BUTTON_CLASS, ACPI_BUTTON_SUBCLASS_SLEEP);
} else if (!strcmp(hid, ACPI_BUTTON_HID_LID)) {
button->type = ACPI_BUTTON_TYPE_LID;
+ handler = acpi_lid_notify;
strcpy(name, ACPI_BUTTON_DEVICE_NAME_LID);
sprintf(class, "%s/%s",
ACPI_BUTTON_CLASS, ACPI_BUTTON_SUBCLASS_LID);
@@ -526,12 +558,15 @@ static int acpi_button_add(struct acpi_device *device)
} else {
pr_info("Unsupported hid [%s]\n", hid);
error = -ENODEV;
- goto err_free_input;
}
- error = acpi_button_add_fs(device);
- if (error)
- goto err_free_input;
+ if (!error)
+ error = acpi_button_add_fs(device);
+
+ if (error) {
+ input_free_device(input);
+ goto err_free_button;
+ }
snprintf(button->phys, sizeof(button->phys), "%s/button/input0", hid);
@@ -559,6 +594,29 @@ static int acpi_button_add(struct acpi_device *device)
error = input_register_device(input);
if (error)
goto err_remove_fs;
+
+ switch (device->device_type) {
+ case ACPI_BUS_TYPE_POWER_BUTTON:
+ status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ acpi_button_event,
+ device);
+ break;
+ case ACPI_BUS_TYPE_SLEEP_BUTTON:
+ status = acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
+ acpi_button_event,
+ device);
+ break;
+ default:
+ status = acpi_install_notify_handler(device->handle,
+ ACPI_DEVICE_NOTIFY, handler,
+ device);
+ break;
+ }
+ if (ACPI_FAILURE(status)) {
+ error = -ENODEV;
+ goto err_input_unregister;
+ }
+
if (button->type == ACPI_BUTTON_TYPE_LID) {
/*
* This assumes there's only one lid device, or if there are
@@ -571,11 +629,11 @@ static int acpi_button_add(struct acpi_device *device)
pr_info("%s [%s]\n", name, acpi_device_bid(device));
return 0;
- err_remove_fs:
+err_input_unregister:
+ input_unregister_device(input);
+err_remove_fs:
acpi_button_remove_fs(device);
- err_free_input:
- input_free_device(input);
- err_free_button:
+err_free_button:
kfree(button);
return error;
}
@@ -584,6 +642,24 @@ static void acpi_button_remove(struct acpi_device *device)
{
struct acpi_button *button = acpi_driver_data(device);
+ switch (device->device_type) {
+ case ACPI_BUS_TYPE_POWER_BUTTON:
+ acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ acpi_button_event);
+ break;
+ case ACPI_BUS_TYPE_SLEEP_BUTTON:
+ acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
+ acpi_button_event);
+ break;
+ default:
+ acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+ button->type == ACPI_BUTTON_TYPE_LID ?
+ acpi_lid_notify :
+ acpi_button_notify);
+ break;
+ }
+ acpi_os_wait_events_complete();
+
acpi_button_remove_fs(device);
input_unregister_device(button->input);
kfree(button);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 928899ab9502..8569f55e55b6 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -662,21 +662,6 @@ static void advance_transaction(struct acpi_ec *ec, bool interrupt)
ec_dbg_stm("%s (%d)", interrupt ? "IRQ" : "TASK", smp_processor_id());
- /*
- * Clear GPE_STS upfront to allow subsequent hardware GPE_STS 0->1
- * changes to always trigger a GPE interrupt.
- *
- * GPE STS is a W1C register, which means:
- *
- * 1. Software can clear it without worrying about clearing the other
- * GPEs' STS bits when the hardware sets them in parallel.
- *
- * 2. As long as software can ensure only clearing it when it is set,
- * hardware won't set it in parallel.
- */
- if (ec->gpe >= 0 && acpi_ec_gpe_status_set(ec))
- acpi_clear_gpe(NULL, ec->gpe);
-
status = acpi_ec_read_status(ec);
/*
@@ -1287,6 +1272,22 @@ static void acpi_ec_handle_interrupt(struct acpi_ec *ec)
unsigned long flags;
spin_lock_irqsave(&ec->lock, flags);
+
+ /*
+ * Clear GPE_STS upfront to allow subsequent hardware GPE_STS 0->1
+ * changes to always trigger a GPE interrupt.
+ *
+ * GPE STS is a W1C register, which means:
+ *
+ * 1. Software can clear it without worrying about clearing the other
+ * GPEs' STS bits when the hardware sets them in parallel.
+ *
+ * 2. As long as software can ensure only clearing it when it is set,
+ * hardware won't set it in parallel.
+ */
+ if (ec->gpe >= 0 && acpi_ec_gpe_status_set(ec))
+ acpi_clear_gpe(NULL, ec->gpe);
+
advance_transaction(ec, true);
spin_unlock_irqrestore(&ec->lock, flags);
}
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 6023ad61831a..573bc0de2990 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -347,4 +347,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
bool intel_fwa_supported(struct nvdimm_bus *nvdimm_bus);
extern struct device_attribute dev_attr_firmware_activate_noidle;
+void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem);
+
#endif /* __NFIT_H__ */
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 9718d07cc2a2..dc615ef6550a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -597,10 +597,6 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
io_idle(cx->address);
} else
return -ENODEV;
-
-#if defined(CONFIG_X86) && defined(CONFIG_HOTPLUG_CPU)
- cond_wakeup_cpu0();
-#endif
}
/* Never reached */
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 0800a9d77558..1dd8d5aebf67 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -470,52 +470,6 @@ static const struct dmi_system_id asus_laptop[] = {
{ }
};
-static const struct dmi_system_id lenovo_laptop[] = {
- {
- .ident = "LENOVO IdeaPad Flex 5 14ALC7",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
- DMI_MATCH(DMI_PRODUCT_NAME, "82R9"),
- },
- },
- {
- .ident = "LENOVO IdeaPad Flex 5 16ALC7",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
- DMI_MATCH(DMI_PRODUCT_NAME, "82RA"),
- },
- },
- { }
-};
-
-static const struct dmi_system_id tongfang_gm_rg[] = {
- {
- .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
- },
- },
- { }
-};
-
-static const struct dmi_system_id maingear_laptop[] = {
- {
- .ident = "MAINGEAR Vector Pro 2 15",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
- }
- },
- {
- .ident = "MAINGEAR Vector Pro 2 17",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
- DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
- },
- },
- { }
-};
-
static const struct dmi_system_id lg_laptop[] = {
{
.ident = "LG Electronics 17U70P",
@@ -539,10 +493,6 @@ struct irq_override_cmp {
static const struct irq_override_cmp override_table[] = {
{ medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
{ asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
- { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
- { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true },
- { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
- { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
{ lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
};
@@ -562,16 +512,6 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
return entry->override;
}
-#ifdef CONFIG_X86
- /*
- * IRQ override isn't needed on modern AMD Zen systems and
- * this override breaks active low IRQs on AMD Ryzen 6000 and
- * newer systems. Skip it.
- */
- if (boot_cpu_has(X86_FEATURE_ZEN))
- return false;
-#endif
-
return true;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 0c6f06abe3f4..1c3e1e2bb0b5 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2029,8 +2029,6 @@ static u32 acpi_scan_check_dep(acpi_handle handle, bool check_dep)
return count;
}
-static bool acpi_bus_scan_second_pass;
-
static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
struct acpi_device **adev_p)
{
@@ -2050,10 +2048,8 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
return AE_OK;
/* Bail out if there are dependencies. */
- if (acpi_scan_check_dep(handle, check_dep) > 0) {
- acpi_bus_scan_second_pass = true;
+ if (acpi_scan_check_dep(handle, check_dep) > 0)
return AE_CTRL_DEPTH;
- }
fallthrough;
case ACPI_TYPE_ANY: /* for ACPI_ROOT_OBJECT */
@@ -2301,6 +2297,12 @@ static bool acpi_scan_clear_dep_queue(struct acpi_device *adev)
return true;
}
+static void acpi_scan_delete_dep_data(struct acpi_dep_data *dep)
+{
+ list_del(&dep->node);
+ kfree(dep);
+}
+
static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
{
struct acpi_device *adev = acpi_get_acpi_dev(dep->consumer);
@@ -2311,8 +2313,10 @@ static int acpi_scan_clear_dep(struct acpi_dep_data *dep, void *data)
acpi_dev_put(adev);
}
- list_del(&dep->node);
- kfree(dep);
+ if (dep->free_when_met)
+ acpi_scan_delete_dep_data(dep);
+ else
+ dep->met = true;
return 0;
}
@@ -2406,6 +2410,55 @@ struct acpi_device *acpi_dev_get_next_consumer_dev(struct acpi_device *supplier,
}
EXPORT_SYMBOL_GPL(acpi_dev_get_next_consumer_dev);
+static void acpi_scan_postponed_branch(acpi_handle handle)
+{
+ struct acpi_device *adev = NULL;
+
+ if (ACPI_FAILURE(acpi_bus_check_add(handle, false, &adev)))
+ return;
+
+ acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
+ acpi_bus_check_add_2, NULL, NULL, (void **)&adev);
+ acpi_bus_attach(adev, NULL);
+}
+
+static void acpi_scan_postponed(void)
+{
+ struct acpi_dep_data *dep, *tmp;
+
+ mutex_lock(&acpi_dep_list_lock);
+
+ list_for_each_entry_safe(dep, tmp, &acpi_dep_list, node) {
+ acpi_handle handle = dep->consumer;
+
+ /*
+ * In case there are multiple acpi_dep_list entries with the
+ * same consumer, skip the current entry if the consumer device
+ * object corresponding to it is present already.
+ */
+ if (!acpi_fetch_acpi_dev(handle)) {
+ /*
+ * Even though the lock is released here, tmp is
+ * guaranteed to be valid, because none of the list
+ * entries following dep is marked as "free when met"
+ * and so they cannot be deleted.
+ */
+ mutex_unlock(&acpi_dep_list_lock);
+
+ acpi_scan_postponed_branch(handle);
+
+ mutex_lock(&acpi_dep_list_lock);
+ }
+
+ if (dep->met)
+ acpi_scan_delete_dep_data(dep);
+ else
+ dep->free_when_met = true;
+ }
+
+ mutex_unlock(&acpi_dep_list_lock);
+}
+
/**
* acpi_bus_scan - Add ACPI device node objects in a given namespace scope.
* @handle: Root of the namespace scope to scan.
@@ -2424,8 +2477,6 @@ int acpi_bus_scan(acpi_handle handle)
{
struct acpi_device *device = NULL;
- acpi_bus_scan_second_pass = false;
-
/* Pass 1: Avoid enumerating devices with missing dependencies. */
if (ACPI_SUCCESS(acpi_bus_check_add(handle, true, &device)))
@@ -2438,19 +2489,9 @@ int acpi_bus_scan(acpi_handle handle)
acpi_bus_attach(device, (void *)true);
- if (!acpi_bus_scan_second_pass)
- return 0;
-
/* Pass 2: Enumerate all of the remaining devices. */
- device = NULL;
-
- if (ACPI_SUCCESS(acpi_bus_check_add(handle, false, &device)))
- acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
- acpi_bus_check_add_2, NULL, NULL,
- (void **)&device);
-
- acpi_bus_attach(device, NULL);
+ acpi_scan_postponed();
return 0;
}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 72470b9f16c4..808484d11209 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -636,11 +636,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
}
/*
- * Disable and clear GPE status before interrupt is enabled. Some GPEs
- * (like wakeup GPE) haven't handler, this can avoid such GPE misfire.
- * acpi_leave_sleep_state will reenable specific GPEs later
+ * Disable all GPEs and clear their status bits before interrupts are
+ * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can
+ * prevent them from producing spurious interrupts.
+ *
+ * acpi_leave_sleep_state() will reenable specific GPEs later.
+ *
+ * Because this code runs on one CPU with disabled interrupts (all of
+ * the other CPUs are offline at this time), it need not acquire any
+ * sleeping locks which may trigger an implicit preemption point even
+ * if there is no contention, so avoid doing that by using a low-level
+ * library routine here.
*/
- acpi_disable_all_gpes();
+ acpi_hw_disable_all_gpes();
/* Allow EC transactions to happen. */
acpi_ec_unblock_transactions();
@@ -840,7 +848,7 @@ void __weak acpi_s2idle_setup(void)
s2idle_set_ops(&acpi_s2idle_ops);
}
-static void acpi_sleep_suspend_setup(void)
+static void __init acpi_sleep_suspend_setup(void)
{
bool suspend_ops_needed = false;
int i;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 4720a3649a61..f9f6ebb08fdb 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -40,12 +40,35 @@
#define ACPI_THERMAL_NOTIFY_HOT 0xF1
#define ACPI_THERMAL_MODE_ACTIVE 0x00
-#define ACPI_THERMAL_MAX_ACTIVE 10
-#define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
+#define ACPI_THERMAL_MAX_ACTIVE 10
+#define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
-MODULE_AUTHOR("Paul Diefenbaugh");
-MODULE_DESCRIPTION("ACPI Thermal Zone Driver");
-MODULE_LICENSE("GPL");
+#define ACPI_TRIPS_CRITICAL BIT(0)
+#define ACPI_TRIPS_HOT BIT(1)
+#define ACPI_TRIPS_PASSIVE BIT(2)
+#define ACPI_TRIPS_ACTIVE BIT(3)
+#define ACPI_TRIPS_DEVICES BIT(4)
+
+#define ACPI_TRIPS_THRESHOLDS (ACPI_TRIPS_PASSIVE | ACPI_TRIPS_ACTIVE)
+
+#define ACPI_TRIPS_INIT (ACPI_TRIPS_CRITICAL | ACPI_TRIPS_HOT | \
+ ACPI_TRIPS_PASSIVE | ACPI_TRIPS_ACTIVE | \
+ ACPI_TRIPS_DEVICES)
+
+/*
+ * This exception is thrown out in two cases:
+ * 1.An invalid trip point becomes valid or a valid trip point becomes invalid
+ * when re-evaluating the AML code.
+ * 2.TODO: Devices listed in _PSL, _ALx, _TZD may change.
+ * We need to re-bind the cooling devices of a thermal zone when this occurs.
+ */
+#define ACPI_THERMAL_TRIPS_EXCEPTION(flags, tz, str) \
+do { \
+ if (flags != ACPI_TRIPS_INIT) \
+ acpi_handle_info(tz->device->handle, \
+ "ACPI thermal trip point %s changed\n" \
+ "Please report to linux-acpi@vger.kernel.org\n", str); \
+} while (0)
static int act;
module_param(act, int, 0644);
@@ -73,75 +96,30 @@ MODULE_PARM_DESC(psv, "Disable or override all passive trip points.");
static struct workqueue_struct *acpi_thermal_pm_queue;
-static int acpi_thermal_add(struct acpi_device *device);
-static void acpi_thermal_remove(struct acpi_device *device);
-static void acpi_thermal_notify(struct acpi_device *device, u32 event);
-
-static const struct acpi_device_id thermal_device_ids[] = {
- {ACPI_THERMAL_HID, 0},
- {"", 0},
-};
-MODULE_DEVICE_TABLE(acpi, thermal_device_ids);
-
-#ifdef CONFIG_PM_SLEEP
-static int acpi_thermal_suspend(struct device *dev);
-static int acpi_thermal_resume(struct device *dev);
-#else
-#define acpi_thermal_suspend NULL
-#define acpi_thermal_resume NULL
-#endif
-static SIMPLE_DEV_PM_OPS(acpi_thermal_pm, acpi_thermal_suspend, acpi_thermal_resume);
-
-static struct acpi_driver acpi_thermal_driver = {
- .name = "thermal",
- .class = ACPI_THERMAL_CLASS,
- .ids = thermal_device_ids,
- .ops = {
- .add = acpi_thermal_add,
- .remove = acpi_thermal_remove,
- .notify = acpi_thermal_notify,
- },
- .drv.pm = &acpi_thermal_pm,
-};
-
-struct acpi_thermal_state {
- u8 critical:1;
- u8 hot:1;
- u8 passive:1;
- u8 active:1;
- u8 reserved:4;
- int active_index;
-};
-
-struct acpi_thermal_state_flags {
- u8 valid:1;
- u8 enabled:1;
- u8 reserved:6;
-};
-
struct acpi_thermal_critical {
- struct acpi_thermal_state_flags flags;
unsigned long temperature;
+ bool valid;
};
struct acpi_thermal_hot {
- struct acpi_thermal_state_flags flags;
unsigned long temperature;
+ bool valid;
};
struct acpi_thermal_passive {
- struct acpi_thermal_state_flags flags;
+ struct acpi_handle_list devices;
unsigned long temperature;
unsigned long tc1;
unsigned long tc2;
unsigned long tsp;
- struct acpi_handle_list devices;
+ bool valid;
};
struct acpi_thermal_active {
- struct acpi_thermal_state_flags flags;
- unsigned long temperature;
struct acpi_handle_list devices;
+ unsigned long temperature;
+ bool valid;
+ bool enabled;
};
struct acpi_thermal_trips {
@@ -151,12 +129,6 @@ struct acpi_thermal_trips {
struct acpi_thermal_active active[ACPI_THERMAL_MAX_ACTIVE];
};
-struct acpi_thermal_flags {
- u8 cooling_mode:1; /* _SCP */
- u8 devices:1; /* _TZD */
- u8 reserved:6;
-};
-
struct acpi_thermal {
struct acpi_device *device;
acpi_bus_id name;
@@ -164,8 +136,6 @@ struct acpi_thermal {
unsigned long last_temperature;
unsigned long polling_frequency;
volatile u8 zombie;
- struct acpi_thermal_flags flags;
- struct acpi_thermal_state state;
struct acpi_thermal_trips trips;
struct acpi_handle_list devices;
struct thermal_zone_device *thermal_zone;
@@ -220,52 +190,12 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
return 0;
}
-static int acpi_thermal_set_cooling_mode(struct acpi_thermal *tz, int mode)
-{
- if (!tz)
- return -EINVAL;
-
- if (ACPI_FAILURE(acpi_execute_simple_method(tz->device->handle,
- "_SCP", mode)))
- return -ENODEV;
-
- return 0;
-}
-
-#define ACPI_TRIPS_CRITICAL 0x01
-#define ACPI_TRIPS_HOT 0x02
-#define ACPI_TRIPS_PASSIVE 0x04
-#define ACPI_TRIPS_ACTIVE 0x08
-#define ACPI_TRIPS_DEVICES 0x10
-
-#define ACPI_TRIPS_REFRESH_THRESHOLDS (ACPI_TRIPS_PASSIVE | ACPI_TRIPS_ACTIVE)
-#define ACPI_TRIPS_REFRESH_DEVICES ACPI_TRIPS_DEVICES
-
-#define ACPI_TRIPS_INIT (ACPI_TRIPS_CRITICAL | ACPI_TRIPS_HOT | \
- ACPI_TRIPS_PASSIVE | ACPI_TRIPS_ACTIVE | \
- ACPI_TRIPS_DEVICES)
-
-/*
- * This exception is thrown out in two cases:
- * 1.An invalid trip point becomes invalid or a valid trip point becomes invalid
- * when re-evaluating the AML code.
- * 2.TODO: Devices listed in _PSL, _ALx, _TZD may change.
- * We need to re-bind the cooling devices of a thermal zone when this occurs.
- */
-#define ACPI_THERMAL_TRIPS_EXCEPTION(flags, tz, str) \
-do { \
- if (flags != ACPI_TRIPS_INIT) \
- acpi_handle_info(tz->device->handle, \
- "ACPI thermal trip point %s changed\n" \
- "Please report to linux-acpi@vger.kernel.org\n", str); \
-} while (0)
-
static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
{
acpi_status status;
unsigned long long tmp;
struct acpi_handle_list devices;
- int valid = 0;
+ bool valid = false;
int i;
/* Critical Shutdown */
@@ -279,21 +209,21 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
* ... so lets discard those as invalid.
*/
if (ACPI_FAILURE(status)) {
- tz->trips.critical.flags.valid = 0;
+ tz->trips.critical.valid = false;
acpi_handle_debug(tz->device->handle,
"No critical threshold\n");
} else if (tmp <= 2732) {
pr_info(FW_BUG "Invalid critical threshold (%llu)\n", tmp);
- tz->trips.critical.flags.valid = 0;
+ tz->trips.critical.valid = false;
} else {
- tz->trips.critical.flags.valid = 1;
+ tz->trips.critical.valid = true;
acpi_handle_debug(tz->device->handle,
"Found critical threshold [%lu]\n",
tz->trips.critical.temperature);
}
- if (tz->trips.critical.flags.valid) {
+ if (tz->trips.critical.valid) {
if (crt == -1) {
- tz->trips.critical.flags.valid = 0;
+ tz->trips.critical.valid = false;
} else if (crt > 0) {
unsigned long crt_k = celsius_to_deci_kelvin(crt);
@@ -312,12 +242,12 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
if (flag & ACPI_TRIPS_HOT) {
status = acpi_evaluate_integer(tz->device->handle, "_HOT", NULL, &tmp);
if (ACPI_FAILURE(status)) {
- tz->trips.hot.flags.valid = 0;
+ tz->trips.hot.valid = false;
acpi_handle_debug(tz->device->handle,
"No hot threshold\n");
} else {
tz->trips.hot.temperature = tmp;
- tz->trips.hot.flags.valid = 1;
+ tz->trips.hot.valid = true;
acpi_handle_debug(tz->device->handle,
"Found hot threshold [%lu]\n",
tz->trips.hot.temperature);
@@ -325,9 +255,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
}
/* Passive (optional) */
- if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.flags.valid) ||
+ if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.valid) ||
flag == ACPI_TRIPS_INIT) {
- valid = tz->trips.passive.flags.valid;
+ valid = tz->trips.passive.valid;
if (psv == -1) {
status = AE_SUPPORT;
} else if (psv > 0) {
@@ -339,44 +269,44 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
}
if (ACPI_FAILURE(status)) {
- tz->trips.passive.flags.valid = 0;
+ tz->trips.passive.valid = false;
} else {
tz->trips.passive.temperature = tmp;
- tz->trips.passive.flags.valid = 1;
+ tz->trips.passive.valid = true;
if (flag == ACPI_TRIPS_INIT) {
status = acpi_evaluate_integer(tz->device->handle,
"_TC1", NULL, &tmp);
if (ACPI_FAILURE(status))
- tz->trips.passive.flags.valid = 0;
+ tz->trips.passive.valid = false;
else
tz->trips.passive.tc1 = tmp;
status = acpi_evaluate_integer(tz->device->handle,
"_TC2", NULL, &tmp);
if (ACPI_FAILURE(status))
- tz->trips.passive.flags.valid = 0;
+ tz->trips.passive.valid = false;
else
tz->trips.passive.tc2 = tmp;
status = acpi_evaluate_integer(tz->device->handle,
"_TSP", NULL, &tmp);
if (ACPI_FAILURE(status))
- tz->trips.passive.flags.valid = 0;
+ tz->trips.passive.valid = false;
else
tz->trips.passive.tsp = tmp;
}
}
}
- if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.flags.valid) {
+ if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.valid) {
memset(&devices, 0, sizeof(struct acpi_handle_list));
status = acpi_evaluate_reference(tz->device->handle, "_PSL",
NULL, &devices);
if (ACPI_FAILURE(status)) {
acpi_handle_info(tz->device->handle,
"Invalid passive threshold\n");
- tz->trips.passive.flags.valid = 0;
+ tz->trips.passive.valid = false;
} else {
- tz->trips.passive.flags.valid = 1;
+ tz->trips.passive.valid = true;
}
if (memcmp(&tz->trips.passive.devices, &devices,
@@ -387,24 +317,24 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
}
}
if ((flag & ACPI_TRIPS_PASSIVE) || (flag & ACPI_TRIPS_DEVICES)) {
- if (valid != tz->trips.passive.flags.valid)
+ if (valid != tz->trips.passive.valid)
ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
}
/* Active (optional) */
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
char name[5] = { '_', 'A', 'C', ('0' + i), '\0' };
- valid = tz->trips.active[i].flags.valid;
+ valid = tz->trips.active[i].valid;
if (act == -1)
break; /* disable all active trip points */
if (flag == ACPI_TRIPS_INIT || ((flag & ACPI_TRIPS_ACTIVE) &&
- tz->trips.active[i].flags.valid)) {
+ tz->trips.active[i].valid)) {
status = acpi_evaluate_integer(tz->device->handle,
name, NULL, &tmp);
if (ACPI_FAILURE(status)) {
- tz->trips.active[i].flags.valid = 0;
+ tz->trips.active[i].valid = false;
if (i == 0)
break;
@@ -426,21 +356,21 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
break;
} else {
tz->trips.active[i].temperature = tmp;
- tz->trips.active[i].flags.valid = 1;
+ tz->trips.active[i].valid = true;
}
}
name[2] = 'L';
- if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].flags.valid) {
+ if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].valid) {
memset(&devices, 0, sizeof(struct acpi_handle_list));
status = acpi_evaluate_reference(tz->device->handle,
name, NULL, &devices);
if (ACPI_FAILURE(status)) {
acpi_handle_info(tz->device->handle,
"Invalid active%d threshold\n", i);
- tz->trips.active[i].flags.valid = 0;
+ tz->trips.active[i].valid = false;
} else {
- tz->trips.active[i].flags.valid = 1;
+ tz->trips.active[i].valid = true;
}
if (memcmp(&tz->trips.active[i].devices, &devices,
@@ -451,10 +381,10 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
}
}
if ((flag & ACPI_TRIPS_ACTIVE) || (flag & ACPI_TRIPS_DEVICES))
- if (valid != tz->trips.active[i].flags.valid)
+ if (valid != tz->trips.active[i].valid)
ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
- if (!tz->trips.active[i].flags.valid)
+ if (!tz->trips.active[i].valid)
break;
}
@@ -474,17 +404,18 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
{
- int i, valid, ret = acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
+ int i, ret = acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
+ bool valid;
if (ret)
return ret;
- valid = tz->trips.critical.flags.valid |
- tz->trips.hot.flags.valid |
- tz->trips.passive.flags.valid;
+ valid = tz->trips.critical.valid |
+ tz->trips.hot.valid |
+ tz->trips.passive.valid;
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++)
- valid |= tz->trips.active[i].flags.valid;
+ valid = valid || tz->trips.active[i].valid;
if (!valid) {
pr_warn(FW_BUG "No valid trip found\n");
@@ -521,7 +452,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
if (!tz || trip < 0)
return -EINVAL;
- if (tz->trips.critical.flags.valid) {
+ if (tz->trips.critical.valid) {
if (!trip) {
*type = THERMAL_TRIP_CRITICAL;
return 0;
@@ -529,7 +460,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
trip--;
}
- if (tz->trips.hot.flags.valid) {
+ if (tz->trips.hot.valid) {
if (!trip) {
*type = THERMAL_TRIP_HOT;
return 0;
@@ -537,7 +468,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
trip--;
}
- if (tz->trips.passive.flags.valid) {
+ if (tz->trips.passive.valid) {
if (!trip) {
*type = THERMAL_TRIP_PASSIVE;
return 0;
@@ -545,7 +476,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal,
trip--;
}
- for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].flags.valid; i++) {
+ for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid; i++) {
if (!trip) {
*type = THERMAL_TRIP_ACTIVE;
return 0;
@@ -565,7 +496,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
if (!tz || trip < 0)
return -EINVAL;
- if (tz->trips.critical.flags.valid) {
+ if (tz->trips.critical.valid) {
if (!trip) {
*temp = deci_kelvin_to_millicelsius_with_offset(
tz->trips.critical.temperature,
@@ -575,7 +506,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
trip--;
}
- if (tz->trips.hot.flags.valid) {
+ if (tz->trips.hot.valid) {
if (!trip) {
*temp = deci_kelvin_to_millicelsius_with_offset(
tz->trips.hot.temperature,
@@ -585,7 +516,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
trip--;
}
- if (tz->trips.passive.flags.valid) {
+ if (tz->trips.passive.valid) {
if (!trip) {
*temp = deci_kelvin_to_millicelsius_with_offset(
tz->trips.passive.temperature,
@@ -596,7 +527,7 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
}
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
- tz->trips.active[i].flags.valid; i++) {
+ tz->trips.active[i].valid; i++) {
if (!trip) {
*temp = deci_kelvin_to_millicelsius_with_offset(
tz->trips.active[i].temperature,
@@ -614,7 +545,7 @@ static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
{
struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
- if (tz->trips.critical.flags.valid) {
+ if (tz->trips.critical.valid) {
*temperature = deci_kelvin_to_millicelsius_with_offset(
tz->trips.critical.temperature,
tz->kelvin_offset);
@@ -700,13 +631,13 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
int trip = -1;
int result = 0;
- if (tz->trips.critical.flags.valid)
+ if (tz->trips.critical.valid)
trip++;
- if (tz->trips.hot.flags.valid)
+ if (tz->trips.hot.valid)
trip++;
- if (tz->trips.passive.flags.valid) {
+ if (tz->trips.passive.valid) {
trip++;
for (i = 0; i < tz->trips.passive.devices.count; i++) {
handle = tz->trips.passive.devices.handles[i];
@@ -731,7 +662,7 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
}
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
- if (!tz->trips.active[i].flags.valid)
+ if (!tz->trips.active[i].valid)
break;
trip++;
@@ -819,19 +750,19 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
acpi_status status;
int i;
- if (tz->trips.critical.flags.valid)
+ if (tz->trips.critical.valid)
trips++;
- if (tz->trips.hot.flags.valid)
+ if (tz->trips.hot.valid)
trips++;
- if (tz->trips.passive.flags.valid)
+ if (tz->trips.passive.valid)
trips++;
- for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].flags.valid;
+ for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid;
i++, trips++);
- if (tz->trips.passive.flags.valid)
+ if (tz->trips.passive.valid)
tz->thermal_zone = thermal_zone_device_register("acpitz", trips, 0, tz,
&acpi_thermal_zone_ops, NULL,
tz->trips.passive.tsp * 100,
@@ -906,13 +837,13 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event)
acpi_queue_thermal_check(tz);
break;
case ACPI_THERMAL_NOTIFY_THRESHOLDS:
- acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_THRESHOLDS);
+ acpi_thermal_trips_update(tz, ACPI_TRIPS_THRESHOLDS);
acpi_queue_thermal_check(tz);
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
break;
case ACPI_THERMAL_NOTIFY_DEVICES:
- acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_DEVICES);
+ acpi_thermal_trips_update(tz, ACPI_TRIPS_DEVICES);
acpi_queue_thermal_check(tz);
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
@@ -976,9 +907,8 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz)
return result;
/* Set the cooling mode [_SCP] to active cooling (default) */
- result = acpi_thermal_set_cooling_mode(tz, ACPI_THERMAL_MODE_ACTIVE);
- if (!result)
- tz->flags.cooling_mode = 1;
+ acpi_execute_simple_method(tz->device->handle, "_SCP",
+ ACPI_THERMAL_MODE_ACTIVE);
/* Get default polling frequency [_TZP] (optional) */
if (tzp)
@@ -1001,7 +931,7 @@ static int acpi_thermal_get_info(struct acpi_thermal *tz)
*/
static void acpi_thermal_guess_offset(struct acpi_thermal *tz)
{
- if (tz->trips.critical.flags.valid &&
+ if (tz->trips.critical.valid &&
(tz->trips.critical.temperature % 5) == 1)
tz->kelvin_offset = 273100;
else
@@ -1110,27 +1040,48 @@ static int acpi_thermal_resume(struct device *dev)
return -EINVAL;
for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
- if (!tz->trips.active[i].flags.valid)
+ if (!tz->trips.active[i].valid)
break;
- tz->trips.active[i].flags.enabled = 1;
+ tz->trips.active[i].enabled = true;
for (j = 0; j < tz->trips.active[i].devices.count; j++) {
result = acpi_bus_update_power(
tz->trips.active[i].devices.handles[j],
&power_state);
if (result || (power_state != ACPI_STATE_D0)) {
- tz->trips.active[i].flags.enabled = 0;
+ tz->trips.active[i].enabled = false;
break;
}
}
- tz->state.active |= tz->trips.active[i].flags.enabled;
}
acpi_queue_thermal_check(tz);
return AE_OK;
}
+#else
+#define acpi_thermal_suspend NULL
+#define acpi_thermal_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(acpi_thermal_pm, acpi_thermal_suspend, acpi_thermal_resume);
+
+static const struct acpi_device_id thermal_device_ids[] = {
+ {ACPI_THERMAL_HID, 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, thermal_device_ids);
+
+static struct acpi_driver acpi_thermal_driver = {
+ .name = "thermal",
+ .class = ACPI_THERMAL_CLASS,
+ .ids = thermal_device_ids,
+ .ops = {
+ .add = acpi_thermal_add,
+ .remove = acpi_thermal_remove,
+ .notify = acpi_thermal_notify,
+ },
+ .drv.pm = &acpi_thermal_pm,
+};
static int thermal_act(const struct dmi_system_id *d) {
if (act == 0) {
@@ -1236,3 +1187,7 @@ static void __exit acpi_thermal_exit(void)
module_init(acpi_thermal_init);
module_exit(acpi_thermal_exit);
+
+MODULE_AUTHOR("Paul Diefenbaugh");
+MODULE_DESCRIPTION("ACPI Thermal Zone Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/acpi/tiny-power-button.c b/drivers/acpi/tiny-power-button.c
index 598f548b21f3..6353be6fec69 100644
--- a/drivers/acpi/tiny-power-button.c
+++ b/drivers/acpi/tiny-power-button.c
@@ -19,18 +19,52 @@ static const struct acpi_device_id tiny_power_button_device_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, tiny_power_button_device_ids);
-static int acpi_noop_add(struct acpi_device *device)
+static void acpi_tiny_power_button_notify(acpi_handle handle, u32 event, void *data)
{
- return 0;
+ kill_cad_pid(power_signal, 1);
}
-static void acpi_noop_remove(struct acpi_device *device)
+static void acpi_tiny_power_button_notify_run(void *not_used)
{
+ acpi_tiny_power_button_notify(NULL, ACPI_FIXED_HARDWARE_EVENT, NULL);
}
-static void acpi_tiny_power_button_notify(struct acpi_device *device, u32 event)
+static u32 acpi_tiny_power_button_event(void *not_used)
{
- kill_cad_pid(power_signal, 1);
+ acpi_os_execute(OSL_NOTIFY_HANDLER, acpi_tiny_power_button_notify_run, NULL);
+ return ACPI_INTERRUPT_HANDLED;
+}
+
+static int acpi_tiny_power_button_add(struct acpi_device *device)
+{
+ acpi_status status;
+
+ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
+ status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ acpi_tiny_power_button_event,
+ NULL);
+ } else {
+ status = acpi_install_notify_handler(device->handle,
+ ACPI_DEVICE_NOTIFY,
+ acpi_tiny_power_button_notify,
+ NULL);
+ }
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ return 0;
+}
+
+static void acpi_tiny_power_button_remove(struct acpi_device *device)
+{
+ if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
+ acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
+ acpi_tiny_power_button_event);
+ } else {
+ acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+ acpi_tiny_power_button_notify);
+ }
+ acpi_os_wait_events_complete();
}
static struct acpi_driver acpi_tiny_power_button_driver = {
@@ -38,9 +72,8 @@ static struct acpi_driver acpi_tiny_power_button_driver = {
.class = "tiny-power-button",
.ids = tiny_power_button_device_ids,
.ops = {
- .add = acpi_noop_add,
- .remove = acpi_noop_remove,
- .notify = acpi_tiny_power_button_notify,
+ .add = acpi_tiny_power_button_add,
+ .remove = acpi_tiny_power_button_remove,
},
};
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index bcc25d457581..18cc08c858cf 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -471,6 +471,22 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
},
{
+ .callback = video_detect_force_native,
+ /* Lenovo ThinkPad X131e (3371 AMD version) */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "3371"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
+ /* Apple iMac11,3 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac11,3"),
+ },
+ },
+ {
/* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
.callback = video_detect_force_native,
/* Apple MacBook Pro 12,1 */
@@ -514,6 +530,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
},
{
.callback = video_detect_force_native,
+ /* Dell Studio 1569 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1569"),
+ },
+ },
+ {
+ .callback = video_detect_force_native,
/* Acer Aspire 3830TG */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
@@ -828,6 +852,27 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto
if (native_available)
return acpi_backlight_native;
+ /*
+ * The vendor specific BIOS interfaces are only necessary for
+ * laptops from before ~2008.
+ *
+ * For laptops from ~2008 till ~2023 this point is never reached
+ * because on those (video_caps & ACPI_VIDEO_BACKLIGHT) above is true.
+ *
+ * Laptops from after ~2023 no longer support ACPI_VIDEO_BACKLIGHT,
+ * if this point is reached on those, this likely means that
+ * the GPU kms driver which sets native_available has not loaded yet.
+ *
+ * Returning acpi_backlight_vendor in this case is known to sometimes
+ * cause a non working vendor specific /sys/class/backlight device to
+ * get registered.
+ *
+ * Return acpi_backlight_none on laptops with ACPI tables written
+ * for Windows 8 (laptops from after ~2012) to avoid this problem.
+ */
+ if (acpi_osi_is_win8())
+ return acpi_backlight_none;
+
/* No ACPI video/native (old hw), use vendor specific fw methods. */
return acpi_backlight_vendor;
}
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index e499c60c4579..ce62e61a9605 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -59,6 +59,7 @@ static int lps0_dsm_func_mask;
static guid_t lps0_dsm_guid_microsoft;
static int lps0_dsm_func_mask_microsoft;
+static int lps0_dsm_state;
/* Device constraint entry structure */
struct lpi_device_info {
@@ -320,6 +321,44 @@ static void lpi_check_constraints(void)
}
}
+static bool acpi_s2idle_vendor_amd(void)
+{
+ return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+}
+
+static const char *acpi_sleep_dsm_state_to_str(unsigned int state)
+{
+ if (lps0_dsm_func_mask_microsoft || !acpi_s2idle_vendor_amd()) {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_OFF:
+ return "screen off";
+ case ACPI_LPS0_SCREEN_ON:
+ return "screen on";
+ case ACPI_LPS0_ENTRY:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT:
+ return "lps0 exit";
+ case ACPI_LPS0_MS_ENTRY:
+ return "lps0 ms entry";
+ case ACPI_LPS0_MS_EXIT:
+ return "lps0 ms exit";
+ }
+ } else {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_ON_AMD:
+ return "screen on";
+ case ACPI_LPS0_SCREEN_OFF_AMD:
+ return "screen off";
+ case ACPI_LPS0_ENTRY_AMD:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT_AMD:
+ return "lps0 exit";
+ }
+ }
+
+ return "unknown";
+}
+
static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid)
{
union acpi_object *out_obj;
@@ -331,14 +370,15 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, g
rev_id, func, NULL);
ACPI_FREE(out_obj);
- acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
- func, out_obj ? "successful" : "failed");
+ lps0_dsm_state = func;
+ if (pm_debug_messages_on) {
+ acpi_handle_info(lps0_device_handle,
+ "%s transitioned to state %s\n",
+ out_obj ? "Successfully" : "Failed to",
+ acpi_sleep_dsm_state_to_str(lps0_dsm_state));
+ }
}
-static bool acpi_s2idle_vendor_amd(void)
-{
- return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
-}
static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid)
{
@@ -485,11 +525,11 @@ int acpi_s2idle_prepare_late(void)
ACPI_LPS0_ENTRY,
lps0_dsm_func_mask, lps0_dsm_guid);
if (lps0_dsm_func_mask_microsoft > 0) {
- acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
- lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
/* modern standby entry */
acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY,
+ lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
}
list_for_each_entry(handler, &lps0_s2idle_devops_head, list_node) {
@@ -524,11 +564,6 @@ void acpi_s2idle_restore_early(void)
if (handler->restore)
handler->restore();
- /* Modern standby exit */
- if (lps0_dsm_func_mask_microsoft > 0)
- acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
- lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
-
/* LPS0 exit */
if (lps0_dsm_func_mask > 0)
acpi_sleep_run_lps0_dsm(acpi_s2idle_vendor_amd() ?
@@ -539,6 +574,11 @@ void acpi_s2idle_restore_early(void)
acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT,
lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+ /* Modern standby exit */
+ if (lps0_dsm_func_mask_microsoft > 0)
+ acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT,
+ lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft);
+
/* Screen on */
if (lps0_dsm_func_mask_microsoft > 0)
acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON,
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index 9c2d6f35f88a..c2b925f8cd4e 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -259,10 +259,11 @@ bool force_storage_d3(void)
* drivers/platform/x86/x86-android-tablets.c kernel module.
*/
#define ACPI_QUIRK_SKIP_I2C_CLIENTS BIT(0)
-#define ACPI_QUIRK_UART1_TTY_UART2_SKIP BIT(1)
-#define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY BIT(2)
-#define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY BIT(3)
-#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS BIT(4)
+#define ACPI_QUIRK_UART1_SKIP BIT(1)
+#define ACPI_QUIRK_UART1_TTY_UART2_SKIP BIT(2)
+#define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY BIT(3)
+#define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY BIT(4)
+#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS BIT(5)
static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
/*
@@ -319,6 +320,7 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
},
.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_UART1_SKIP |
ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
@@ -365,7 +367,7 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
},
{
- /* Nextbook Ares 8 */
+ /* Nextbook Ares 8 (BYT version)*/
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
@@ -375,6 +377,16 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
{
+ /* Nextbook Ares 8A (CHT version)*/
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"),
+ DMI_MATCH(DMI_BIOS_VERSION, "M882"),
+ },
+ .driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ },
+ {
/* Whitelabel (sold as various brands) TM800A550L */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
@@ -392,6 +404,7 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
#if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
static const struct acpi_device_id i2c_acpi_known_good_ids[] = {
{ "10EC5640", 0 }, /* RealTek ALC5640 audio codec */
+ { "10EC5651", 0 }, /* RealTek ALC5651 audio codec */
{ "INT33F4", 0 }, /* X-Powers AXP288 PMIC */
{ "INT33FD", 0 }, /* Intel Crystal Cove PMIC */
{ "INT34D3", 0 }, /* Intel Whiskey Cove PMIC */
@@ -438,6 +451,9 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s
if (dmi_id)
quirks = (unsigned long)dmi_id->driver_data;
+ if ((quirks & ACPI_QUIRK_UART1_SKIP) && uid == 1)
+ *skip = true;
+
if (quirks & ACPI_QUIRK_UART1_TTY_UART2_SKIP) {
if (uid == 1)
return -ENODEV; /* Create tty cdev instead of serdev */
diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c
index 02425991c159..d44814b9562a 100644
--- a/drivers/auxdisplay/ht16k33.c
+++ b/drivers/auxdisplay/ht16k33.c
@@ -820,7 +820,7 @@ static const struct of_device_id ht16k33_of_match[] = {
MODULE_DEVICE_TABLE(of, ht16k33_of_match);
static struct i2c_driver ht16k33_driver = {
- .probe_new = ht16k33_probe,
+ .probe = ht16k33_probe,
.remove = ht16k33_remove,
.driver = {
.name = DRIVER_NAME,
diff --git a/drivers/auxdisplay/lcd2s.c b/drivers/auxdisplay/lcd2s.c
index 135831a16514..6422be0dfe20 100644
--- a/drivers/auxdisplay/lcd2s.c
+++ b/drivers/auxdisplay/lcd2s.c
@@ -365,7 +365,7 @@ static struct i2c_driver lcd2s_i2c_driver = {
.name = "lcd2s",
.of_match_table = lcd2s_of_table,
},
- .probe_new = lcd2s_i2c_probe,
+ .probe = lcd2s_i2c_probe,
.remove = lcd2s_i2c_remove,
.id_table = lcd2s_i2c_id,
};
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 9c09ca5c4ab6..878aa7646b37 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -751,14 +751,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv)
*
* Should somehow figure out how to use a semaphore, not an atomic variable...
*/
-int driver_probe_done(void)
+bool __init driver_probe_done(void)
{
int local_probe_count = atomic_read(&probe_count);
pr_debug("%s: probe_count = %d\n", __func__, local_probe_count);
- if (local_probe_count)
- return -EBUSY;
- return 0;
+ return !local_probe_count;
}
/**
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 5c998cfac335..3df0025d12aa 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -29,10 +29,10 @@ struct devres {
* Some archs want to perform DMA into kmalloc caches
* and need a guaranteed alignment larger than
* the alignment of a 64-bit integer.
- * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
- * buffer alignment as if it was allocated by plain kmalloc().
+ * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same
+ * alignment for struct devres when allocated by kmalloc().
*/
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+ u8 __aligned(ARCH_DMA_MINALIGN) data[];
};
struct devres_group {
diff --git a/drivers/base/node.c b/drivers/base/node.c
index b46db17124f3..655975946ef6 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -449,6 +449,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d FileHugePages: %8lu kB\n"
"Node %d FilePmdMapped: %8lu kB\n"
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ "Node %d Unaccepted: %8lu kB\n"
+#endif
,
nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
nid, K(node_page_state(pgdat, NR_WRITEBACK)),
@@ -478,6 +481,10 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_FILE_THPS)),
nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED))
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ ,
+ nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
+#endif
);
len += hugetlb_report_node_meminfo(buf, len, nid);
return len;
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 32084e38b73d..5cb2023581d4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1632,9 +1632,6 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_dbg(dev, "%s()\n", __func__);
- if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
- return -EINVAL;
-
gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
@@ -1676,6 +1673,9 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
{
int ret;
+ if (!genpd || !dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
ret = genpd_add_device(genpd, dev, dev);
mutex_unlock(&gpd_list_lock);
@@ -2523,6 +2523,9 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
struct generic_pm_domain *genpd;
int ret;
+ if (!dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
genpd = genpd_get_from_provider(genpdspec);
@@ -2939,10 +2942,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
err = of_property_read_u32(state_node, "min-residency-us", &residency);
if (!err)
- genpd_state->residency_ns = 1000 * residency;
+ genpd_state->residency_ns = 1000LL * residency;
- genpd_state->power_on_latency_ns = 1000 * exit_latency;
- genpd_state->power_off_latency_ns = 1000 * entry_latency;
+ genpd_state->power_on_latency_ns = 1000LL * exit_latency;
+ genpd_state->power_off_latency_ns = 1000LL * entry_latency;
genpd_state->fwnode = &state_node->fwnode;
return 0;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 7cc0c0cf8eaa..a917219feea6 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -19,11 +19,6 @@
#include "power.h"
-#ifndef CONFIG_SUSPEND
-suspend_state_t pm_suspend_target_state;
-#define pm_suspend_target_state (PM_SUSPEND_ON)
-#endif
-
#define list_for_each_entry_rcu_locked(pos, head, member) \
list_for_each_entry_rcu(pos, head, member, \
srcu_read_lock_held(&wakeup_srcu))
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index f6c6cb017200..5fdd0845b45e 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
obj-$(CONFIG_REGMAP_KUNIT) += regmap-kunit.o
obj-$(CONFIG_REGMAP_AC97) += regmap-ac97.o
obj-$(CONFIG_REGMAP_I2C) += regmap-i2c.o
-obj-$(CONFIG_REGMAP_RAM) += regmap-ram.o
+obj-$(CONFIG_REGMAP_RAM) += regmap-ram.o regmap-raw-ram.o
obj-$(CONFIG_REGMAP_SLIMBUS) += regmap-slimbus.o
obj-$(CONFIG_REGMAP_SPI) += regmap-spi.o
obj-$(CONFIG_REGMAP_SPMI) += regmap-spmi.o
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 9bd0dfd1e259..9a9ea514c2d8 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -125,6 +125,9 @@ struct regmap {
int reg_stride;
int reg_stride_order;
+ /* If set, will always write field to HW. */
+ bool force_write_field;
+
/* regcache specific members */
const struct regcache_ops *cache_ops;
enum regcache_type cache_type;
@@ -257,6 +260,8 @@ int regcache_sync_block(struct regmap *map, void *block,
unsigned long *cache_present,
unsigned int block_base, unsigned int start,
unsigned int end);
+bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg,
+ unsigned int val);
static inline const void *regcache_get_val_addr(struct regmap *map,
const void *base,
@@ -267,7 +272,7 @@ static inline const void *regcache_get_val_addr(struct regmap *map,
unsigned int regcache_get_val(struct regmap *map, const void *base,
unsigned int idx);
-bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
+void regcache_set_val(struct regmap *map, void *base, unsigned int idx,
unsigned int val);
int regcache_lookup_reg(struct regmap *map, unsigned int reg);
int regcache_sync_val(struct regmap *map, unsigned int reg, unsigned int val);
@@ -312,6 +317,7 @@ struct regmap_ram_data {
unsigned int *vals; /* Allocatd by caller */
bool *read;
bool *written;
+ enum regmap_endian reg_endian;
};
/*
@@ -326,5 +332,12 @@ struct regmap *__regmap_init_ram(const struct regmap_config *config,
#define regmap_init_ram(config, data) \
__regmap_lockdep_wrapper(__regmap_init_ram, #config, config, data)
+struct regmap *__regmap_init_raw_ram(const struct regmap_config *config,
+ struct regmap_ram_data *data,
+ struct lock_class_key *lock_key,
+ const char *lock_name);
+
+#define regmap_init_raw_ram(config, data) \
+ __regmap_lockdep_wrapper(__regmap_init_raw_ram, #config, config, data)
#endif
diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index c2e3a0f6c218..283c2e02a298 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -186,6 +186,55 @@ out_unlocked:
return ret;
}
+static int regcache_maple_sync_block(struct regmap *map, unsigned long *entry,
+ struct ma_state *mas,
+ unsigned int min, unsigned int max)
+{
+ void *buf;
+ unsigned long r;
+ size_t val_bytes = map->format.val_bytes;
+ int ret = 0;
+
+ mas_pause(mas);
+ rcu_read_unlock();
+
+ /*
+ * Use a raw write if writing more than one register to a
+ * device that supports raw writes to reduce transaction
+ * overheads.
+ */
+ if (max - min > 1 && regmap_can_raw_write(map)) {
+ buf = kmalloc(val_bytes * (max - min), map->alloc_flags);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Render the data for a raw write */
+ for (r = min; r < max; r++) {
+ regcache_set_val(map, buf, r - min,
+ entry[r - mas->index]);
+ }
+
+ ret = _regmap_raw_write(map, min, buf, (max - min) * val_bytes,
+ false);
+
+ kfree(buf);
+ } else {
+ for (r = min; r < max; r++) {
+ ret = _regmap_write(map, r,
+ entry[r - mas->index]);
+ if (ret != 0)
+ goto out;
+ }
+ }
+
+out:
+ rcu_read_lock();
+
+ return ret;
+}
+
static int regcache_maple_sync(struct regmap *map, unsigned int min,
unsigned int max)
{
@@ -194,8 +243,9 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
MA_STATE(mas, mt, min, max);
unsigned long lmin = min;
unsigned long lmax = max;
- unsigned int r;
+ unsigned int r, v, sync_start;
int ret;
+ bool sync_needed = false;
map->cache_bypass = true;
@@ -203,18 +253,38 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
mas_for_each(&mas, entry, max) {
for (r = max(mas.index, lmin); r <= min(mas.last, lmax); r++) {
- mas_pause(&mas);
- rcu_read_unlock();
- ret = regcache_sync_val(map, r, entry[r - mas.index]);
+ v = entry[r - mas.index];
+
+ if (regcache_reg_needs_sync(map, r, v)) {
+ if (!sync_needed) {
+ sync_start = r;
+ sync_needed = true;
+ }
+ continue;
+ }
+
+ if (!sync_needed)
+ continue;
+
+ ret = regcache_maple_sync_block(map, entry, &mas,
+ sync_start, r);
+ if (ret != 0)
+ goto out;
+ sync_needed = false;
+ }
+
+ if (sync_needed) {
+ ret = regcache_maple_sync_block(map, entry, &mas,
+ sync_start, r);
if (ret != 0)
goto out;
- rcu_read_lock();
+ sync_needed = false;
}
}
+out:
rcu_read_unlock();
-out:
map->cache_bypass = false;
return ret;
@@ -242,11 +312,41 @@ static int regcache_maple_exit(struct regmap *map)
return 0;
}
+static int regcache_maple_insert_block(struct regmap *map, int first,
+ int last)
+{
+ struct maple_tree *mt = map->cache;
+ MA_STATE(mas, mt, first, last);
+ unsigned long *entry;
+ int i, ret;
+
+ entry = kcalloc(last - first + 1, sizeof(unsigned long), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ for (i = 0; i < last - first + 1; i++)
+ entry[i] = map->reg_defaults[first + i].def;
+
+ mas_lock(&mas);
+
+ mas_set_range(&mas, map->reg_defaults[first].reg,
+ map->reg_defaults[last].reg);
+ ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
+
+ mas_unlock(&mas);
+
+ if (ret)
+ kfree(entry);
+
+ return ret;
+}
+
static int regcache_maple_init(struct regmap *map)
{
struct maple_tree *mt;
int i;
int ret;
+ int range_start;
mt = kmalloc(sizeof(*mt), GFP_KERNEL);
if (!mt)
@@ -255,14 +355,30 @@ static int regcache_maple_init(struct regmap *map)
mt_init(mt);
- for (i = 0; i < map->num_reg_defaults; i++) {
- ret = regcache_maple_write(map,
- map->reg_defaults[i].reg,
- map->reg_defaults[i].def);
- if (ret)
- goto err;
+ if (!map->num_reg_defaults)
+ return 0;
+
+ range_start = 0;
+
+ /* Scan for ranges of contiguous registers */
+ for (i = 1; i < map->num_reg_defaults; i++) {
+ if (map->reg_defaults[i].reg !=
+ map->reg_defaults[i - 1].reg + 1) {
+ ret = regcache_maple_insert_block(map, range_start,
+ i - 1);
+ if (ret != 0)
+ goto err;
+
+ range_start = i;
+ }
}
+ /* Add the last block */
+ ret = regcache_maple_insert_block(map, range_start,
+ map->num_reg_defaults - 1);
+ if (ret != 0)
+ goto err;
+
return 0;
err:
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 97c681fcf9f6..28bc3ae9458a 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -279,8 +279,8 @@ int regcache_write(struct regmap *map,
return 0;
}
-static bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg,
- unsigned int val)
+bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg,
+ unsigned int val)
{
int ret;
@@ -561,17 +561,14 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
}
EXPORT_SYMBOL_GPL(regcache_cache_bypass);
-bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
+void regcache_set_val(struct regmap *map, void *base, unsigned int idx,
unsigned int val)
{
- if (regcache_get_val(map, base, idx) == val)
- return true;
-
/* Use device native format if possible */
if (map->format.format_val) {
map->format.format_val(base + (map->cache_word_size * idx),
val, 0);
- return false;
+ return;
}
switch (map->cache_word_size) {
@@ -604,7 +601,6 @@ bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
default:
BUG();
}
- return false;
}
unsigned int regcache_get_val(struct regmap *map, const void *base,
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index c491fabe3617..f36027591e1a 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -636,6 +636,17 @@ void regmap_debugfs_init(struct regmap *map)
&regmap_cache_bypass_fops);
}
+ /*
+ * This could interfere with driver operation. Therefore, don't provide
+ * any real compile time configuration option for this feature. One will
+ * have to modify the source code directly in order to use it.
+ */
+#undef REGMAP_ALLOW_FORCE_WRITE_FIELD_DEBUGFS
+#ifdef REGMAP_ALLOW_FORCE_WRITE_FIELD_DEBUGFS
+ debugfs_create_bool("force_write_field", 0600, map->debugfs,
+ &map->force_write_field);
+#endif
+
next = rb_first(&map->range_tree);
while (next) {
range_node = rb_entry(next, struct regmap_range_node, node);
diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index b99bb2369fff..ced0dcf86e0b 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -30,9 +30,6 @@ struct regmap_irq_chip_data {
int irq;
int wake_count;
- unsigned int mask_base;
- unsigned int unmask_base;
-
void *status_reg_buf;
unsigned int *main_status_buf;
unsigned int *status_buf;
@@ -41,7 +38,6 @@ struct regmap_irq_chip_data {
unsigned int *wake_buf;
unsigned int *type_buf;
unsigned int *type_buf_def;
- unsigned int **virt_buf;
unsigned int **config_buf;
unsigned int irq_reg_stride;
@@ -114,25 +110,22 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
* suppress pointless writes.
*/
for (i = 0; i < d->chip->num_regs; i++) {
- if (d->mask_base) {
- if (d->chip->handle_mask_sync)
- d->chip->handle_mask_sync(d->map, i,
- d->mask_buf_def[i],
- d->mask_buf[i],
- d->chip->irq_drv_data);
- else {
- reg = d->get_irq_reg(d, d->mask_base, i);
- ret = regmap_update_bits(d->map, reg,
- d->mask_buf_def[i],
- d->mask_buf[i]);
- if (ret)
- dev_err(d->map->dev, "Failed to sync masks in %x\n",
- reg);
- }
+ if (d->chip->handle_mask_sync)
+ d->chip->handle_mask_sync(i, d->mask_buf_def[i],
+ d->mask_buf[i],
+ d->chip->irq_drv_data);
+
+ if (d->chip->mask_base && !d->chip->handle_mask_sync) {
+ reg = d->get_irq_reg(d, d->chip->mask_base, i);
+ ret = regmap_update_bits(d->map, reg,
+ d->mask_buf_def[i],
+ d->mask_buf[i]);
+ if (ret)
+ dev_err(d->map->dev, "Failed to sync masks in %x\n", reg);
}
- if (d->unmask_base) {
- reg = d->get_irq_reg(d, d->unmask_base, i);
+ if (d->chip->unmask_base && !d->chip->handle_mask_sync) {
+ reg = d->get_irq_reg(d, d->chip->unmask_base, i);
ret = regmap_update_bits(d->map, reg,
d->mask_buf_def[i], ~d->mask_buf[i]);
if (ret)
@@ -183,34 +176,6 @@ static void regmap_irq_sync_unlock(struct irq_data *data)
}
}
- /* Don't update the type bits if we're using mask bits for irq type. */
- if (!d->chip->type_in_mask) {
- for (i = 0; i < d->chip->num_type_reg; i++) {
- if (!d->type_buf_def[i])
- continue;
- reg = d->get_irq_reg(d, d->chip->type_base, i);
- ret = regmap_update_bits(d->map, reg,
- d->type_buf_def[i], d->type_buf[i]);
- if (ret != 0)
- dev_err(d->map->dev, "Failed to sync type in %x\n",
- reg);
- }
- }
-
- if (d->chip->num_virt_regs) {
- for (i = 0; i < d->chip->num_virt_regs; i++) {
- for (j = 0; j < d->chip->num_regs; j++) {
- reg = d->get_irq_reg(d, d->chip->virt_reg_base[i],
- j);
- ret = regmap_write(map, reg, d->virt_buf[i][j]);
- if (ret != 0)
- dev_err(d->map->dev,
- "Failed to write virt 0x%x: %d\n",
- reg, ret);
- }
- }
- }
-
for (i = 0; i < d->chip->num_config_bases; i++) {
for (j = 0; j < d->chip->num_config_regs; j++) {
reg = d->get_irq_reg(d, d->chip->config_base[i], j);
@@ -289,41 +254,9 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type)
reg = t->type_reg_offset / map->reg_stride;
- if (t->type_reg_mask)
- d->type_buf[reg] &= ~t->type_reg_mask;
- else
- d->type_buf[reg] &= ~(t->type_falling_val |
- t->type_rising_val |
- t->type_level_low_val |
- t->type_level_high_val);
- switch (type) {
- case IRQ_TYPE_EDGE_FALLING:
- d->type_buf[reg] |= t->type_falling_val;
- break;
-
- case IRQ_TYPE_EDGE_RISING:
- d->type_buf[reg] |= t->type_rising_val;
- break;
-
- case IRQ_TYPE_EDGE_BOTH:
- d->type_buf[reg] |= (t->type_falling_val |
- t->type_rising_val);
- break;
-
- case IRQ_TYPE_LEVEL_HIGH:
- d->type_buf[reg] |= t->type_level_high_val;
- break;
-
- case IRQ_TYPE_LEVEL_LOW:
- d->type_buf[reg] |= t->type_level_low_val;
- break;
- default:
- return -EINVAL;
- }
-
- if (d->chip->set_type_virt) {
- ret = d->chip->set_type_virt(d->virt_buf, type, data->hwirq,
- reg);
+ if (d->chip->type_in_mask) {
+ ret = regmap_irq_set_type_config_simple(&d->type_buf, type,
+ irq_data, reg, d->chip->irq_drv_data);
if (ret)
return ret;
}
@@ -390,15 +323,8 @@ static inline int read_sub_irq_data(struct regmap_irq_chip_data *data,
unsigned int offset = subreg->offset[i];
unsigned int index = offset / map->reg_stride;
- if (chip->not_fixed_stride)
- ret = regmap_read(map,
- chip->status_base + offset,
- &data->status_buf[b]);
- else
- ret = regmap_read(map,
- chip->status_base + offset,
- &data->status_buf[index]);
-
+ ret = regmap_read(map, chip->status_base + offset,
+ &data->status_buf[index]);
if (ret)
break;
}
@@ -453,17 +379,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
* sake of simplicity. and add bulk reads only if needed
*/
for (i = 0; i < chip->num_main_regs; i++) {
- /*
- * For not_fixed_stride, don't use ->get_irq_reg().
- * It would produce an incorrect result.
- */
- if (data->chip->not_fixed_stride)
- reg = chip->main_status +
- i * map->reg_stride * data->irq_reg_stride;
- else
- reg = data->get_irq_reg(data,
- chip->main_status, i);
-
+ reg = data->get_irq_reg(data, chip->main_status, i);
ret = regmap_read(map, reg, &data->main_status_buf[i]);
if (ret) {
dev_err(map->dev,
@@ -586,12 +502,12 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
}
exit:
- if (chip->runtime_pm)
- pm_runtime_put(map->dev);
-
if (chip->handle_post_irq)
chip->handle_post_irq(chip->irq_drv_data);
+ if (chip->runtime_pm)
+ pm_runtime_put(map->dev);
+
if (handled)
return IRQ_HANDLED;
else
@@ -629,20 +545,8 @@ static const struct irq_domain_ops regmap_domain_ops = {
unsigned int regmap_irq_get_irq_reg_linear(struct regmap_irq_chip_data *data,
unsigned int base, int index)
{
- const struct regmap_irq_chip *chip = data->chip;
struct regmap *map = data->map;
- /*
- * FIXME: This is for backward compatibility and should be removed
- * when not_fixed_stride is dropped (it's only used by qcom-pm8008).
- */
- if (chip->not_fixed_stride && chip->sub_reg_offsets) {
- struct regmap_irq_sub_irq_map *subreg;
-
- subreg = &chip->sub_reg_offsets[0];
- return base + subreg->offset[0];
- }
-
return base + index * map->reg_stride * data->irq_reg_stride;
}
EXPORT_SYMBOL_GPL(regmap_irq_get_irq_reg_linear);
@@ -730,8 +634,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
struct regmap_irq_chip_data *d;
int i;
int ret = -ENOMEM;
- int num_type_reg;
- int num_regs;
u32 reg;
if (chip->num_regs <= 0)
@@ -740,6 +642,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
if (chip->clear_on_unmask && (chip->ack_base || chip->use_ack))
return -EINVAL;
+ if (chip->mask_base && chip->unmask_base && !chip->mask_unmask_non_inverted)
+ return -EINVAL;
+
for (i = 0; i < chip->num_irqs; i++) {
if (chip->irqs[i].reg_offset % map->reg_stride)
return -EINVAL;
@@ -748,20 +653,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
return -EINVAL;
}
- if (chip->not_fixed_stride) {
- dev_warn(map->dev, "not_fixed_stride is deprecated; use ->get_irq_reg() instead");
-
- for (i = 0; i < chip->num_regs; i++)
- if (chip->sub_reg_offsets[i].num_regs != 1)
- return -EINVAL;
- }
-
- if (chip->num_type_reg)
- dev_warn(map->dev, "type registers are deprecated; use config registers instead");
-
- if (chip->num_virt_regs || chip->virt_reg_base || chip->set_type_virt)
- dev_warn(map->dev, "virtual registers are deprecated; use config registers instead");
-
if (irq_base) {
irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0);
if (irq_base < 0) {
@@ -806,43 +697,17 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
goto err_alloc;
}
- /*
- * Use num_config_regs if defined, otherwise fall back to num_type_reg
- * to maintain backward compatibility.
- */
- num_type_reg = chip->num_config_regs ? chip->num_config_regs
- : chip->num_type_reg;
- num_regs = chip->type_in_mask ? chip->num_regs : num_type_reg;
- if (num_regs) {
- d->type_buf_def = kcalloc(num_regs,
+ if (chip->type_in_mask) {
+ d->type_buf_def = kcalloc(chip->num_regs,
sizeof(*d->type_buf_def), GFP_KERNEL);
if (!d->type_buf_def)
goto err_alloc;
- d->type_buf = kcalloc(num_regs, sizeof(*d->type_buf),
- GFP_KERNEL);
+ d->type_buf = kcalloc(chip->num_regs, sizeof(*d->type_buf), GFP_KERNEL);
if (!d->type_buf)
goto err_alloc;
}
- if (chip->num_virt_regs) {
- /*
- * Create virt_buf[chip->num_extra_config_regs][chip->num_regs]
- */
- d->virt_buf = kcalloc(chip->num_virt_regs, sizeof(*d->virt_buf),
- GFP_KERNEL);
- if (!d->virt_buf)
- goto err_alloc;
-
- for (i = 0; i < chip->num_virt_regs; i++) {
- d->virt_buf[i] = kcalloc(chip->num_regs,
- sizeof(**d->virt_buf),
- GFP_KERNEL);
- if (!d->virt_buf[i])
- goto err_alloc;
- }
- }
-
if (chip->num_config_bases && chip->num_config_regs) {
/*
* Create config_buf[num_config_bases][num_config_regs]
@@ -868,28 +733,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
d->chip = chip;
d->irq_base = irq_base;
- if (chip->mask_base && chip->unmask_base &&
- !chip->mask_unmask_non_inverted) {
- /*
- * Chips that specify both mask_base and unmask_base used to
- * get inverted mask behavior by default, with no way to ask
- * for the normal, non-inverted behavior. This "inverted by
- * default" behavior is deprecated, but we have to support it
- * until existing drivers have been fixed.
- *
- * Existing drivers should be updated by swapping mask_base
- * and unmask_base and setting mask_unmask_non_inverted=true.
- * New drivers should always set the flag.
- */
- dev_warn(map->dev, "mask_base and unmask_base are inverted, please fix it");
-
- d->mask_base = chip->unmask_base;
- d->unmask_base = chip->mask_base;
- } else {
- d->mask_base = chip->mask_base;
- d->unmask_base = chip->unmask_base;
- }
-
if (chip->irq_reg_stride)
d->irq_reg_stride = chip->irq_reg_stride;
else
@@ -918,29 +761,28 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
for (i = 0; i < chip->num_regs; i++) {
d->mask_buf[i] = d->mask_buf_def[i];
- if (d->mask_base) {
- if (chip->handle_mask_sync) {
- ret = chip->handle_mask_sync(d->map, i,
- d->mask_buf_def[i],
- d->mask_buf[i],
- chip->irq_drv_data);
- if (ret)
- goto err_alloc;
- } else {
- reg = d->get_irq_reg(d, d->mask_base, i);
- ret = regmap_update_bits(d->map, reg,
- d->mask_buf_def[i],
- d->mask_buf[i]);
- if (ret) {
- dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
- reg, ret);
- goto err_alloc;
- }
+ if (chip->handle_mask_sync) {
+ ret = chip->handle_mask_sync(i, d->mask_buf_def[i],
+ d->mask_buf[i],
+ chip->irq_drv_data);
+ if (ret)
+ goto err_alloc;
+ }
+
+ if (chip->mask_base && !chip->handle_mask_sync) {
+ reg = d->get_irq_reg(d, chip->mask_base, i);
+ ret = regmap_update_bits(d->map, reg,
+ d->mask_buf_def[i],
+ d->mask_buf[i]);
+ if (ret) {
+ dev_err(map->dev, "Failed to set masks in 0x%x: %d\n",
+ reg, ret);
+ goto err_alloc;
}
}
- if (d->unmask_base) {
- reg = d->get_irq_reg(d, d->unmask_base, i);
+ if (chip->unmask_base && !chip->handle_mask_sync) {
+ reg = d->get_irq_reg(d, chip->unmask_base, i);
ret = regmap_update_bits(d->map, reg,
d->mask_buf_def[i], ~d->mask_buf[i]);
if (ret) {
@@ -1014,20 +856,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
}
}
- if (chip->num_type_reg && !chip->type_in_mask) {
- for (i = 0; i < chip->num_type_reg; ++i) {
- reg = d->get_irq_reg(d, d->chip->type_base, i);
-
- ret = regmap_read(map, reg, &d->type_buf_def[i]);
-
- if (ret) {
- dev_err(map->dev, "Failed to get type defaults at 0x%x: %d\n",
- reg, ret);
- goto err_alloc;
- }
- }
- }
-
if (irq_base)
d->domain = irq_domain_create_legacy(fwnode, chip->num_irqs,
irq_base, 0,
@@ -1064,11 +892,6 @@ err_alloc:
kfree(d->mask_buf);
kfree(d->status_buf);
kfree(d->status_reg_buf);
- if (d->virt_buf) {
- for (i = 0; i < chip->num_virt_regs; i++)
- kfree(d->virt_buf[i]);
- kfree(d->virt_buf);
- }
if (d->config_buf) {
for (i = 0; i < chip->num_config_bases; i++)
kfree(d->config_buf[i]);
diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
index f76d41688134..24257aa9004d 100644
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -92,6 +92,11 @@ static struct regmap *gen_regmap(struct regmap_config *config,
return ret;
}
+static bool reg_5_false(struct device *context, unsigned int reg)
+{
+ return reg != 5;
+}
+
static void basic_read_write(struct kunit *test)
{
struct regcache_types *t = (struct regcache_types *)test->param_value;
@@ -191,6 +196,81 @@ static void bulk_read(struct kunit *test)
regmap_exit(map);
}
+static void write_readonly(struct kunit *test)
+{
+ struct regcache_types *t = (struct regcache_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ unsigned int val;
+ int i;
+
+ config = test_regmap_config;
+ config.cache_type = t->type;
+ config.num_reg_defaults = BLOCK_TEST_SIZE;
+ config.writeable_reg = reg_5_false;
+
+ map = gen_regmap(&config, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ if (IS_ERR(map))
+ return;
+
+ get_random_bytes(&val, sizeof(val));
+
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ data->written[i] = false;
+
+ /* Change the value of all registers, readonly should fail */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ KUNIT_EXPECT_EQ(test, i != 5, regmap_write(map, i, val) == 0);
+
+ /* Did that match what we see on the device? */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ KUNIT_EXPECT_EQ(test, i != 5, data->written[i]);
+
+ regmap_exit(map);
+}
+
+static void read_writeonly(struct kunit *test)
+{
+ struct regcache_types *t = (struct regcache_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ unsigned int val;
+ int i;
+
+ config = test_regmap_config;
+ config.cache_type = t->type;
+ config.readable_reg = reg_5_false;
+
+ map = gen_regmap(&config, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ if (IS_ERR(map))
+ return;
+
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ data->read[i] = false;
+
+ /*
+ * Try to read all the registers, the writeonly one should
+ * fail if we aren't using the flat cache.
+ */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++) {
+ if (t->type != REGCACHE_FLAT) {
+ KUNIT_EXPECT_EQ(test, i != 5,
+ regmap_read(map, i, &val) == 0);
+ } else {
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &val));
+ }
+ }
+
+ /* Did we trigger a hardware access? */
+ KUNIT_EXPECT_FALSE(test, data->read[5]);
+
+ regmap_exit(map);
+}
+
static void reg_defaults(struct kunit *test)
{
struct regcache_types *t = (struct regcache_types *)test->param_value;
@@ -609,6 +689,47 @@ static void cache_sync_defaults(struct kunit *test)
regmap_exit(map);
}
+/*
+ * cache_sync_readonly() - check that a cache sync skips a read-only register.
+ *
+ * Creates a regmap with writeable_reg set to reg_5_false, reads every
+ * register, then writes a random value to every register while in
+ * cache-only mode (the write to register 5 is expected to fail). After
+ * leaving cache-only mode and calling regcache_sync(), every register except
+ * register 5 is expected to have been written to the backing store.
+ */
+static void cache_sync_readonly(struct kunit *test)
+{
+ struct regcache_types *t = (struct regcache_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ unsigned int val;
+ int i;
+
+ config = test_regmap_config;
+ config.cache_type = t->type;
+ config.writeable_reg = reg_5_false;
+
+ map = gen_regmap(&config, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ /* NOTE(review): extra guard in case the assertion above returns */
+ if (IS_ERR(map))
+ return;
+
+ /* Read all registers to fill the cache */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &val));
+
+ /* Change the value of all registers, readonly should fail */
+ get_random_bytes(&val, sizeof(val));
+ regcache_cache_only(map, true);
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ KUNIT_EXPECT_EQ(test, i != 5, regmap_write(map, i, val) == 0);
+ regcache_cache_only(map, false);
+
+ /* Resync */
+ /* Clear the write tracking so only writes made by the sync are seen */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ data->written[i] = false;
+ KUNIT_EXPECT_EQ(test, 0, regcache_sync(map));
+
+ /* Did that match what we see on the device? */
+ for (i = 0; i < BLOCK_TEST_SIZE; i++)
+ KUNIT_EXPECT_EQ(test, i != 5, data->written[i]);
+
+ regmap_exit(map);
+}
+
static void cache_sync_patch(struct kunit *test)
{
struct regcache_types *t = (struct regcache_types *)test->param_value;
@@ -712,10 +833,333 @@ static void cache_drop(struct kunit *test)
regmap_exit(map);
}
+/* One parameter set for the raw I/O tests. */
+struct raw_test_types {
+ /* Name copied into the KUnit parameter description by raw_to_desc() */
+ const char *name;
+
+ /* Cache implementation to configure the regmap with */
+ enum regcache_type cache_type;
+ /* Endianness used for register values in the backing buffer */
+ enum regmap_endian val_endian;
+};
+
+/*
+ * raw_to_desc() - describe a raw test parameter for KUnit.
+ * @t:    parameter set being run
+ * @desc: KUnit description buffer to fill with the parameter's name
+ *
+ * The copy is bounded by KUNIT_PARAM_DESC_SIZE, the size of the description
+ * buffer KUnit hands to parameter generators, so an over-long name is
+ * truncated instead of overrunning the buffer.
+ */
+static void raw_to_desc(const struct raw_test_types *t, char *desc)
+{
+	strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
+}
+
+/*
+ * Parameters for raw tests that run both without a cache and with each
+ * cache type, once per value endianness.
+ */
+static const struct raw_test_types raw_types_list[] = {
+ { "none-little", REGCACHE_NONE, REGMAP_ENDIAN_LITTLE },
+ { "none-big", REGCACHE_NONE, REGMAP_ENDIAN_BIG },
+ { "flat-little", REGCACHE_FLAT, REGMAP_ENDIAN_LITTLE },
+ { "flat-big", REGCACHE_FLAT, REGMAP_ENDIAN_BIG },
+ { "rbtree-little", REGCACHE_RBTREE, REGMAP_ENDIAN_LITTLE },
+ { "rbtree-big", REGCACHE_RBTREE, REGMAP_ENDIAN_BIG },
+ { "maple-little", REGCACHE_MAPLE, REGMAP_ENDIAN_LITTLE },
+ { "maple-big", REGCACHE_MAPLE, REGMAP_ENDIAN_BIG },
+};
+
+/* Provides raw_test_types_gen_params for KUNIT_CASE_PARAM() */
+KUNIT_ARRAY_PARAM(raw_test_types, raw_types_list, raw_to_desc);
+
+/*
+ * Parameters for raw tests that need a cache: the same combinations as
+ * raw_types_list but without the REGCACHE_NONE entries.
+ */
+static const struct raw_test_types raw_cache_types_list[] = {
+ { "flat-little", REGCACHE_FLAT, REGMAP_ENDIAN_LITTLE },
+ { "flat-big", REGCACHE_FLAT, REGMAP_ENDIAN_BIG },
+ { "rbtree-little", REGCACHE_RBTREE, REGMAP_ENDIAN_LITTLE },
+ { "rbtree-big", REGCACHE_RBTREE, REGMAP_ENDIAN_BIG },
+ { "maple-little", REGCACHE_MAPLE, REGMAP_ENDIAN_LITTLE },
+ { "maple-big", REGCACHE_MAPLE, REGMAP_ENDIAN_BIG },
+};
+
+/* Provides raw_test_cache_types_gen_params for KUNIT_CASE_PARAM() */
+KUNIT_ARRAY_PARAM(raw_test_cache_types, raw_cache_types_list, raw_to_desc);
+
+/*
+ * Base configuration copied by every raw test: 16-bit little-endian register
+ * addresses and 16-bit values. gen_raw_regmap() fills in the cache type,
+ * value endianness and register defaults per test parameter.
+ */
+static const struct regmap_config raw_regmap_config = {
+ .max_register = BLOCK_TEST_SIZE,
+
+ .reg_format_endian = REGMAP_ENDIAN_LITTLE,
+ .reg_bits = 16,
+ .val_bits = 16,
+};
+
+/*
+ * gen_raw_regmap() - create a raw RAM backed regmap for a parameterised test.
+ * @config:    regmap configuration; cache_type, val_format_endian,
+ *             num_reg_defaults and reg_defaults are overwritten here
+ * @test_type: cache type and value endianness to test with
+ * @data:      set to the newly allocated backing store context on success
+ *
+ * Fills the simulated hardware buffer with random bytes and builds a table
+ * of register defaults holding the same values converted from the tested
+ * endianness to CPU order, so tests can compare API reads against it.
+ *
+ * Return: the new regmap, or an ERR_PTR() on failure. Once the hardware
+ * buffer has been allocated, any later failure frees everything allocated
+ * here, sets *data to NULL and clears the defaults in @config. On success
+ * the defaults table is left allocated for the caller to inspect through
+ * config->reg_defaults.
+ */
+static struct regmap *gen_raw_regmap(struct regmap_config *config,
+				     struct raw_test_types *test_type,
+				     struct regmap_ram_data **data)
+{
+	u16 *buf;
+	struct regmap *ret;
+	/* The buffer is indexed as u16 values below, so size it that way */
+	size_t size = (config->max_register + 1) * sizeof(*buf);
+	int i;
+	struct reg_default *defaults = NULL;
+
+	config->cache_type = test_type->cache_type;
+	config->val_format_endian = test_type->val_endian;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	get_random_bytes(buf, size);
+
+	*data = kzalloc(sizeof(**data), GFP_KERNEL);
+	if (!(*data)) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_free;
+	}
+	(*data)->vals = (void *)buf;
+
+	config->num_reg_defaults = config->max_register + 1;
+	defaults = kcalloc(config->num_reg_defaults,
+			   sizeof(struct reg_default),
+			   GFP_KERNEL);
+	if (!defaults) {
+		ret = ERR_PTR(-ENOMEM);
+		goto err_free;
+	}
+	config->reg_defaults = defaults;
+
+	for (i = 0; i < config->num_reg_defaults; i++) {
+		defaults[i].reg = i;
+		switch (test_type->val_endian) {
+		case REGMAP_ENDIAN_LITTLE:
+			defaults[i].def = le16_to_cpu(buf[i]);
+			break;
+		case REGMAP_ENDIAN_BIG:
+			defaults[i].def = be16_to_cpu(buf[i]);
+			break;
+		default:
+			ret = ERR_PTR(-EINVAL);
+			goto err_free;
+		}
+	}
+
+	/*
+	 * We use the defaults in the tests but they don't make sense
+	 * to the core if there's no cache.
+	 */
+	if (config->cache_type == REGCACHE_NONE)
+		config->num_reg_defaults = 0;
+
+	ret = regmap_init_raw_ram(config, *data);
+	if (IS_ERR(ret))
+		goto err_free;
+
+	return ret;
+
+err_free:
+	/* Don't leave pointers to freed memory behind for the caller */
+	config->reg_defaults = NULL;
+	config->num_reg_defaults = 0;
+	kfree(defaults);
+	kfree(*data);
+	*data = NULL;
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * raw_read_defaults_single() - read each register with regmap_read().
+ *
+ * Creates a raw regmap for the parameterised cache type and endianness and
+ * expects every register, read one at a time, to return the value recorded
+ * in config.reg_defaults by gen_raw_regmap().
+ */
+static void raw_read_defaults_single(struct kunit *test)
+{
+ struct raw_test_types *t = (struct raw_test_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ unsigned int rval;
+ int i;
+
+ config = raw_regmap_config;
+
+ map = gen_raw_regmap(&config, t, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ /* NOTE(review): extra guard in case the assertion above returns */
+ if (IS_ERR(map))
+ return;
+
+ /* Check that we can read the defaults via the API */
+ for (i = 0; i < config.max_register + 1; i++) {
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval));
+ KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval);
+ }
+
+ regmap_exit(map);
+}
+
+/*
+ * raw_read_defaults() - read the whole register block with regmap_raw_read().
+ *
+ * Creates a raw regmap for the parameterised cache type and endianness,
+ * reads all registers in a single raw read and expects each value, once
+ * converted from the configured value endianness, to equal the matching
+ * entry in config.reg_defaults.
+ */
+static void raw_read_defaults(struct kunit *test)
+{
+	struct raw_test_types *t = (struct raw_test_types *)test->param_value;
+	struct regmap *map;
+	struct regmap_config config;
+	struct regmap_ram_data *data;
+	u16 *rval;
+	u16 def;
+	size_t val_len;
+	int i;
+
+	config = raw_regmap_config;
+
+	map = gen_raw_regmap(&config, t, &data);
+	KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+	if (IS_ERR(map))
+		return;
+
+	val_len = sizeof(*rval) * (config.max_register + 1);
+	rval = kmalloc(val_len, GFP_KERNEL);
+	/*
+	 * Record the failure with an expectation rather than an assertion
+	 * so that control comes back here and the regmap is released.
+	 */
+	KUNIT_EXPECT_TRUE(test, rval != NULL);
+	if (!rval) {
+		regmap_exit(map);
+		return;
+	}
+
+	/* Check that we can read the defaults via the API */
+	KUNIT_EXPECT_EQ(test, 0, regmap_raw_read(map, 0, rval, val_len));
+	for (i = 0; i < config.max_register + 1; i++) {
+		def = config.reg_defaults[i].def;
+		if (config.val_format_endian == REGMAP_ENDIAN_BIG) {
+			KUNIT_EXPECT_EQ(test, def, be16_to_cpu(rval[i]));
+		} else {
+			KUNIT_EXPECT_EQ(test, def, le16_to_cpu(rval[i]));
+		}
+	}
+
+	kfree(rval);
+	regmap_exit(map);
+}
+
+/*
+ * raw_write_read_single() - write one register and read it back.
+ *
+ * Creates a raw regmap for the parameterised cache type and endianness,
+ * writes a random 16-bit value to register 0 with regmap_write() and expects
+ * regmap_read() to return the same value.
+ */
+static void raw_write_read_single(struct kunit *test)
+{
+ struct raw_test_types *t = (struct raw_test_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ u16 val;
+ unsigned int rval;
+
+ config = raw_regmap_config;
+
+ map = gen_raw_regmap(&config, t, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ /* NOTE(review): extra guard in case the assertion above returns */
+ if (IS_ERR(map))
+ return;
+
+ get_random_bytes(&val, sizeof(val));
+
+ /* If we write a value to a register we can read it back */
+ KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 0, val));
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, 0, &rval));
+ KUNIT_EXPECT_EQ(test, val, rval);
+
+ regmap_exit(map);
+}
+
+/*
+ * raw_write() - check that a raw write lands in both the API and the buffer.
+ *
+ * Raw-writes two random 16-bit values starting at register 2, then reads
+ * every register back with regmap_read(). Registers 2 and 3 are expected to
+ * return the written values converted from the configured value endianness;
+ * all other registers are expected to return their defaults. Finally the raw
+ * bytes are expected to appear unchanged in the backing buffer.
+ */
+static void raw_write(struct kunit *test)
+{
+ struct raw_test_types *t = (struct raw_test_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ u16 *hw_buf;
+ u16 val[2];
+ unsigned int rval;
+ int i;
+
+ config = raw_regmap_config;
+
+ map = gen_raw_regmap(&config, t, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ /* NOTE(review): extra guard in case the assertion above returns */
+ if (IS_ERR(map))
+ return;
+
+ hw_buf = (u16 *)data->vals;
+
+ get_random_bytes(&val, sizeof(val));
+
+ /* Do a raw write */
+ KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val)));
+
+ /* We should read back the new values, and defaults for the rest */
+ for (i = 0; i < config.max_register + 1; i++) {
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval));
+
+ switch (i) {
+ case 2:
+ case 3:
+ /* i % 2 maps register 2 to val[0] and register 3 to val[1] */
+ if (config.val_format_endian == REGMAP_ENDIAN_BIG) {
+ KUNIT_EXPECT_EQ(test, rval,
+ be16_to_cpu(val[i % 2]));
+ } else {
+ KUNIT_EXPECT_EQ(test, rval,
+ le16_to_cpu(val[i % 2]));
+ }
+ break;
+ default:
+ KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval);
+ break;
+ }
+ }
+
+ /* The values should appear in the "hardware" */
+ KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val));
+
+ regmap_exit(map);
+}
+
+/*
+ * raw_sync() - check that cached raw and regular writes reach the buffer
+ * only after a cache sync.
+ *
+ * In cache-only mode, raw-writes two random values at registers 2-3 and
+ * writes val[0] (converted to CPU order) to register 6 with regmap_write().
+ * The new values must be readable through the API but must not yet be in the
+ * backing buffer. After leaving cache-only mode, marking the cache dirty and
+ * syncing, the raw bytes are expected to be present in the backing buffer.
+ */
+static void raw_sync(struct kunit *test)
+{
+ struct raw_test_types *t = (struct raw_test_types *)test->param_value;
+ struct regmap *map;
+ struct regmap_config config;
+ struct regmap_ram_data *data;
+ u16 val[2];
+ u16 *hw_buf;
+ unsigned int rval;
+ int i;
+
+ config = raw_regmap_config;
+
+ map = gen_raw_regmap(&config, t, &data);
+ KUNIT_ASSERT_FALSE(test, IS_ERR(map));
+ /* NOTE(review): extra guard in case the assertion above returns */
+ if (IS_ERR(map))
+ return;
+
+ hw_buf = (u16 *)data->vals;
+
+ get_random_bytes(&val, sizeof(val));
+
+ /* Do a regular write and a raw write in cache only mode */
+ regcache_cache_only(map, true);
+ KUNIT_EXPECT_EQ(test, 0, regmap_raw_write(map, 2, val, sizeof(val)));
+ if (config.val_format_endian == REGMAP_ENDIAN_BIG)
+ KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
+ be16_to_cpu(val[0])));
+ else
+ KUNIT_EXPECT_EQ(test, 0, regmap_write(map, 6,
+ le16_to_cpu(val[0])));
+
+ /* We should read back the new values, and defaults for the rest */
+ for (i = 0; i < config.max_register + 1; i++) {
+ KUNIT_EXPECT_EQ(test, 0, regmap_read(map, i, &rval));
+
+ switch (i) {
+ case 2:
+ case 3:
+ case 6:
+ /* i % 2 selects val[0] for registers 2 and 6, val[1] for 3 */
+ if (config.val_format_endian == REGMAP_ENDIAN_BIG) {
+ KUNIT_EXPECT_EQ(test, rval,
+ be16_to_cpu(val[i % 2]));
+ } else {
+ KUNIT_EXPECT_EQ(test, rval,
+ le16_to_cpu(val[i % 2]));
+ }
+ break;
+ default:
+ KUNIT_EXPECT_EQ(test, config.reg_defaults[i].def, rval);
+ break;
+ }
+ }
+
+ /* The values should not appear in the "hardware" */
+ KUNIT_EXPECT_MEMNEQ(test, &hw_buf[2], val, sizeof(val));
+ KUNIT_EXPECT_MEMNEQ(test, &hw_buf[6], val, sizeof(u16));
+
+ /* Clear the write tracking before the sync */
+ for (i = 0; i < config.max_register + 1; i++)
+ data->written[i] = false;
+
+ /* Do the sync */
+ regcache_cache_only(map, false);
+ regcache_mark_dirty(map);
+ KUNIT_EXPECT_EQ(test, 0, regcache_sync(map));
+
+ /* The values should now appear in the "hardware" */
+ KUNIT_EXPECT_MEMEQ(test, &hw_buf[2], val, sizeof(val));
+ KUNIT_EXPECT_MEMEQ(test, &hw_buf[6], val, sizeof(u16));
+
+ regmap_exit(map);
+}
+
static struct kunit_case regmap_test_cases[] = {
KUNIT_CASE_PARAM(basic_read_write, regcache_types_gen_params),
KUNIT_CASE_PARAM(bulk_write, regcache_types_gen_params),
KUNIT_CASE_PARAM(bulk_read, regcache_types_gen_params),
+ KUNIT_CASE_PARAM(write_readonly, regcache_types_gen_params),
+ KUNIT_CASE_PARAM(read_writeonly, regcache_types_gen_params),
KUNIT_CASE_PARAM(reg_defaults, regcache_types_gen_params),
KUNIT_CASE_PARAM(reg_defaults_read_dev, regcache_types_gen_params),
KUNIT_CASE_PARAM(register_patch, regcache_types_gen_params),
@@ -725,8 +1169,15 @@ static struct kunit_case regmap_test_cases[] = {
KUNIT_CASE_PARAM(cache_bypass, real_cache_types_gen_params),
KUNIT_CASE_PARAM(cache_sync, real_cache_types_gen_params),
KUNIT_CASE_PARAM(cache_sync_defaults, real_cache_types_gen_params),
+ KUNIT_CASE_PARAM(cache_sync_readonly, real_cache_types_gen_params),
KUNIT_CASE_PARAM(cache_sync_patch, real_cache_types_gen_params),
KUNIT_CASE_PARAM(cache_drop, sparse_cache_types_gen_params),
+
+ KUNIT_CASE_PARAM(raw_read_defaults_single, raw_test_types_gen_params),
+ KUNIT_CASE_PARAM(raw_read_defaults, raw_test_types_gen_params),
+ KUNIT_CASE_PARAM(raw_write_read_single, raw_test_types_gen_params),
+ KUNIT_CASE_PARAM(raw_write, raw_test_types_gen_params),
+ KUNIT_CASE_PARAM(raw_sync, raw_test_cache_types_gen_params),
{}
};
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 3ccdd86a97e7..8132b5c101c4 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -448,7 +448,7 @@ static struct regmap_mmio_context *regmap_mmio_gen_context(struct device *dev,
if (min_stride < 0)
return ERR_PTR(min_stride);
- if (config->reg_stride < min_stride)
+ if (config->reg_stride && config->reg_stride < min_stride)
return ERR_PTR(-EINVAL);
if (config->use_relaxed_mmio && config->io_port)
diff --git a/drivers/base/regmap/regmap-raw-ram.c b/drivers/base/regmap/regmap-raw-ram.c
new file mode 100644
index 000000000000..c9b800885f3b
--- /dev/null
+++ b/drivers/base/regmap/regmap-raw-ram.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Register map access API - Memory region with raw access
+//
+// This is intended for testing only
+//
+// Copyright (c) 2023, Arm Ltd
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/swab.h>
+
+#include "internal.h"
+
+/*
+ * Convert a 16-bit register address, as formatted on the bus, to CPU order.
+ * Anything other than REGMAP_ENDIAN_BIG is decoded as little endian.
+ */
+static unsigned int decode_reg(enum regmap_endian endian, const void *reg)
+{
+	const u16 *raw = reg;
+
+	return (endian == REGMAP_ENDIAN_BIG) ? be16_to_cpu(*raw)
+					     : le16_to_cpu(*raw);
+}
+
+/*
+ * Bus gather_write operation: copy @val_len bytes of register values into
+ * the backing buffer starting at the register encoded in @reg, and flag each
+ * touched register as written.
+ *
+ * Returns 0 on success, or -EINVAL if the register is not two bytes long or
+ * the value length is not a whole number of 16-bit values.
+ */
+static int regmap_raw_ram_gather_write(void *context,
+				       const void *reg, size_t reg_len,
+				       const void *val, size_t val_len)
+{
+	struct regmap_ram_data *ram = context;
+	u16 *store = (u16 *)ram->vals;
+	unsigned int first;
+	int n;
+
+	if (reg_len != 2 || val_len % 2)
+		return -EINVAL;
+
+	first = decode_reg(ram->reg_endian, reg);
+	memcpy(&store[first], val, val_len);
+
+	/* One flag per 16-bit register covered by the transfer */
+	for (n = 0; n < val_len / 2; n++)
+		ram->written[first + n] = true;
+
+	return 0;
+}
+
+/*
+ * Bus write operation: @data holds a two byte register address followed by
+ * the register values. Split it and hand it to the gather write.
+ *
+ * Returns -EINVAL if @count is too short to contain a register address, so
+ * that the value length passed on can never underflow; otherwise returns
+ * whatever regmap_raw_ram_gather_write() returns.
+ */
+static int regmap_raw_ram_write(void *context, const void *data, size_t count)
+{
+	const u8 *bytes = data;
+
+	if (count < 2)
+		return -EINVAL;
+
+	return regmap_raw_ram_gather_write(context, bytes, 2,
+					   bytes + 2, count - 2);
+}
+
+/*
+ * Bus read operation: copy @val_len bytes of register values out of the
+ * backing buffer starting at the register encoded in @reg, and flag each
+ * touched register as read.
+ *
+ * Returns 0 on success, or -EINVAL if the register is not two bytes long or
+ * the value length is not a whole number of 16-bit values.
+ */
+static int regmap_raw_ram_read(void *context,
+			       const void *reg, size_t reg_len,
+			       void *val, size_t val_len)
+{
+	struct regmap_ram_data *ram = context;
+	u16 *store = (u16 *)ram->vals;
+	unsigned int first;
+	int n;
+
+	if (reg_len != 2 || val_len % 2)
+		return -EINVAL;
+
+	first = decode_reg(ram->reg_endian, reg);
+	memcpy(val, &store[first], val_len);
+
+	/* One flag per 16-bit register covered by the transfer */
+	for (n = 0; n < val_len / 2; n++)
+		ram->read[first + n] = true;
+
+	return 0;
+}
+
+/*
+ * Bus free_context operation: release the value buffer, both tracking
+ * arrays and finally the context structure itself.
+ */
+static void regmap_raw_ram_free_context(void *context)
+{
+	struct regmap_ram_data *ram = context;
+
+	kfree(ram->vals);
+	kfree(ram->read);
+	kfree(ram->written);
+	kfree(ram);
+}
+
+/*
+ * Bus operations for the raw RAM backed regmap: formatted reads and writes
+ * are served from the buffer in struct regmap_ram_data, and the context is
+ * freed through regmap_raw_ram_free_context().
+ */
+static const struct regmap_bus regmap_raw_ram = {
+ .fast_io = true,
+ .write = regmap_raw_ram_write,
+ .gather_write = regmap_raw_ram_gather_write,
+ .read = regmap_raw_ram_read,
+ .free_context = regmap_raw_ram_free_context,
+};
+
+/*
+ * __regmap_init_raw_ram() - create a regmap backed by a raw memory buffer.
+ * @config:    regmap configuration; reg_bits must be 16 and max_register
+ *             must be non-zero
+ * @data:      backing store context; its read/written tracking arrays and
+ *             reg_endian are set up here
+ * @lock_key:  lock class key passed through to __regmap_init()
+ * @lock_name: lock class name passed through to __regmap_init()
+ *
+ * Return: the new regmap, or an ERR_PTR() on failure. On failure the
+ * tracking arrays allocated here are freed again and the pointers in @data
+ * are reset to NULL; everything else in @data is left to the caller.
+ */
+struct regmap *__regmap_init_raw_ram(const struct regmap_config *config,
+				     struct regmap_ram_data *data,
+				     struct lock_class_key *lock_key,
+				     const char *lock_name)
+{
+	struct regmap *map;
+
+	if (config->reg_bits != 16)
+		return ERR_PTR(-EINVAL);
+
+	if (!config->max_register) {
+		pr_crit("No max_register specified for RAM regmap\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* kcalloc() takes the element count first, then the element size */
+	data->read = kcalloc(config->max_register + 1, sizeof(bool),
+			     GFP_KERNEL);
+	if (!data->read)
+		return ERR_PTR(-ENOMEM);
+
+	data->written = kcalloc(config->max_register + 1, sizeof(bool),
+				GFP_KERNEL);
+	if (!data->written) {
+		map = ERR_PTR(-ENOMEM);
+		goto err_free;
+	}
+
+	data->reg_endian = config->reg_format_endian;
+
+	map = __regmap_init(NULL, &regmap_raw_ram, data, config,
+			    lock_key, lock_name);
+	if (IS_ERR(map))
+		goto err_free;
+
+	return map;
+
+err_free:
+	/* Undo only what was allocated here; clear the stale pointers */
+	kfree(data->written);
+	data->written = NULL;
+	kfree(data->read);
+	data->read = NULL;
+
+	return map;
+}
+EXPORT_SYMBOL_GPL(__regmap_init_raw_ram);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c
index 4c2b94b3e30b..6af692844c19 100644
--- a/drivers/base/regmap/regmap-spi-avmm.c
+++ b/drivers/base/regmap/regmap-spi-avmm.c
@@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = {
.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT,
- .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
+ .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
.free_context = spi_avmm_bridge_ctx_free,
};
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index fa2d3fba6ac9..89a7f1c459c1 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2983,6 +2983,11 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
size_t chunk_count, chunk_bytes;
size_t chunk_regs = val_count;
+ if (!map->cache_bypass && map->cache_only) {
+ ret = -EBUSY;
+ goto out;
+ }
+
if (!map->read) {
ret = -ENOTSUPP;
goto out;
@@ -3078,18 +3083,19 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
goto out_unlock;
}
+ /*
+ * We have not defined the FIFO semantics for cache, as the
+ * cache is just one value deep. Should we return the last
+ * written value? Just avoid this by always reading the FIFO
+ * even when using cache. Cache only will not work.
+ */
+ if (!map->cache_bypass && map->cache_only) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
/* Use the accelerated operation if we can */
if (map->bus->reg_noinc_read) {
- /*
- * We have not defined the FIFO semantics for cache, as the
- * cache is just one value deep. Should we return the last
- * written value? Just avoid this by always reading the FIFO
- * even when using cache. Cache only will not work.
- */
- if (map->cache_only) {
- ret = -EBUSY;
- goto out_unlock;
- }
ret = regmap_noinc_readwrite(map, reg, val, val_len, false);
goto out_unlock;
}
@@ -3273,7 +3279,7 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg,
tmp = orig & ~mask;
tmp |= val & mask;
- if (force_write || (tmp != orig)) {
+ if (force_write || (tmp != orig) || map->force_write_field) {
ret = _regmap_write(map, reg, tmp);
if (ret == 0 && change)
*change = true;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 4c8b2ba579ee..e460c9799d9f 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1532,7 +1532,7 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
struct amiga_floppy_struct *p = bdev->bd_disk->private_data;
@@ -1607,7 +1607,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
return 0;
}
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
int ret;
@@ -1654,10 +1654,10 @@ static void fd_probe(int dev)
* /dev/PS0 etc), and disallows simultaneous access to the same
* drive with different device numbers.
*/
-static int floppy_open(struct block_device *bdev, fmode_t mode)
+static int floppy_open(struct gendisk *disk, blk_mode_t mode)
{
- int drive = MINOR(bdev->bd_dev) & 3;
- int system = (MINOR(bdev->bd_dev) & 4) >> 2;
+ int drive = disk->first_minor & 3;
+ int system = (disk->first_minor & 4) >> 2;
int old_dev;
unsigned long flags;
@@ -1673,10 +1673,9 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
mutex_unlock(&amiflop_mutex);
return -ENXIO;
}
-
- if (mode & (FMODE_READ|FMODE_WRITE)) {
- bdev_check_media_change(bdev);
- if (mode & FMODE_WRITE) {
+ if (mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) {
+ disk_check_media_change(disk);
+ if (mode & BLK_OPEN_WRITE) {
int wrprot;
get_fdc(drive);
@@ -1691,7 +1690,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
}
}
}
-
local_irq_save(flags);
fd_ref[drive]++;
fd_device[drive] = system;
@@ -1709,7 +1707,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return 0;
}
-static void floppy_release(struct gendisk *disk, fmode_t mode)
+static void floppy_release(struct gendisk *disk)
{
struct amiga_floppy_struct *p = disk->private_data;
int drive = p - unit;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 128722cf6c3c..cf6883756155 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -204,9 +204,9 @@ aoedisk_rm_debugfs(struct aoedev *d)
}
static int
-aoeblk_open(struct block_device *bdev, fmode_t mode)
+aoeblk_open(struct gendisk *disk, blk_mode_t mode)
{
- struct aoedev *d = bdev->bd_disk->private_data;
+ struct aoedev *d = disk->private_data;
ulong flags;
if (!virt_addr_valid(d)) {
@@ -232,7 +232,7 @@ aoeblk_open(struct block_device *bdev, fmode_t mode)
}
static void
-aoeblk_release(struct gendisk *disk, fmode_t mode)
+aoeblk_release(struct gendisk *disk)
{
struct aoedev *d = disk->private_data;
ulong flags;
@@ -285,7 +285,7 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
static int
-aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
+aoeblk_ioctl(struct block_device *bdev, blk_mode_t mode, uint cmd, ulong arg)
{
struct aoedev *d;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 4c666f72203f..a42c4bcc85ba 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -49,7 +49,7 @@ static int emsgs_head_idx, emsgs_tail_idx;
static struct completion emsgs_comp;
static spinlock_t emsgs_lock;
static int nblocked_emsgs_readers;
-static struct class *aoe_class;
+
static struct aoe_chardev chardevs[] = {
{ MINOR_ERR, "err" },
{ MINOR_DISCOVER, "discover" },
@@ -58,6 +58,16 @@ static struct aoe_chardev chardevs[] = {
{ MINOR_FLUSH, "flush" },
};
+/*
+ * Class devnode callback: returns a newly allocated "etherd/<device name>"
+ * string, or NULL if the allocation fails. @mode is not modified.
+ * NOTE(review): presumably the caller owns and frees the string - confirm.
+ */
+static char *aoe_devnode(const struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev));
+}
+
+/*
+ * Statically defined "aoe" device class; device nodes are named by
+ * aoe_devnode().
+ */
+static const struct class aoe_class = {
+ .name = "aoe",
+ .devnode = aoe_devnode,
+};
+
static int
discover(void)
{
@@ -273,11 +283,6 @@ static const struct file_operations aoe_fops = {
.llseek = noop_llseek,
};
-static char *aoe_devnode(const struct device *dev, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev));
-}
-
int __init
aoechr_init(void)
{
@@ -290,15 +295,14 @@ aoechr_init(void)
}
init_completion(&emsgs_comp);
spin_lock_init(&emsgs_lock);
- aoe_class = class_create("aoe");
- if (IS_ERR(aoe_class)) {
+ n = class_register(&aoe_class);
+ if (n) {
unregister_chrdev(AOE_MAJOR, "aoechr");
- return PTR_ERR(aoe_class);
+ return n;
}
- aoe_class->devnode = aoe_devnode;
for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
- device_create(aoe_class, NULL,
+ device_create(&aoe_class, NULL,
MKDEV(AOE_MAJOR, chardevs[i].minor), NULL,
chardevs[i].name);
@@ -311,8 +315,8 @@ aoechr_exit(void)
int i;
for (i = 0; i < ARRAY_SIZE(chardevs); ++i)
- device_destroy(aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor));
- class_destroy(aoe_class);
+ device_destroy(&aoe_class, MKDEV(AOE_MAJOR, chardevs[i].minor));
+ class_unregister(&aoe_class);
unregister_chrdev(AOE_MAJOR, "aoechr");
}
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 9deb4df6bdb8..cd738cab725f 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -442,13 +442,13 @@ static void fd_times_out(struct timer_list *unused);
static void finish_fdc( void );
static void finish_fdc_done( int dummy );
static void setup_req_params( int drive );
-static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
- cmd, unsigned long param);
+static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, unsigned long param);
static void fd_probe( int drive );
static int fd_test_drive_present( int drive );
static void config_types( void );
-static int floppy_open(struct block_device *bdev, fmode_t mode);
-static void floppy_release(struct gendisk *disk, fmode_t mode);
+static int floppy_open(struct gendisk *disk, blk_mode_t mode);
+static void floppy_release(struct gendisk *disk);
/************************* End of Prototypes **************************/
@@ -1581,7 +1581,7 @@ out:
return BLK_STS_OK;
}
-static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
struct gendisk *disk = bdev->bd_disk;
@@ -1760,15 +1760,15 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
/* invalidate the buffer track to force a reread */
BufferDrive = -1;
set_bit(drive, &fake_change);
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
+ if (disk_check_media_change(disk))
+ floppy_revalidate(disk);
return 0;
default:
return -EINVAL;
}
}
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
int ret;
@@ -1915,32 +1915,31 @@ static void __init config_types( void )
* drive with different device numbers.
*/
-static int floppy_open(struct block_device *bdev, fmode_t mode)
+static int floppy_open(struct gendisk *disk, blk_mode_t mode)
{
- struct atari_floppy_struct *p = bdev->bd_disk->private_data;
- int type = MINOR(bdev->bd_dev) >> 2;
+ struct atari_floppy_struct *p = disk->private_data;
+ int type = disk->first_minor >> 2;
DPRINT(("fd_open: type=%d\n",type));
if (p->ref && p->type != type)
return -EBUSY;
- if (p->ref == -1 || (p->ref && mode & FMODE_EXCL))
+ if (p->ref == -1 || (p->ref && mode & BLK_OPEN_EXCL))
return -EBUSY;
-
- if (mode & FMODE_EXCL)
+ if (mode & BLK_OPEN_EXCL)
p->ref = -1;
else
p->ref++;
p->type = type;
- if (mode & FMODE_NDELAY)
+ if (mode & BLK_OPEN_NDELAY)
return 0;
- if (mode & (FMODE_READ|FMODE_WRITE)) {
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
- if (mode & FMODE_WRITE) {
+ if (mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) {
+ if (disk_check_media_change(disk))
+ floppy_revalidate(disk);
+ if (mode & BLK_OPEN_WRITE) {
if (p->wpstat) {
if (p->ref < 0)
p->ref = 0;
@@ -1953,18 +1952,18 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return 0;
}
-static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+static int floppy_unlocked_open(struct gendisk *disk, blk_mode_t mode)
{
int ret;
mutex_lock(&ataflop_mutex);
- ret = floppy_open(bdev, mode);
+ ret = floppy_open(disk, mode);
mutex_unlock(&ataflop_mutex);
return ret;
}
-static void floppy_release(struct gendisk *disk, fmode_t mode)
+static void floppy_release(struct gendisk *disk)
{
struct atari_floppy_struct *p = disk->private_data;
mutex_lock(&ataflop_mutex);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bcad9b926b0c..970bd6ff38c4 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -19,7 +19,7 @@
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
-#include <linux/radix-tree.h>
+#include <linux/xarray.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
@@ -28,7 +28,7 @@
#include <linux/uaccess.h>
/*
- * Each block ramdisk device has a radix_tree brd_pages of pages that stores
+ * Each block ramdisk device has an xarray brd_pages of pages that stores
* the pages containing the block device's contents. A brd page's ->index is
* its offset in PAGE_SIZE units. This is similar to, but in no way connected
* with, the kernel's pagecache or buffer cache (which sit above our block
@@ -40,11 +40,9 @@ struct brd_device {
struct list_head brd_list;
/*
- * Backing store of pages and lock to protect it. This is the contents
- * of the block device.
+ * Backing store of pages. This is the contents of the block device.
*/
- spinlock_t brd_lock;
- struct radix_tree_root brd_pages;
+ struct xarray brd_pages;
u64 brd_nr_pages;
};
@@ -56,21 +54,8 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
pgoff_t idx;
struct page *page;
- /*
- * The page lifetime is protected by the fact that we have opened the
- * device node -- brd pages will never be deleted under us, so we
- * don't need any further locking or refcounting.
- *
- * This is strictly true for the radix-tree nodes as well (ie. we
- * don't actually need the rcu_read_lock()), however that is not a
- * documented feature of the radix-tree API so it is better to be
- * safe here (we don't have total exclusion from radix tree updates
- * here, only deletes).
- */
- rcu_read_lock();
idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
- page = radix_tree_lookup(&brd->brd_pages, idx);
- rcu_read_unlock();
+ page = xa_load(&brd->brd_pages, idx);
BUG_ON(page && page->index != idx);
@@ -83,7 +68,7 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
- struct page *page;
+ struct page *page, *cur;
int ret = 0;
page = brd_lookup_page(brd, sector);
@@ -94,71 +79,42 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
if (!page)
return -ENOMEM;
- if (radix_tree_maybe_preload(gfp)) {
- __free_page(page);
- return -ENOMEM;
- }
+ xa_lock(&brd->brd_pages);
- spin_lock(&brd->brd_lock);
idx = sector >> PAGE_SECTORS_SHIFT;
page->index = idx;
- if (radix_tree_insert(&brd->brd_pages, idx, page)) {
+
+ cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp);
+
+ if (unlikely(cur)) {
__free_page(page);
- page = radix_tree_lookup(&brd->brd_pages, idx);
- if (!page)
- ret = -ENOMEM;
- else if (page->index != idx)
+ ret = xa_err(cur);
+ if (!ret && (cur->index != idx))
ret = -EIO;
} else {
brd->brd_nr_pages++;
}
- spin_unlock(&brd->brd_lock);
- radix_tree_preload_end();
+ xa_unlock(&brd->brd_pages);
+
return ret;
}
/*
- * Free all backing store pages and radix tree. This must only be called when
+ * Free all backing store pages and xarray. This must only be called when
* there are no other users of the device.
*/
-#define FREE_BATCH 16
static void brd_free_pages(struct brd_device *brd)
{
- unsigned long pos = 0;
- struct page *pages[FREE_BATCH];
- int nr_pages;
-
- do {
- int i;
-
- nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
- (void **)pages, pos, FREE_BATCH);
-
- for (i = 0; i < nr_pages; i++) {
- void *ret;
-
- BUG_ON(pages[i]->index < pos);
- pos = pages[i]->index;
- ret = radix_tree_delete(&brd->brd_pages, pos);
- BUG_ON(!ret || ret != pages[i]);
- __free_page(pages[i]);
- }
-
- pos++;
+ struct page *page;
+ pgoff_t idx;
- /*
- * It takes 3.4 seconds to remove 80GiB ramdisk.
- * So, we need cond_resched to avoid stalling the CPU.
- */
+ xa_for_each(&brd->brd_pages, idx, page) {
+ __free_page(page);
cond_resched();
+ }
- /*
- * This assumes radix_tree_gang_lookup always returns as
- * many pages as possible. If the radix-tree code changes,
- * so will this have to.
- */
- } while (nr_pages == FREE_BATCH);
+ xa_destroy(&brd->brd_pages);
}
/*
@@ -372,8 +328,7 @@ static int brd_alloc(int i)
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
- spin_lock_init(&brd->brd_lock);
- INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);
+ xa_init(&brd->brd_pages);
snprintf(buf, DISK_NAME_LEN, "ram%d", i);
if (!IS_ERR_OR_NULL(brd_debugfs_dir))
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 6ac8c54b44c7..85ca000a0564 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1043,9 +1043,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
&drbd_md_io_bio_set);
bio->bi_iter.bi_sector = on_disk_sector;
- /* bio_add_page of a single page to an empty bio will always succeed,
- * according to api. Do we want to assert that? */
- bio_add_page(bio, page, len, 0);
+ __bio_add_page(bio, page, len, 0);
bio->bi_private = ctx;
bio->bi_end_io = drbd_bm_endio;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 83987e7a5ef2..79ab532aabaf 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -37,7 +37,6 @@
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
-#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
@@ -50,8 +49,8 @@
#include "drbd_debugfs.h"
static DEFINE_MUTEX(drbd_main_mutex);
-static int drbd_open(struct block_device *bdev, fmode_t mode);
-static void drbd_release(struct gendisk *gd, fmode_t mode);
+static int drbd_open(struct gendisk *disk, blk_mode_t mode);
+static void drbd_release(struct gendisk *gd);
static void md_sync_timer_fn(struct timer_list *t);
static int w_bitmap_io(struct drbd_work *w, int unused);
@@ -1540,6 +1539,8 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
int offset, size_t size, unsigned msg_flags)
{
struct socket *socket = peer_device->connection->data.socket;
+ struct msghdr msg = { .msg_flags = msg_flags, };
+ struct bio_vec bvec;
int len = size;
int err = -EIO;
@@ -1549,15 +1550,17 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
* put_page(); and would cause either a VM_BUG directly, or
* __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */
- if (drbd_disable_sendpage || !sendpage_ok(page))
- return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
+ if (!drbd_disable_sendpage && sendpage_ok(page))
+ msg.msg_flags |= MSG_NOSIGNAL | MSG_SPLICE_PAGES;
- msg_flags |= MSG_NOSIGNAL;
drbd_update_congested(peer_device->connection);
do {
int sent;
- sent = socket->ops->sendpage(socket, page, offset, len, msg_flags);
+ bvec_set_page(&bvec, page, offset, len);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
+
+ sent = sock_sendmsg(socket, &msg);
if (sent <= 0) {
if (sent == -EAGAIN) {
if (we_should_drop_the_connection(peer_device->connection, socket))
@@ -1883,9 +1886,9 @@ int drbd_send_all(struct drbd_connection *connection, struct socket *sock, void
return 0;
}
-static int drbd_open(struct block_device *bdev, fmode_t mode)
+static int drbd_open(struct gendisk *disk, blk_mode_t mode)
{
- struct drbd_device *device = bdev->bd_disk->private_data;
+ struct drbd_device *device = disk->private_data;
unsigned long flags;
int rv = 0;
@@ -1895,7 +1898,7 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
* and no race with updating open_cnt */
if (device->state.role != R_PRIMARY) {
- if (mode & FMODE_WRITE)
+ if (mode & BLK_OPEN_WRITE)
rv = -EROFS;
else if (!drbd_allow_oos)
rv = -EMEDIUMTYPE;
@@ -1909,9 +1912,10 @@ static int drbd_open(struct block_device *bdev, fmode_t mode)
return rv;
}
-static void drbd_release(struct gendisk *gd, fmode_t mode)
+static void drbd_release(struct gendisk *gd)
{
struct drbd_device *device = gd->private_data;
+
mutex_lock(&drbd_main_mutex);
device->open_cnt--;
mutex_unlock(&drbd_main_mutex);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1a5d3d72d91d..cddae6f4b00f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1640,8 +1640,8 @@ static struct block_device *open_backing_dev(struct drbd_device *device,
struct block_device *bdev;
int err = 0;
- bdev = blkdev_get_by_path(bdev_path,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
+ bdev = blkdev_get_by_path(bdev_path, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ claim_ptr, NULL);
if (IS_ERR(bdev)) {
drbd_err(device, "open(\"%s\") failed with %ld\n",
bdev_path, PTR_ERR(bdev));
@@ -1653,7 +1653,7 @@ static struct block_device *open_backing_dev(struct drbd_device *device,
err = bd_link_disk_holder(bdev, device->vdisk);
if (err) {
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(bdev, claim_ptr);
drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
bdev_path, err);
bdev = ERR_PTR(err);
@@ -1695,13 +1695,13 @@ static int open_backing_devices(struct drbd_device *device,
}
static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
- bool do_bd_unlink)
+ void *claim_ptr, bool do_bd_unlink)
{
if (!bdev)
return;
if (do_bd_unlink)
bd_unlink_disk_holder(bdev, device->vdisk);
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(bdev, claim_ptr);
}
void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
@@ -1709,8 +1709,11 @@ void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *
if (ldev == NULL)
return;
- close_backing_dev(device, ldev->md_bdev, ldev->md_bdev != ldev->backing_bdev);
- close_backing_dev(device, ldev->backing_bdev, true);
+ close_backing_dev(device, ldev->md_bdev,
+ ldev->md.meta_dev_idx < 0 ?
+ (void *)device : (void *)drbd_m_holder,
+ ldev->md_bdev != ldev->backing_bdev);
+ close_backing_dev(device, ldev->backing_bdev, device, true);
kfree(ldev->disk_conf);
kfree(ldev);
@@ -2126,8 +2129,11 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
fail:
conn_reconfig_done(connection);
if (nbc) {
- close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
- close_backing_dev(device, nbc->backing_bdev, true);
+ close_backing_dev(device, nbc->md_bdev,
+ nbc->disk_conf->meta_dev_idx < 0 ?
+ (void *)device : (void *)drbd_m_holder,
+ nbc->md_bdev != nbc->backing_bdev);
+ close_backing_dev(device, nbc->backing_bdev, device, true);
kfree(nbc);
}
kfree(new_disk_conf);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 8c2bc47de473..0c9f54197768 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -27,7 +27,6 @@
#include <uapi/linux/sched/types.h>
#include <linux/sched/signal.h>
#include <linux/pkt_sched.h>
-#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cec2c20f5e59..2db9b186b977 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -402,7 +402,7 @@ static struct floppy_drive_struct drive_state[N_DRIVE];
static struct floppy_write_errors write_errors[N_DRIVE];
static struct timer_list motor_off_timer[N_DRIVE];
static struct blk_mq_tag_set tag_sets[N_DRIVE];
-static struct block_device *opened_bdev[N_DRIVE];
+static struct gendisk *opened_disk[N_DRIVE];
static DEFINE_MUTEX(open_lock);
static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
@@ -3210,13 +3210,13 @@ static int floppy_raw_cmd_ioctl(int type, int drive, int cmd,
#endif
-static int invalidate_drive(struct block_device *bdev)
+static int invalidate_drive(struct gendisk *disk)
{
/* invalidate the buffer track to force a reread */
- set_bit((long)bdev->bd_disk->private_data, &fake_change);
+ set_bit((long)disk->private_data, &fake_change);
process_fd_request();
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
+ if (disk_check_media_change(disk))
+ floppy_revalidate(disk);
return 0;
}
@@ -3251,10 +3251,11 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
floppy_type[type].size + 1;
process_fd_request();
for (cnt = 0; cnt < N_DRIVE; cnt++) {
- struct block_device *bdev = opened_bdev[cnt];
- if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
+ struct gendisk *disk = opened_disk[cnt];
+
+ if (!disk || ITYPE(drive_state[cnt].fd_device) != type)
continue;
- __invalidate_device(bdev, true);
+ __invalidate_device(disk->part0, true);
}
mutex_unlock(&open_lock);
} else {
@@ -3287,7 +3288,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
drive_state[current_drive].maxtrack ||
((user_params[drive].sect ^ oldStretch) &
(FD_SWAPSIDES | FD_SECTBASEMASK)))
- invalidate_drive(bdev);
+ invalidate_drive(bdev->bd_disk);
else
process_fd_request();
}
@@ -3393,8 +3394,8 @@ static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE]
return true;
}
-static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
- unsigned long param)
+static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, unsigned long param)
{
int drive = (long)bdev->bd_disk->private_data;
int type = ITYPE(drive_state[drive].fd_device);
@@ -3427,7 +3428,8 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return ret;
/* permission checks */
- if (((cmd & 0x40) && !(mode & (FMODE_WRITE | FMODE_WRITE_IOCTL))) ||
+ if (((cmd & 0x40) &&
+ !(mode & (BLK_OPEN_WRITE | BLK_OPEN_WRITE_IOCTL))) ||
((cmd & 0x80) && !capable(CAP_SYS_ADMIN)))
return -EPERM;
@@ -3464,7 +3466,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
current_type[drive] = NULL;
floppy_sizes[drive] = MAX_DISK_SIZE << 1;
drive_state[drive].keep_data = 0;
- return invalidate_drive(bdev);
+ return invalidate_drive(bdev->bd_disk);
case FDSETPRM:
case FDDEFPRM:
return set_geometry(cmd, &inparam.g, drive, type, bdev);
@@ -3503,7 +3505,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
case FDFLUSH:
if (lock_fdc(drive))
return -EINTR;
- return invalidate_drive(bdev);
+ return invalidate_drive(bdev->bd_disk);
case FDSETEMSGTRESH:
drive_params[drive].max_errors.reporting = (unsigned short)(param & 0x0f);
return 0;
@@ -3565,7 +3567,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
return 0;
}
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
int ret;
@@ -3653,8 +3655,8 @@ struct compat_floppy_write_errors {
#define FDGETFDCSTAT32 _IOR(2, 0x15, struct compat_floppy_fdc_state)
#define FDWERRORGET32 _IOR(2, 0x17, struct compat_floppy_write_errors)
-static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned int cmd,
- struct compat_floppy_struct __user *arg)
+static int compat_set_geometry(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, struct compat_floppy_struct __user *arg)
{
struct floppy_struct v;
int drive, type;
@@ -3663,7 +3665,7 @@ static int compat_set_geometry(struct block_device *bdev, fmode_t mode, unsigned
BUILD_BUG_ON(offsetof(struct floppy_struct, name) !=
offsetof(struct compat_floppy_struct, name));
- if (!(mode & (FMODE_WRITE | FMODE_WRITE_IOCTL)))
+ if (!(mode & (BLK_OPEN_WRITE | BLK_OPEN_WRITE_IOCTL)))
return -EPERM;
memset(&v, 0, sizeof(struct floppy_struct));
@@ -3860,8 +3862,8 @@ static int compat_werrorget(int drive,
return 0;
}
-static int fd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
- unsigned long param)
+static int fd_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, unsigned long param)
{
int drive = (long)bdev->bd_disk->private_data;
switch (cmd) {
@@ -3962,7 +3964,7 @@ static void __init config_types(void)
pr_cont("\n");
}
-static void floppy_release(struct gendisk *disk, fmode_t mode)
+static void floppy_release(struct gendisk *disk)
{
int drive = (long)disk->private_data;
@@ -3973,7 +3975,7 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
drive_state[drive].fd_ref = 0;
}
if (!drive_state[drive].fd_ref)
- opened_bdev[drive] = NULL;
+ opened_disk[drive] = NULL;
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
}
@@ -3983,9 +3985,9 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
* /dev/PS0 etc), and disallows simultaneous access to the same
* drive with different device numbers.
*/
-static int floppy_open(struct block_device *bdev, fmode_t mode)
+static int floppy_open(struct gendisk *disk, blk_mode_t mode)
{
- int drive = (long)bdev->bd_disk->private_data;
+ int drive = (long)disk->private_data;
int old_dev, new_dev;
int try;
int res = -EBUSY;
@@ -3994,7 +3996,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
mutex_lock(&floppy_mutex);
mutex_lock(&open_lock);
old_dev = drive_state[drive].fd_device;
- if (opened_bdev[drive] && opened_bdev[drive] != bdev)
+ if (opened_disk[drive] && opened_disk[drive] != disk)
goto out2;
if (!drive_state[drive].fd_ref && (drive_params[drive].flags & FD_BROKEN_DCL)) {
@@ -4004,7 +4006,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
drive_state[drive].fd_ref++;
- opened_bdev[drive] = bdev;
+ opened_disk[drive] = disk;
res = -ENXIO;
@@ -4038,7 +4040,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
}
}
- new_dev = MINOR(bdev->bd_dev);
+ new_dev = disk->first_minor;
drive_state[drive].fd_device = new_dev;
set_capacity(disks[drive][ITYPE(new_dev)], floppy_sizes[new_dev]);
if (old_dev != -1 && old_dev != new_dev) {
@@ -4048,21 +4050,20 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
if (fdc_state[FDC(drive)].rawcmd == 1)
fdc_state[FDC(drive)].rawcmd = 2;
-
- if (!(mode & FMODE_NDELAY)) {
- if (mode & (FMODE_READ|FMODE_WRITE)) {
+ if (!(mode & BLK_OPEN_NDELAY)) {
+ if (mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) {
drive_state[drive].last_checked = 0;
clear_bit(FD_OPEN_SHOULD_FAIL_BIT,
&drive_state[drive].flags);
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
+ if (disk_check_media_change(disk))
+ floppy_revalidate(disk);
if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags))
goto out;
if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags))
goto out;
}
res = -EROFS;
- if ((mode & FMODE_WRITE) &&
+ if ((mode & BLK_OPEN_WRITE) &&
!test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags))
goto out;
}
@@ -4073,7 +4074,7 @@ out:
drive_state[drive].fd_ref--;
if (!drive_state[drive].fd_ref)
- opened_bdev[drive] = NULL;
+ opened_disk[drive] = NULL;
out2:
mutex_unlock(&open_lock);
mutex_unlock(&floppy_mutex);
@@ -4147,7 +4148,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
cbdata.drive = drive;
bio_init(&bio, bdev, &bio_vec, 1, REQ_OP_READ);
- bio_add_page(&bio, page, block_size(bdev), 0);
+ __bio_add_page(&bio, page, block_size(bdev), 0);
bio.bi_iter.bi_sector = 0;
bio.bi_flags |= (1 << BIO_QUIET);
@@ -4203,7 +4204,8 @@ static int floppy_revalidate(struct gendisk *disk)
drive_state[drive].generation++;
if (drive_no_geom(drive)) {
/* auto-sensing */
- res = __floppy_read_block_0(opened_bdev[drive], drive);
+ res = __floppy_read_block_0(opened_disk[drive]->part0,
+ drive);
} else {
if (cf)
poll_drive(false, FD_RAW_NEED_DISK);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bc31bb7072a2..37511d2b2caf 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -990,7 +990,7 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static int loop_configure(struct loop_device *lo, fmode_t mode,
+static int loop_configure(struct loop_device *lo, blk_mode_t mode,
struct block_device *bdev,
const struct loop_config *config)
{
@@ -1014,8 +1014,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
*/
- if (!(mode & FMODE_EXCL)) {
- error = bd_prepare_to_claim(bdev, loop_configure);
+ if (!(mode & BLK_OPEN_EXCL)) {
+ error = bd_prepare_to_claim(bdev, loop_configure, NULL);
if (error)
goto out_putf;
}
@@ -1050,7 +1050,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
if (error)
goto out_unlock;
- if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
+ if (!(file->f_mode & FMODE_WRITE) || !(mode & BLK_OPEN_WRITE) ||
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
@@ -1116,7 +1116,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
if (partscan)
loop_reread_partitions(lo);
- if (!(mode & FMODE_EXCL))
+ if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, loop_configure);
return 0;
@@ -1124,7 +1124,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
out_unlock:
loop_global_unlock(lo, is_loop);
out_bdev:
- if (!(mode & FMODE_EXCL))
+ if (!(mode & BLK_OPEN_EXCL))
bd_abort_claiming(bdev, loop_configure);
out_putf:
fput(file);
@@ -1528,7 +1528,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
return err;
}
-static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+static int lo_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
@@ -1563,24 +1563,22 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
return loop_clr_fd(lo);
case LOOP_SET_STATUS:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
+ if ((mode & BLK_OPEN_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_status_old(lo, argp);
- }
break;
case LOOP_GET_STATUS:
return loop_get_status_old(lo, argp);
case LOOP_SET_STATUS64:
err = -EPERM;
- if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
+ if ((mode & BLK_OPEN_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_status64(lo, argp);
- }
break;
case LOOP_GET_STATUS64:
return loop_get_status64(lo, argp);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
- if (!(mode & FMODE_WRITE) && !capable(CAP_SYS_ADMIN))
+ if (!(mode & BLK_OPEN_WRITE) && !capable(CAP_SYS_ADMIN))
return -EPERM;
fallthrough;
default:
@@ -1691,7 +1689,7 @@ loop_get_status_compat(struct loop_device *lo,
return err;
}
-static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
+static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
@@ -1727,7 +1725,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void lo_release(struct gendisk *disk)
{
struct loop_device *lo = disk->private_data;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 815d77ba6381..b200950e8fb5 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3041,7 +3041,7 @@ static int rssd_disk_name_format(char *prefix,
* structure pointer.
*/
static int mtip_block_ioctl(struct block_device *dev,
- fmode_t mode,
+ blk_mode_t mode,
unsigned cmd,
unsigned long arg)
{
@@ -3079,7 +3079,7 @@ static int mtip_block_ioctl(struct block_device *dev,
* structure pointer.
*/
static int mtip_block_compat_ioctl(struct block_device *dev,
- fmode_t mode,
+ blk_mode_t mode,
unsigned cmd,
unsigned long arg)
{
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 65ecde3e2a5b..8576d696c7a2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1502,7 +1502,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
return -ENOTTY;
}
-static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+static int nbd_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct nbd_device *nbd = bdev->bd_disk->private_data;
@@ -1553,13 +1553,13 @@ static struct nbd_config *nbd_alloc_config(void)
return config;
}
-static int nbd_open(struct block_device *bdev, fmode_t mode)
+static int nbd_open(struct gendisk *disk, blk_mode_t mode)
{
struct nbd_device *nbd;
int ret = 0;
mutex_lock(&nbd_index_mutex);
- nbd = bdev->bd_disk->private_data;
+ nbd = disk->private_data;
if (!nbd) {
ret = -ENXIO;
goto out;
@@ -1587,17 +1587,17 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
refcount_inc(&nbd->refs);
mutex_unlock(&nbd->config_lock);
if (max_part)
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
} else if (nbd_disconnected(nbd->config)) {
if (max_part)
- set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
}
out:
mutex_unlock(&nbd_index_mutex);
return ret;
}
-static void nbd_release(struct gendisk *disk, fmode_t mode)
+static void nbd_release(struct gendisk *disk)
{
struct nbd_device *nbd = disk->private_data;
@@ -1776,7 +1776,8 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
if (err == -ENOSPC)
err = -EEXIST;
} else {
- err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
+ err = idr_alloc(&nbd_index_idr, nbd, 0,
+ (MINORMASK >> part_shift) + 1, GFP_KERNEL);
if (err >= 0)
index = err;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d5d7884cedd4..a1428538bda5 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -46,47 +46,34 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/pktcdvd.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
+#include <linux/backing-dev.h>
#include <linux/compat.h>
-#include <linux/kthread.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
#include <linux/errno.h>
-#include <linux/spinlock.h>
#include <linux/file.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/miscdevice.h>
#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/nospec.h>
+#include <linux/pktcdvd.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
-#include <linux/backing-dev.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_ioctl.h>
-#include <scsi/scsi.h>
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/nospec.h>
-#include <linux/uaccess.h>
-#define DRIVER_NAME "pktcdvd"
+#include <asm/unaligned.h>
-#define pkt_err(pd, fmt, ...) \
- pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
-#define pkt_notice(pd, fmt, ...) \
- pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
-#define pkt_info(pd, fmt, ...) \
- pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
-
-#define pkt_dbg(level, pd, fmt, ...) \
-do { \
- if (level == 2 && PACKET_DEBUG >= 2) \
- pr_notice("%s: %s():" fmt, \
- pd->name, __func__, ##__VA_ARGS__); \
- else if (level == 1 && PACKET_DEBUG >= 1) \
- pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \
-} while (0)
+#define DRIVER_NAME "pktcdvd"
#define MAX_SPEED 0xffff
@@ -107,7 +94,6 @@ static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
/* forward declaration */
static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
static int pkt_remove_dev(dev_t pkt_dev);
-static int pkt_seq_show(struct seq_file *m, void *p);
static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
{
@@ -253,15 +239,16 @@ static ssize_t congestion_off_store(struct device *dev,
const char *buf, size_t len)
{
struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int val;
+ int val, ret;
- if (sscanf(buf, "%d", &val) == 1) {
- spin_lock(&pd->lock);
- pd->write_congestion_off = val;
- init_write_congestion_marks(&pd->write_congestion_off,
- &pd->write_congestion_on);
- spin_unlock(&pd->lock);
- }
+ ret = kstrtoint(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ spin_lock(&pd->lock);
+ pd->write_congestion_off = val;
+ init_write_congestion_marks(&pd->write_congestion_off, &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
return len;
}
static DEVICE_ATTR_RW(congestion_off);
@@ -283,15 +270,16 @@ static ssize_t congestion_on_store(struct device *dev,
const char *buf, size_t len)
{
struct pktcdvd_device *pd = dev_get_drvdata(dev);
- int val;
+ int val, ret;
- if (sscanf(buf, "%d", &val) == 1) {
- spin_lock(&pd->lock);
- pd->write_congestion_on = val;
- init_write_congestion_marks(&pd->write_congestion_off,
- &pd->write_congestion_on);
- spin_unlock(&pd->lock);
- }
+ ret = kstrtoint(buf, 10, &val);
+ if (ret)
+ return ret;
+
+ spin_lock(&pd->lock);
+ pd->write_congestion_on = val;
+ init_write_congestion_marks(&pd->write_congestion_off, &pd->write_congestion_on);
+ spin_unlock(&pd->lock);
return len;
}
static DEVICE_ATTR_RW(congestion_on);
@@ -319,7 +307,7 @@ static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
if (class_is_registered(&class_pktcdvd)) {
pd->dev = device_create_with_groups(&class_pktcdvd, NULL,
MKDEV(0, 0), pd, pkt_groups,
- "%s", pd->name);
+ "%s", pd->disk->disk_name);
if (IS_ERR(pd->dev))
pd->dev = NULL;
}
@@ -349,8 +337,8 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
struct pktcdvd_device *pd = pkt_devs[idx];
if (!pd)
continue;
- n += sprintf(data+n, "%s %u:%u %u:%u\n",
- pd->name,
+ n += sysfs_emit_at(data, n, "%s %u:%u %u:%u\n",
+ pd->disk->disk_name,
MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
MAJOR(pd->bdev->bd_dev),
MINOR(pd->bdev->bd_dev));
@@ -428,34 +416,92 @@ static void pkt_sysfs_cleanup(void)
*******************************************************************/
-static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
{
- return pkt_seq_show(m, p);
+ struct packet_data *pkt;
+ int i;
+
+ for (i = 0; i < PACKET_NUM_STATES; i++)
+ states[i] = 0;
+
+ spin_lock(&pd->cdrw.active_list_lock);
+ list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+ states[pkt->state]++;
+ }
+ spin_unlock(&pd->cdrw.active_list_lock);
}
-static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
+static int pkt_seq_show(struct seq_file *m, void *p)
{
- return single_open(file, pkt_debugfs_seq_show, inode->i_private);
-}
+ struct pktcdvd_device *pd = m->private;
+ char *msg;
+ int states[PACKET_NUM_STATES];
-static const struct file_operations debug_fops = {
- .open = pkt_debugfs_fops_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .owner = THIS_MODULE,
-};
+ seq_printf(m, "Writer %s mapped to %pg:\n", pd->disk->disk_name, pd->bdev);
+
+ seq_printf(m, "\nSettings:\n");
+ seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+ if (pd->settings.write_type == 0)
+ msg = "Packet";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+ seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+ seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+ seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+ if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+ msg = "Mode 1";
+ else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+ msg = "Mode 2";
+ else
+ msg = "Unknown";
+ seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+ seq_printf(m, "\nStatistics:\n");
+ seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+ seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+ seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+ seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+ seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+ seq_printf(m, "\nMisc:\n");
+ seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+ seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+ seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+ seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+ seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+ seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+ seq_printf(m, "\nQueue state:\n");
+ seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+ seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+ seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", pd->current_sector);
+
+ pkt_count_states(pd, states);
+ seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3], states[4], states[5]);
+
+ seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
+ pd->write_congestion_off,
+ pd->write_congestion_on);
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(pkt_seq);
static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
{
if (!pkt_debugfs_root)
return;
- pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
+ pd->dfs_d_root = debugfs_create_dir(pd->disk->disk_name, pkt_debugfs_root);
if (!pd->dfs_d_root)
return;
- pd->dfs_f_info = debugfs_create_file("info", 0444,
- pd->dfs_d_root, pd, &debug_fops);
+ pd->dfs_f_info = debugfs_create_file("info", 0444, pd->dfs_d_root,
+ pd, &pkt_seq_fops);
}
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
@@ -484,9 +530,11 @@ static void pkt_debugfs_cleanup(void)
static void pkt_bio_finished(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
+
BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
- pkt_dbg(2, pd, "queue empty\n");
+ dev_dbg(ddev, "queue empty\n");
atomic_set(&pd->iosched.attention, 1);
wake_up(&pd->wqueue);
}
@@ -717,15 +765,16 @@ static const char *sense_key_string(__u8 index)
static void pkt_dump_sense(struct pktcdvd_device *pd,
struct packet_command *cgc)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct scsi_sense_hdr *sshdr = cgc->sshdr;
if (sshdr)
- pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+ dev_err(ddev, "%*ph - sense %02x.%02x.%02x (%s)\n",
CDROM_PACKET_SIZE, cgc->cmd,
sshdr->sense_key, sshdr->asc, sshdr->ascq,
sense_key_string(sshdr->sense_key));
else
- pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
+ dev_err(ddev, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
}
/*
@@ -762,10 +811,8 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
cgc.sshdr = &sshdr;
cgc.cmd[0] = GPCMD_SET_SPEED;
- cgc.cmd[2] = (read_speed >> 8) & 0xff;
- cgc.cmd[3] = read_speed & 0xff;
- cgc.cmd[4] = (write_speed >> 8) & 0xff;
- cgc.cmd[5] = write_speed & 0xff;
+ put_unaligned_be16(read_speed, &cgc.cmd[2]);
+ put_unaligned_be16(write_speed, &cgc.cmd[4]);
ret = pkt_generic_packet(pd, &cgc);
if (ret)
@@ -809,6 +856,7 @@ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
*/
static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
if (atomic_read(&pd->iosched.attention) == 0)
return;
@@ -836,7 +884,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
need_write_seek = 0;
if (need_write_seek && reads_queued) {
if (atomic_read(&pd->cdrw.pending_bios) > 0) {
- pkt_dbg(2, pd, "write, waiting\n");
+ dev_dbg(ddev, "write, waiting\n");
break;
}
pkt_flush_cache(pd);
@@ -845,7 +893,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
} else {
if (!reads_queued && writes_queued) {
if (atomic_read(&pd->cdrw.pending_bios) > 0) {
- pkt_dbg(2, pd, "read, waiting\n");
+ dev_dbg(ddev, "read, waiting\n");
break;
}
pd->iosched.writing = 1;
@@ -892,25 +940,27 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
*/
static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
{
- if ((pd->settings.size << 9) / CD_FRAMESIZE
- <= queue_max_segments(q)) {
+ struct device *ddev = disk_to_dev(pd->disk);
+
+ if ((pd->settings.size << 9) / CD_FRAMESIZE <= queue_max_segments(q)) {
/*
* The cdrom device can handle one segment/frame
*/
clear_bit(PACKET_MERGE_SEGS, &pd->flags);
return 0;
- } else if ((pd->settings.size << 9) / PAGE_SIZE
- <= queue_max_segments(q)) {
+ }
+
+ if ((pd->settings.size << 9) / PAGE_SIZE <= queue_max_segments(q)) {
/*
* We can handle this case at the expense of some extra memory
* copies during write operations
*/
set_bit(PACKET_MERGE_SEGS, &pd->flags);
return 0;
- } else {
- pkt_err(pd, "cdrom max_phys_segments too small\n");
- return -EIO;
}
+
+ dev_err(ddev, "cdrom max_phys_segments too small\n");
+ return -EIO;
}
static void pkt_end_io_read(struct bio *bio)
@@ -919,9 +969,8 @@ static void pkt_end_io_read(struct bio *bio)
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
- bio, (unsigned long long)pkt->sector,
- (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
+ dev_dbg(disk_to_dev(pd->disk), "bio=%p sec0=%llx sec=%llx err=%d\n",
+ bio, pkt->sector, bio->bi_iter.bi_sector, bio->bi_status);
if (bio->bi_status)
atomic_inc(&pkt->io_errors);
@@ -939,7 +988,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
struct pktcdvd_device *pd = pkt->pd;
BUG_ON(!pd);
- pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
+ dev_dbg(disk_to_dev(pd->disk), "id=%d, err=%d\n", pkt->id, bio->bi_status);
pd->stats.pkt_ended++;
@@ -955,6 +1004,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
*/
static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
{
+ struct device *ddev = disk_to_dev(pd->disk);
int frames_read = 0;
struct bio *bio;
int f;
@@ -983,8 +1033,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
spin_unlock(&pkt->lock);
if (pkt->cache_valid) {
- pkt_dbg(2, pd, "zone %llx cached\n",
- (unsigned long long)pkt->sector);
+ dev_dbg(ddev, "zone %llx cached\n", pkt->sector);
goto out_account;
}
@@ -1005,8 +1054,8 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
p = (f * CD_FRAMESIZE) / PAGE_SIZE;
offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
- pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
- f, pkt->pages[p], offset);
+ dev_dbg(ddev, "Adding frame %d, page:%p offs:%d\n", f,
+ pkt->pages[p], offset);
if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
BUG();
@@ -1016,8 +1065,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
}
out_account:
- pkt_dbg(2, pd, "need %d frames for zone %llx\n",
- frames_read, (unsigned long long)pkt->sector);
+ dev_dbg(ddev, "need %d frames for zone %llx\n", frames_read, pkt->sector);
pd->stats.pkt_started++;
pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
}
@@ -1051,17 +1099,17 @@ static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *p
}
}
-static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+static inline void pkt_set_state(struct device *ddev, struct packet_data *pkt,
+ enum packet_data_state state)
{
-#if PACKET_DEBUG > 1
static const char *state_name[] = {
"IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
};
enum packet_data_state old_state = pkt->state;
- pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
- pkt->id, (unsigned long long)pkt->sector,
- state_name[old_state], state_name[state]);
-#endif
+
+ dev_dbg(ddev, "pkt %2d : s=%6llx %s -> %s\n",
+ pkt->id, pkt->sector, state_name[old_state], state_name[state]);
+
pkt->state = state;
}
@@ -1071,6 +1119,7 @@ static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state
*/
static int pkt_handle_queue(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_data *pkt, *p;
struct bio *bio = NULL;
sector_t zone = 0; /* Suppress gcc warning */
@@ -1080,7 +1129,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
atomic_set(&pd->scan_queue, 0);
if (list_empty(&pd->cdrw.pkt_free_list)) {
- pkt_dbg(2, pd, "no pkt\n");
+ dev_dbg(ddev, "no pkt\n");
return 0;
}
@@ -1117,7 +1166,7 @@ try_next_bio:
}
spin_unlock(&pd->lock);
if (!bio) {
- pkt_dbg(2, pd, "no bio\n");
+ dev_dbg(ddev, "no bio\n");
return 0;
}
@@ -1133,12 +1182,13 @@ try_next_bio:
* to this packet.
*/
spin_lock(&pd->lock);
- pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
+ dev_dbg(ddev, "looking for zone %llx\n", zone);
while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+ sector_t tmp = get_zone(node->bio->bi_iter.bi_sector, pd);
+
bio = node->bio;
- pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
- get_zone(bio->bi_iter.bi_sector, pd));
- if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
+ dev_dbg(ddev, "found zone=%llx\n", tmp);
+ if (tmp != zone)
break;
pkt_rbtree_erase(pd, node);
spin_lock(&pkt->lock);
@@ -1157,7 +1207,7 @@ try_next_bio:
spin_unlock(&pd->lock);
pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
- pkt_set_state(pkt, PACKET_WAITING_STATE);
+ pkt_set_state(ddev, pkt, PACKET_WAITING_STATE);
atomic_set(&pkt->run_sm, 1);
spin_lock(&pd->cdrw.active_list_lock);
@@ -1209,6 +1259,7 @@ static void bio_list_copy_data(struct bio *dst, struct bio *src)
*/
static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
{
+ struct device *ddev = disk_to_dev(pd->disk);
int f;
bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
@@ -1225,7 +1276,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset))
BUG();
}
- pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
+ dev_dbg(ddev, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
/*
* Fill-in bvec with data from orig_bios.
@@ -1233,11 +1284,10 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
spin_lock(&pkt->lock);
bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
- pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+ pkt_set_state(ddev, pkt, PACKET_WRITE_WAIT_STATE);
spin_unlock(&pkt->lock);
- pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
- pkt->write_size, (unsigned long long)pkt->sector);
+ dev_dbg(ddev, "Writing %d frames for zone %llx\n", pkt->write_size, pkt->sector);
if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames))
pkt->cache_valid = 1;
@@ -1265,7 +1315,9 @@ static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
{
- pkt_dbg(2, pd, "pkt %d\n", pkt->id);
+ struct device *ddev = disk_to_dev(pd->disk);
+
+ dev_dbg(ddev, "pkt %d\n", pkt->id);
for (;;) {
switch (pkt->state) {
@@ -1275,7 +1327,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
pkt->sleep_time = 0;
pkt_gather_data(pd, pkt);
- pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+ pkt_set_state(ddev, pkt, PACKET_READ_WAIT_STATE);
break;
case PACKET_READ_WAIT_STATE:
@@ -1283,7 +1335,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
return;
if (atomic_read(&pkt->io_errors) > 0) {
- pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ pkt_set_state(ddev, pkt, PACKET_RECOVERY_STATE);
} else {
pkt_start_write(pd, pkt);
}
@@ -1294,15 +1346,15 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
return;
if (!pkt->w_bio->bi_status) {
- pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ pkt_set_state(ddev, pkt, PACKET_FINISHED_STATE);
} else {
- pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+ pkt_set_state(ddev, pkt, PACKET_RECOVERY_STATE);
}
break;
case PACKET_RECOVERY_STATE:
- pkt_dbg(2, pd, "No recovery possible\n");
- pkt_set_state(pkt, PACKET_FINISHED_STATE);
+ dev_dbg(ddev, "No recovery possible\n");
+ pkt_set_state(ddev, pkt, PACKET_FINISHED_STATE);
break;
case PACKET_FINISHED_STATE:
@@ -1318,6 +1370,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
static void pkt_handle_packets(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_data *pkt, *next;
/*
@@ -1338,28 +1391,13 @@ static void pkt_handle_packets(struct pktcdvd_device *pd)
if (pkt->state == PACKET_FINISHED_STATE) {
list_del(&pkt->list);
pkt_put_packet_data(pd, pkt);
- pkt_set_state(pkt, PACKET_IDLE_STATE);
+ pkt_set_state(ddev, pkt, PACKET_IDLE_STATE);
atomic_set(&pd->scan_queue, 1);
}
}
spin_unlock(&pd->cdrw.active_list_lock);
}
-static void pkt_count_states(struct pktcdvd_device *pd, int *states)
-{
- struct packet_data *pkt;
- int i;
-
- for (i = 0; i < PACKET_NUM_STATES; i++)
- states[i] = 0;
-
- spin_lock(&pd->cdrw.active_list_lock);
- list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
- states[pkt->state]++;
- }
- spin_unlock(&pd->cdrw.active_list_lock);
-}
-
/*
* kcdrwd is woken up when writes have been queued for one of our
* registered devices
@@ -1367,7 +1405,9 @@ static void pkt_count_states(struct pktcdvd_device *pd, int *states)
static int kcdrwd(void *foobar)
{
struct pktcdvd_device *pd = foobar;
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_data *pkt;
+ int states[PACKET_NUM_STATES];
long min_sleep_time, residue;
set_user_nice(current, MIN_NICE);
@@ -1398,13 +1438,9 @@ static int kcdrwd(void *foobar)
goto work_to_do;
/* Otherwise, go to sleep */
- if (PACKET_DEBUG > 1) {
- int states[PACKET_NUM_STATES];
- pkt_count_states(pd, states);
- pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
- states[0], states[1], states[2],
- states[3], states[4], states[5]);
- }
+ pkt_count_states(pd, states);
+ dev_dbg(ddev, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+ states[0], states[1], states[2], states[3], states[4], states[5]);
min_sleep_time = MAX_SCHEDULE_TIMEOUT;
list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
@@ -1412,9 +1448,9 @@ static int kcdrwd(void *foobar)
min_sleep_time = pkt->sleep_time;
}
- pkt_dbg(2, pd, "sleeping\n");
+ dev_dbg(ddev, "sleeping\n");
residue = schedule_timeout(min_sleep_time);
- pkt_dbg(2, pd, "wake up\n");
+ dev_dbg(ddev, "wake up\n");
/* make swsusp happy with our thread */
try_to_freeze();
@@ -1462,7 +1498,7 @@ work_to_do:
static void pkt_print_settings(struct pktcdvd_device *pd)
{
- pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
+ dev_info(disk_to_dev(pd->disk), "%s packets, %u blocks, Mode-%c disc\n",
pd->settings.fp ? "Fixed" : "Variable",
pd->settings.size >> 2,
pd->settings.block_mode == 8 ? '1' : '2');
@@ -1474,8 +1510,7 @@ static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc,
cgc->cmd[0] = GPCMD_MODE_SENSE_10;
cgc->cmd[2] = page_code | (page_control << 6);
- cgc->cmd[7] = cgc->buflen >> 8;
- cgc->cmd[8] = cgc->buflen & 0xff;
+ put_unaligned_be16(cgc->buflen, &cgc->cmd[7]);
cgc->data_direction = CGC_DATA_READ;
return pkt_generic_packet(pd, cgc);
}
@@ -1486,8 +1521,7 @@ static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc
memset(cgc->buffer, 0, 2);
cgc->cmd[0] = GPCMD_MODE_SELECT_10;
cgc->cmd[1] = 0x10; /* PF */
- cgc->cmd[7] = cgc->buflen >> 8;
- cgc->cmd[8] = cgc->buflen & 0xff;
+ put_unaligned_be16(cgc->buflen, &cgc->cmd[7]);
cgc->data_direction = CGC_DATA_WRITE;
return pkt_generic_packet(pd, cgc);
}
@@ -1528,8 +1562,7 @@ static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type,
init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
cgc.cmd[1] = type & 3;
- cgc.cmd[4] = (track & 0xff00) >> 8;
- cgc.cmd[5] = track & 0xff;
+ put_unaligned_be16(track, &cgc.cmd[4]);
cgc.cmd[8] = 8;
cgc.quiet = 1;
@@ -1590,6 +1623,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
*/
static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_command cgc;
struct scsi_sense_hdr sshdr;
write_param_page *wp;
@@ -1609,8 +1643,8 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
return ret;
}
- size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
- pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+ size = 2 + get_unaligned_be16(&buffer[0]);
+ pd->mode_offset = get_unaligned_be16(&buffer[6]);
if (size > sizeof(buffer))
size = sizeof(buffer);
@@ -1656,7 +1690,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
/*
* paranoia
*/
- pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
+ dev_err(ddev, "write mode wrong %d\n", wp->data_block_type);
return 1;
}
wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
@@ -1677,6 +1711,8 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
*/
static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
{
+ struct device *ddev = disk_to_dev(pd->disk);
+
switch (pd->mmc3_profile) {
case 0x1a: /* DVD+RW */
case 0x12: /* DVD-RAM */
@@ -1701,7 +1737,7 @@ static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
if (ti->rt == 1 && ti->blank == 0)
return 1;
- pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+ dev_err(ddev, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
return 0;
}
@@ -1710,6 +1746,8 @@ static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
*/
static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
{
+ struct device *ddev = disk_to_dev(pd->disk);
+
switch (pd->mmc3_profile) {
case 0x0a: /* CD-RW */
case 0xffff: /* MMC3 not supported */
@@ -1719,8 +1757,7 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
case 0x12: /* DVD-RAM */
return 1;
default:
- pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
- pd->mmc3_profile);
+ dev_dbg(ddev, "Wrong disc profile (%x)\n", pd->mmc3_profile);
return 0;
}
@@ -1729,22 +1766,22 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
* but i'm not sure, should we leave this to user apps? probably.
*/
if (di->disc_type == 0xff) {
- pkt_notice(pd, "unknown disc - no track?\n");
+ dev_notice(ddev, "unknown disc - no track?\n");
return 0;
}
if (di->disc_type != 0x20 && di->disc_type != 0) {
- pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
+ dev_err(ddev, "wrong disc type (%x)\n", di->disc_type);
return 0;
}
if (di->erasable == 0) {
- pkt_notice(pd, "disc not erasable\n");
+ dev_err(ddev, "disc not erasable\n");
return 0;
}
if (di->border_status == PACKET_SESSION_RESERVED) {
- pkt_err(pd, "can't write to last track (reserved)\n");
+ dev_err(ddev, "can't write to last track (reserved)\n");
return 0;
}
@@ -1753,6 +1790,7 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_command cgc;
unsigned char buf[12];
disc_information di;
@@ -1763,14 +1801,14 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
cgc.cmd[8] = 8;
ret = pkt_generic_packet(pd, &cgc);
- pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+ pd->mmc3_profile = ret ? 0xffff : get_unaligned_be16(&buf[6]);
memset(&di, 0, sizeof(disc_information));
memset(&ti, 0, sizeof(track_information));
ret = pkt_get_disc_info(pd, &di);
if (ret) {
- pkt_err(pd, "failed get_disc\n");
+ dev_err(ddev, "failed get_disc\n");
return ret;
}
@@ -1782,12 +1820,12 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
ret = pkt_get_track_info(pd, track, 1, &ti);
if (ret) {
- pkt_err(pd, "failed get_track\n");
+ dev_err(ddev, "failed get_track\n");
return ret;
}
if (!pkt_writable_track(pd, &ti)) {
- pkt_err(pd, "can't write to this track\n");
+ dev_err(ddev, "can't write to this track\n");
return -EROFS;
}
@@ -1797,11 +1835,11 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
*/
pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
if (pd->settings.size == 0) {
- pkt_notice(pd, "detected zero packet size!\n");
+ dev_notice(ddev, "detected zero packet size!\n");
return -ENXIO;
}
if (pd->settings.size > PACKET_MAX_SECTORS) {
- pkt_err(pd, "packet size is too big\n");
+ dev_err(ddev, "packet size is too big\n");
return -EROFS;
}
pd->settings.fp = ti.fp;
@@ -1843,7 +1881,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
pd->settings.block_mode = PACKET_BLOCK_MODE2;
break;
default:
- pkt_err(pd, "unknown data mode\n");
+ dev_err(ddev, "unknown data mode\n");
return -EROFS;
}
return 0;
@@ -1854,6 +1892,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
*/
static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_command cgc;
struct scsi_sense_hdr sshdr;
unsigned char buf[64];
@@ -1880,13 +1919,13 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd)
*/
buf[pd->mode_offset + 10] |= (set << 2);
- cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+ cgc.buflen = cgc.cmd[8] = 2 + get_unaligned_be16(&buf[0]);
ret = pkt_mode_select(pd, &cgc);
if (ret) {
- pkt_err(pd, "write caching control failed\n");
+ dev_err(ddev, "write caching control failed\n");
pkt_dump_sense(pd, &cgc);
} else if (!ret && set)
- pkt_notice(pd, "enabled write caching\n");
+ dev_notice(ddev, "enabled write caching\n");
return ret;
}
@@ -1935,12 +1974,12 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
* Speed Performance Descriptor Block", use the information
* in the first block. (contains the highest speed)
*/
- int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+ int num_spdb = get_unaligned_be16(&cap_buf[30]);
if (num_spdb > 0)
offset = 34;
}
- *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+ *write_speed = get_unaligned_be16(&cap_buf[offset]);
return 0;
}
@@ -1967,6 +2006,7 @@ static char us_clv_to_speed[16] = {
static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
unsigned *speed)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_command cgc;
struct scsi_sense_hdr sshdr;
unsigned char buf[64];
@@ -1984,7 +2024,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
pkt_dump_sense(pd, &cgc);
return ret;
}
- size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+ size = 2 + get_unaligned_be16(&buf[0]);
if (size > sizeof(buf))
size = sizeof(buf);
@@ -2001,11 +2041,11 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
}
if (!(buf[6] & 0x40)) {
- pkt_notice(pd, "disc type is not CD-RW\n");
+ dev_notice(ddev, "disc type is not CD-RW\n");
return 1;
}
if (!(buf[6] & 0x4)) {
- pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
+ dev_notice(ddev, "A1 values on media are not valid, maybe not CDRW?\n");
return 1;
}
@@ -2025,25 +2065,26 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
*speed = us_clv_to_speed[sp];
break;
default:
- pkt_notice(pd, "unknown disc sub-type %d\n", st);
+ dev_notice(ddev, "unknown disc sub-type %d\n", st);
return 1;
}
if (*speed) {
- pkt_info(pd, "maximum media speed: %d\n", *speed);
+ dev_info(ddev, "maximum media speed: %d\n", *speed);
return 0;
} else {
- pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
+ dev_notice(ddev, "unknown speed %d for sub-type %d\n", sp, st);
return 1;
}
}
static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
struct packet_command cgc;
struct scsi_sense_hdr sshdr;
int ret;
- pkt_dbg(2, pd, "Performing OPC\n");
+ dev_dbg(ddev, "Performing OPC\n");
init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
cgc.sshdr = &sshdr;
@@ -2058,18 +2099,19 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
static int pkt_open_write(struct pktcdvd_device *pd)
{
+ struct device *ddev = disk_to_dev(pd->disk);
int ret;
unsigned int write_speed, media_write_speed, read_speed;
ret = pkt_probe_settings(pd);
if (ret) {
- pkt_dbg(2, pd, "failed probe\n");
+ dev_dbg(ddev, "failed probe\n");
return ret;
}
ret = pkt_set_write_settings(pd);
if (ret) {
- pkt_dbg(1, pd, "failed saving write settings\n");
+ dev_notice(ddev, "failed saving write settings\n");
return -EIO;
}
@@ -2082,30 +2124,29 @@ static int pkt_open_write(struct pktcdvd_device *pd)
case 0x13: /* DVD-RW */
case 0x1a: /* DVD+RW */
case 0x12: /* DVD-RAM */
- pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
+ dev_notice(ddev, "write speed %ukB/s\n", write_speed);
break;
default:
ret = pkt_media_speed(pd, &media_write_speed);
if (ret)
media_write_speed = 16;
write_speed = min(write_speed, media_write_speed * 177);
- pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
+ dev_notice(ddev, "write speed %ux\n", write_speed / 176);
break;
}
read_speed = write_speed;
ret = pkt_set_speed(pd, write_speed, read_speed);
if (ret) {
- pkt_dbg(1, pd, "couldn't set write speed\n");
+ dev_notice(ddev, "couldn't set write speed\n");
return -EIO;
}
pd->write_speed = write_speed;
pd->read_speed = read_speed;
ret = pkt_perform_opc(pd);
- if (ret) {
- pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
- }
+ if (ret)
+ dev_notice(ddev, "Optimum Power Calibration failed\n");
return 0;
}
@@ -2113,8 +2154,9 @@ static int pkt_open_write(struct pktcdvd_device *pd)
/*
* called at open time.
*/
-static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
+static int pkt_open_dev(struct pktcdvd_device *pd, bool write)
{
+ struct device *ddev = disk_to_dev(pd->disk);
int ret;
long lba;
struct request_queue *q;
@@ -2125,7 +2167,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
* to read/write from/to it. It is already opened in O_NONBLOCK mode
* so open should not fail.
*/
- bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
+ bdev = blkdev_get_by_dev(pd->bdev->bd_dev, BLK_OPEN_READ, pd, NULL);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
goto out;
@@ -2133,7 +2175,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
ret = pkt_get_last_written(pd, &lba);
if (ret) {
- pkt_err(pd, "pkt_get_last_written failed\n");
+ dev_err(ddev, "pkt_get_last_written failed\n");
goto out_putdev;
}
@@ -2162,17 +2204,17 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
if (write) {
if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
- pkt_err(pd, "not enough memory for buffers\n");
+ dev_err(ddev, "not enough memory for buffers\n");
ret = -ENOMEM;
goto out_putdev;
}
- pkt_info(pd, "%lukB available on disc\n", lba << 1);
+ dev_info(ddev, "%lukB available on disc\n", lba << 1);
}
return 0;
out_putdev:
- blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
+ blkdev_put(bdev, pd);
out:
return ret;
}
@@ -2183,13 +2225,15 @@ out:
*/
static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
{
+ struct device *ddev = disk_to_dev(pd->disk);
+
if (flush && pkt_flush_cache(pd))
- pkt_dbg(1, pd, "not flushing cache\n");
+ dev_notice(ddev, "not flushing cache\n");
pkt_lock_door(pd, 0);
pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
- blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+ blkdev_put(pd->bdev, pd);
pkt_shrink_pktlist(pd);
}
@@ -2203,14 +2247,14 @@ static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
return pkt_devs[dev_minor];
}
-static int pkt_open(struct block_device *bdev, fmode_t mode)
+static int pkt_open(struct gendisk *disk, blk_mode_t mode)
{
struct pktcdvd_device *pd = NULL;
int ret;
mutex_lock(&pktcdvd_mutex);
mutex_lock(&ctl_mutex);
- pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
+ pd = pkt_find_dev_from_minor(disk->first_minor);
if (!pd) {
ret = -ENODEV;
goto out;
@@ -2219,22 +2263,21 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
pd->refcnt++;
if (pd->refcnt > 1) {
- if ((mode & FMODE_WRITE) &&
+ if ((mode & BLK_OPEN_WRITE) &&
!test_bit(PACKET_WRITABLE, &pd->flags)) {
ret = -EBUSY;
goto out_dec;
}
} else {
- ret = pkt_open_dev(pd, mode & FMODE_WRITE);
+ ret = pkt_open_dev(pd, mode & BLK_OPEN_WRITE);
if (ret)
goto out_dec;
/*
* needed here as well, since ext2 (among others) may change
* the blocksize at mount time
*/
- set_blocksize(bdev, CD_FRAMESIZE);
+ set_blocksize(disk->part0, CD_FRAMESIZE);
}
-
mutex_unlock(&ctl_mutex);
mutex_unlock(&pktcdvd_mutex);
return 0;
@@ -2247,7 +2290,7 @@ out:
return ret;
}
-static void pkt_close(struct gendisk *disk, fmode_t mode)
+static void pkt_release(struct gendisk *disk)
{
struct pktcdvd_device *pd = disk->private_data;
@@ -2385,15 +2428,15 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
static void pkt_submit_bio(struct bio *bio)
{
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
+ struct device *ddev = disk_to_dev(pd->disk);
struct bio *split;
bio = bio_split_to_limits(bio);
if (!bio)
return;
- pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
- (unsigned long long)bio->bi_iter.bi_sector,
- (unsigned long long)bio_end_sector(bio));
+ dev_dbg(ddev, "start = %6llx stop = %6llx\n",
+ bio->bi_iter.bi_sector, bio_end_sector(bio));
/*
* Clone READ bios so we can have our own bi_end_io callback.
@@ -2404,13 +2447,12 @@ static void pkt_submit_bio(struct bio *bio)
}
if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
- pkt_notice(pd, "WRITE for ro device (%llu)\n",
- (unsigned long long)bio->bi_iter.bi_sector);
+ dev_notice(ddev, "WRITE for ro device (%llu)\n", bio->bi_iter.bi_sector);
goto end_io;
}
if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
- pkt_err(pd, "wrong bio size\n");
+ dev_err(ddev, "wrong bio size\n");
goto end_io;
}
@@ -2446,74 +2488,15 @@ static void pkt_init_queue(struct pktcdvd_device *pd)
q->queuedata = pd;
}
-static int pkt_seq_show(struct seq_file *m, void *p)
-{
- struct pktcdvd_device *pd = m->private;
- char *msg;
- int states[PACKET_NUM_STATES];
-
- seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
-
- seq_printf(m, "\nSettings:\n");
- seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
-
- if (pd->settings.write_type == 0)
- msg = "Packet";
- else
- msg = "Unknown";
- seq_printf(m, "\twrite type:\t\t%s\n", msg);
-
- seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
- seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
-
- seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
-
- if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
- msg = "Mode 1";
- else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
- msg = "Mode 2";
- else
- msg = "Unknown";
- seq_printf(m, "\tblock mode:\t\t%s\n", msg);
-
- seq_printf(m, "\nStatistics:\n");
- seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
- seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
- seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
- seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
- seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
-
- seq_printf(m, "\nMisc:\n");
- seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
- seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
- seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
- seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
- seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
- seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
-
- seq_printf(m, "\nQueue state:\n");
- seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
- seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
- seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
-
- pkt_count_states(pd, states);
- seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
- states[0], states[1], states[2], states[3], states[4], states[5]);
-
- seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
- pd->write_congestion_off,
- pd->write_congestion_on);
- return 0;
-}
-
static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
{
+ struct device *ddev = disk_to_dev(pd->disk);
int i;
struct block_device *bdev;
struct scsi_device *sdev;
if (pd->pkt_dev == dev) {
- pkt_err(pd, "recursive setup not allowed\n");
+ dev_err(ddev, "recursive setup not allowed\n");
return -EBUSY;
}
for (i = 0; i < MAX_WRITERS; i++) {
@@ -2521,21 +2504,22 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
if (!pd2)
continue;
if (pd2->bdev->bd_dev == dev) {
- pkt_err(pd, "%pg already setup\n", pd2->bdev);
+ dev_err(ddev, "%pg already setup\n", pd2->bdev);
return -EBUSY;
}
if (pd2->pkt_dev == dev) {
- pkt_err(pd, "can't chain pktcdvd devices\n");
+ dev_err(ddev, "can't chain pktcdvd devices\n");
return -EBUSY;
}
}
- bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
+ bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_NDELAY, NULL,
+ NULL);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
sdev = scsi_device_from_queue(bdev->bd_disk->queue);
if (!sdev) {
- blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+ blkdev_put(bdev, NULL);
return -EINVAL;
}
put_device(&sdev->sdev_gendev);
@@ -2549,30 +2533,31 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
pkt_init_queue(pd);
atomic_set(&pd->cdrw.pending_bios, 0);
- pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+ pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name);
if (IS_ERR(pd->cdrw.thread)) {
- pkt_err(pd, "can't start kernel thread\n");
+ dev_err(ddev, "can't start kernel thread\n");
goto out_mem;
}
- proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
- pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
+ proc_create_single_data(pd->disk->disk_name, 0, pkt_proc, pkt_seq_show, pd);
+ dev_notice(ddev, "writer mapped to %pg\n", bdev);
return 0;
out_mem:
- blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+ blkdev_put(bdev, NULL);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return -ENOMEM;
}
-static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+static int pkt_ioctl(struct block_device *bdev, blk_mode_t mode,
+ unsigned int cmd, unsigned long arg)
{
struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+ struct device *ddev = disk_to_dev(pd->disk);
int ret;
- pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
- cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+ dev_dbg(ddev, "cmd %x, dev %d:%d\n", cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
mutex_lock(&pktcdvd_mutex);
switch (cmd) {
@@ -2598,7 +2583,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
break;
default:
- pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
+ dev_dbg(ddev, "Unknown ioctl (%x)\n", cmd);
ret = -ENOTTY;
}
mutex_unlock(&pktcdvd_mutex);
@@ -2631,7 +2616,7 @@ static const struct block_device_operations pktcdvd_ops = {
.owner = THIS_MODULE,
.submit_bio = pkt_submit_bio,
.open = pkt_open,
- .release = pkt_close,
+ .release = pkt_release,
.ioctl = pkt_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
.check_events = pkt_check_events,
@@ -2676,7 +2661,6 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
spin_lock_init(&pd->iosched.lock);
bio_list_init(&pd->iosched.read_queue);
bio_list_init(&pd->iosched.write_queue);
- sprintf(pd->name, DRIVER_NAME"%d", idx);
init_waitqueue_head(&pd->wqueue);
pd->bio_queue = RB_ROOT;
@@ -2693,7 +2677,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->minors = 1;
disk->fops = &pktcdvd_ops;
disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
- strcpy(disk->disk_name, pd->name);
+ snprintf(disk->disk_name, sizeof(disk->disk_name), DRIVER_NAME"%d", idx);
disk->private_data = pd;
pd->pkt_dev = MKDEV(pktdev_major, idx);
@@ -2735,6 +2719,7 @@ out_mutex:
static int pkt_remove_dev(dev_t pkt_dev)
{
struct pktcdvd_device *pd;
+ struct device *ddev;
int idx;
int ret = 0;
@@ -2755,6 +2740,9 @@ static int pkt_remove_dev(dev_t pkt_dev)
ret = -EBUSY;
goto out;
}
+
+ ddev = disk_to_dev(pd->disk);
+
if (!IS_ERR(pd->cdrw.thread))
kthread_stop(pd->cdrw.thread);
@@ -2763,10 +2751,10 @@ static int pkt_remove_dev(dev_t pkt_dev)
pkt_debugfs_dev_remove(pd);
pkt_sysfs_dev_remove(pd);
- blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
+ blkdev_put(pd->bdev, NULL);
- remove_proc_entry(pd->name, pkt_proc);
- pkt_dbg(1, pd, "writer unmapped\n");
+ remove_proc_entry(pd->disk->disk_name, pkt_proc);
+ dev_notice(ddev, "writer unmapped\n");
del_gendisk(pd->disk);
put_disk(pd->disk);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 632751ddb287..bd0e075a5d89 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -660,9 +660,9 @@ static bool pending_result_dec(struct pending_result *pending, int *result)
return true;
}
-static int rbd_open(struct block_device *bdev, fmode_t mode)
+static int rbd_open(struct gendisk *disk, blk_mode_t mode)
{
- struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+ struct rbd_device *rbd_dev = disk->private_data;
bool removing = false;
spin_lock_irq(&rbd_dev->lock);
@@ -679,7 +679,7 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
return 0;
}
-static void rbd_release(struct gendisk *disk, fmode_t mode)
+static void rbd_release(struct gendisk *disk)
{
struct rbd_device *rbd_dev = disk->private_data;
unsigned long open_count_before;
diff --git a/drivers/block/rnbd/Makefile b/drivers/block/rnbd/Makefile
index 40b31630822c..208e5f865497 100644
--- a/drivers/block/rnbd/Makefile
+++ b/drivers/block/rnbd/Makefile
@@ -3,13 +3,11 @@
ccflags-y := -I$(srctree)/drivers/infiniband/ulp/rtrs
rnbd-client-y := rnbd-clt.o \
- rnbd-clt-sysfs.o \
- rnbd-common.o
+ rnbd-clt-sysfs.o
CFLAGS_rnbd-srv-trace.o = -I$(src)
-rnbd-server-y := rnbd-common.o \
- rnbd-srv.o \
+rnbd-server-y := rnbd-srv.o \
rnbd-srv-sysfs.o \
rnbd-srv-trace.o
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 8c6087949794..c36d8b1ceeed 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -24,7 +24,9 @@
#include "rnbd-clt.h"
static struct device *rnbd_dev;
-static struct class *rnbd_dev_class;
+static const struct class rnbd_dev_class = {
+ .name = "rnbd_client",
+};
static struct kobject *rnbd_devs_kobj;
enum {
@@ -278,7 +280,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
dev = container_of(kobj, struct rnbd_clt_dev, kobj);
- return sysfs_emit(page, "%s\n", rnbd_access_mode_str(dev->access_mode));
+ return sysfs_emit(page, "%s\n", rnbd_access_modes[dev->access_mode].str);
}
static struct kobj_attribute rnbd_clt_access_mode =
@@ -596,7 +598,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj,
pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n",
pathname, sessname,
- rnbd_access_mode_str(access_mode),
+ rnbd_access_modes[access_mode].str,
nr_poll_queues);
dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname,
@@ -646,11 +648,11 @@ int rnbd_clt_create_sysfs_files(void)
{
int err;
- rnbd_dev_class = class_create("rnbd-client");
- if (IS_ERR(rnbd_dev_class))
- return PTR_ERR(rnbd_dev_class);
+ err = class_register(&rnbd_dev_class);
+ if (err)
+ return err;
- rnbd_dev = device_create_with_groups(rnbd_dev_class, NULL,
+ rnbd_dev = device_create_with_groups(&rnbd_dev_class, NULL,
MKDEV(0, 0), NULL,
default_attr_groups, "ctl");
if (IS_ERR(rnbd_dev)) {
@@ -666,9 +668,9 @@ int rnbd_clt_create_sysfs_files(void)
return 0;
dev_destroy:
- device_destroy(rnbd_dev_class, MKDEV(0, 0));
+ device_destroy(&rnbd_dev_class, MKDEV(0, 0));
cls_destroy:
- class_destroy(rnbd_dev_class);
+ class_unregister(&rnbd_dev_class);
return err;
}
@@ -678,6 +680,6 @@ void rnbd_clt_destroy_sysfs_files(void)
sysfs_remove_group(&rnbd_dev->kobj, &default_attr_group);
kobject_del(rnbd_devs_kobj);
kobject_put(rnbd_devs_kobj);
- device_destroy(rnbd_dev_class, MKDEV(0, 0));
- class_destroy(rnbd_dev_class);
+ device_destroy(&rnbd_dev_class, MKDEV(0, 0));
+ class_unregister(&rnbd_dev_class);
}
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 5eb8c7855970..b0550b68645d 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -921,11 +921,11 @@ rnbd_clt_session *find_or_create_sess(const char *sessname, bool *first)
return sess;
}
-static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
+static int rnbd_client_open(struct gendisk *disk, blk_mode_t mode)
{
- struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
+ struct rnbd_clt_dev *dev = disk->private_data;
- if (get_disk_ro(dev->gd) && (mode & FMODE_WRITE))
+ if (get_disk_ro(dev->gd) && (mode & BLK_OPEN_WRITE))
return -EPERM;
if (dev->dev_state == DEV_STATE_UNMAPPED ||
@@ -935,7 +935,7 @@ static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
return 0;
}
-static void rnbd_client_release(struct gendisk *gen, fmode_t mode)
+static void rnbd_client_release(struct gendisk *gen)
{
struct rnbd_clt_dev *dev = gen->private_data;
diff --git a/drivers/block/rnbd/rnbd-common.c b/drivers/block/rnbd/rnbd-common.c
deleted file mode 100644
index 596c3f732403..000000000000
--- a/drivers/block/rnbd/rnbd-common.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * RDMA Network Block Driver
- *
- * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
- * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
- * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
- */
-#include "rnbd-proto.h"
-
-const char *rnbd_access_mode_str(enum rnbd_access_mode mode)
-{
- switch (mode) {
- case RNBD_ACCESS_RO:
- return "ro";
- case RNBD_ACCESS_RW:
- return "rw";
- case RNBD_ACCESS_MIGRATION:
- return "migration";
- default:
- return "unknown";
- }
-}
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index da1d0542d7e2..e32f8f2c868a 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -61,6 +61,15 @@ enum rnbd_access_mode {
RNBD_ACCESS_MIGRATION,
};
+static const __maybe_unused struct {
+ enum rnbd_access_mode mode;
+ const char *str;
+} rnbd_access_modes[] = {
+ [RNBD_ACCESS_RO] = {RNBD_ACCESS_RO, "ro"},
+ [RNBD_ACCESS_RW] = {RNBD_ACCESS_RW, "rw"},
+ [RNBD_ACCESS_MIGRATION] = {RNBD_ACCESS_MIGRATION, "migration"},
+};
+
/**
* struct rnbd_msg_sess_info - initial session info from client to server
* @hdr: message header
@@ -185,7 +194,6 @@ struct rnbd_msg_io {
enum rnbd_io_flags {
/* Operations */
-
RNBD_OP_READ = 0,
RNBD_OP_WRITE = 1,
RNBD_OP_FLUSH = 2,
@@ -193,15 +201,9 @@ enum rnbd_io_flags {
RNBD_OP_SECURE_ERASE = 4,
RNBD_OP_WRITE_SAME = 5,
- RNBD_OP_LAST,
-
/* Flags */
-
RNBD_F_SYNC = 1<<(RNBD_OP_BITS + 0),
RNBD_F_FUA = 1<<(RNBD_OP_BITS + 1),
-
- RNBD_F_ALL = (RNBD_F_SYNC | RNBD_F_FUA)
-
};
static inline u32 rnbd_op(u32 flags)
@@ -214,21 +216,6 @@ static inline u32 rnbd_flags(u32 flags)
return flags & ~RNBD_OP_MASK;
}
-static inline bool rnbd_flags_supported(u32 flags)
-{
- u32 op;
-
- op = rnbd_op(flags);
- flags = rnbd_flags(flags);
-
- if (op >= RNBD_OP_LAST)
- return false;
- if (flags & ~RNBD_F_ALL)
- return false;
-
- return true;
-}
-
static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf)
{
blk_opf_t bio_opf;
diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c
index d5d9267e1fa5..cba6ba43c2c2 100644
--- a/drivers/block/rnbd/rnbd-srv-sysfs.c
+++ b/drivers/block/rnbd/rnbd-srv-sysfs.c
@@ -9,7 +9,6 @@
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
-#include <uapi/linux/limits.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/stat.h>
@@ -20,7 +19,9 @@
#include "rnbd-srv.h"
static struct device *rnbd_dev;
-static struct class *rnbd_dev_class;
+static const struct class rnbd_dev_class = {
+ .name = "rnbd-server",
+};
static struct kobject *rnbd_devs_kobj;
static void rnbd_srv_dev_release(struct kobject *kobj)
@@ -88,8 +89,7 @@ static ssize_t read_only_show(struct kobject *kobj, struct kobj_attribute *attr,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
- return sysfs_emit(page, "%d\n",
- !(sess_dev->open_flags & FMODE_WRITE));
+ return sysfs_emit(page, "%d\n", sess_dev->readonly);
}
static struct kobj_attribute rnbd_srv_dev_session_ro_attr =
@@ -104,7 +104,7 @@ static ssize_t access_mode_show(struct kobject *kobj,
sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj);
return sysfs_emit(page, "%s\n",
- rnbd_access_mode_str(sess_dev->access_mode));
+ rnbd_access_modes[sess_dev->access_mode].str);
}
static struct kobj_attribute rnbd_srv_dev_session_access_mode_attr =
@@ -215,12 +215,12 @@ int rnbd_srv_create_sysfs_files(void)
{
int err;
- rnbd_dev_class = class_create("rnbd-server");
- if (IS_ERR(rnbd_dev_class))
- return PTR_ERR(rnbd_dev_class);
+ err = class_register(&rnbd_dev_class);
+ if (err)
+ return err;
- rnbd_dev = device_create(rnbd_dev_class, NULL,
- MKDEV(0, 0), NULL, "ctl");
+ rnbd_dev = device_create(&rnbd_dev_class, NULL,
+ MKDEV(0, 0), NULL, "ctl");
if (IS_ERR(rnbd_dev)) {
err = PTR_ERR(rnbd_dev);
goto cls_destroy;
@@ -234,9 +234,9 @@ int rnbd_srv_create_sysfs_files(void)
return 0;
dev_destroy:
- device_destroy(rnbd_dev_class, MKDEV(0, 0));
+ device_destroy(&rnbd_dev_class, MKDEV(0, 0));
cls_destroy:
- class_destroy(rnbd_dev_class);
+ class_unregister(&rnbd_dev_class);
return err;
}
@@ -245,6 +245,6 @@ void rnbd_srv_destroy_sysfs_files(void)
{
kobject_del(rnbd_devs_kobj);
kobject_put(rnbd_devs_kobj);
- device_destroy(rnbd_dev_class, MKDEV(0, 0));
- class_destroy(rnbd_dev_class);
+ device_destroy(&rnbd_dev_class, MKDEV(0, 0));
+ class_unregister(&rnbd_dev_class);
}
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 2cfed2e58d64..c186df0ec641 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -96,7 +96,7 @@ rnbd_get_sess_dev(int dev_id, struct rnbd_srv_session *srv_sess)
ret = kref_get_unless_zero(&sess_dev->kref);
rcu_read_unlock();
- if (!sess_dev || !ret)
+ if (!ret)
return ERR_PTR(-ENXIO);
return sess_dev;
@@ -180,7 +180,7 @@ static void destroy_device(struct kref *kref)
WARN_ONCE(!list_empty(&dev->sess_dev_list),
"Device %s is being destroyed but still in use!\n",
- dev->id);
+ dev->name);
spin_lock(&dev_lock);
list_del(&dev->list);
@@ -219,10 +219,10 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
rnbd_put_sess_dev(sess_dev);
wait_for_completion(&dc); /* wait for inflights to drop to zero */
- blkdev_put(sess_dev->bdev, sess_dev->open_flags);
+ blkdev_put(sess_dev->bdev, NULL);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
- if (sess_dev->open_flags & FMODE_WRITE)
+ if (!sess_dev->readonly)
sess_dev->dev->open_write_cnt--;
mutex_unlock(&sess_dev->dev->lock);
@@ -356,7 +356,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
-static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
+static void process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen);
@@ -384,8 +384,7 @@ static int rnbd_srv_rdma_ev(void *priv, struct rtrs_srv_op *id,
ret = process_msg_open(srv_sess, usr, usrlen, data, datalen);
break;
case RNBD_MSG_SESS_INFO:
- ret = process_msg_sess_info(srv_sess, usr, usrlen, data,
- datalen);
+ process_msg_sess_info(srv_sess, usr, usrlen, data, datalen);
break;
default:
pr_warn("Received unexpected message type %d from session %s\n",
@@ -431,7 +430,7 @@ static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(struct block_device *bdev)
if (!dev)
return ERR_PTR(-ENOMEM);
- snprintf(dev->id, sizeof(dev->id), "%pg", bdev);
+ snprintf(dev->name, sizeof(dev->name), "%pg", bdev);
kref_init(&dev->kref);
INIT_LIST_HEAD(&dev->sess_dev_list);
mutex_init(&dev->lock);
@@ -446,7 +445,7 @@ rnbd_srv_find_or_add_srv_dev(struct rnbd_srv_dev *new_dev)
spin_lock(&dev_lock);
list_for_each_entry(dev, &dev_list, list) {
- if (!strncmp(dev->id, new_dev->id, sizeof(dev->id))) {
+ if (!strncmp(dev->name, new_dev->name, sizeof(dev->name))) {
if (!kref_get_unless_zero(&dev->kref))
/*
* We lost the race, device is almost dead.
@@ -467,39 +466,38 @@ static int rnbd_srv_check_update_open_perm(struct rnbd_srv_dev *srv_dev,
struct rnbd_srv_session *srv_sess,
enum rnbd_access_mode access_mode)
{
- int ret = -EPERM;
+ int ret = 0;
mutex_lock(&srv_dev->lock);
switch (access_mode) {
case RNBD_ACCESS_RO:
- ret = 0;
break;
case RNBD_ACCESS_RW:
if (srv_dev->open_write_cnt == 0) {
srv_dev->open_write_cnt++;
- ret = 0;
} else {
pr_err("Mapping device '%s' for session %s with RW permissions failed. Device already opened as 'RW' by %d client(s), access mode %s.\n",
- srv_dev->id, srv_sess->sessname,
+ srv_dev->name, srv_sess->sessname,
srv_dev->open_write_cnt,
- rnbd_access_mode_str(access_mode));
+ rnbd_access_modes[access_mode].str);
+ ret = -EPERM;
}
break;
case RNBD_ACCESS_MIGRATION:
if (srv_dev->open_write_cnt < 2) {
srv_dev->open_write_cnt++;
- ret = 0;
} else {
pr_err("Mapping device '%s' for session %s with migration permissions failed. Device already opened as 'RW' by %d client(s), access mode %s.\n",
- srv_dev->id, srv_sess->sessname,
+ srv_dev->name, srv_sess->sessname,
srv_dev->open_write_cnt,
- rnbd_access_mode_str(access_mode));
+ rnbd_access_modes[access_mode].str);
+ ret = -EPERM;
}
break;
default:
pr_err("Received mapping request for device '%s' on session %s with invalid access mode: %d\n",
- srv_dev->id, srv_sess->sessname, access_mode);
+ srv_dev->name, srv_sess->sessname, access_mode);
ret = -EINVAL;
}
@@ -561,7 +559,7 @@ static void rnbd_srv_fill_msg_open_rsp(struct rnbd_msg_open_rsp *rsp,
static struct rnbd_srv_sess_dev *
rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
const struct rnbd_msg_open *open_msg,
- struct block_device *bdev, fmode_t open_flags,
+ struct block_device *bdev, bool readonly,
struct rnbd_srv_dev *srv_dev)
{
struct rnbd_srv_sess_dev *sdev = rnbd_sess_dev_alloc(srv_sess);
@@ -576,7 +574,7 @@ rnbd_srv_create_set_sess_dev(struct rnbd_srv_session *srv_sess,
sdev->bdev = bdev;
sdev->sess = srv_sess;
sdev->dev = srv_dev;
- sdev->open_flags = open_flags;
+ sdev->readonly = readonly;
sdev->access_mode = open_msg->access_mode;
return sdev;
@@ -631,7 +629,7 @@ static char *rnbd_srv_get_full_path(struct rnbd_srv_session *srv_sess,
return full_path;
}
-static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
+static void process_msg_sess_info(struct rnbd_srv_session *srv_sess,
const void *msg, size_t len,
void *data, size_t datalen)
{
@@ -644,8 +642,6 @@ static int process_msg_sess_info(struct rnbd_srv_session *srv_sess,
rsp->hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO_RSP);
rsp->ver = srv_sess->ver;
-
- return 0;
}
/**
@@ -681,15 +677,14 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
struct rnbd_srv_sess_dev *srv_sess_dev;
const struct rnbd_msg_open *open_msg = msg;
struct block_device *bdev;
- fmode_t open_flags;
+ blk_mode_t open_flags = BLK_OPEN_READ;
char *full_path;
struct rnbd_msg_open_rsp *rsp = data;
trace_process_msg_open(srv_sess, open_msg);
- open_flags = FMODE_READ;
if (open_msg->access_mode != RNBD_ACCESS_RO)
- open_flags |= FMODE_WRITE;
+ open_flags |= BLK_OPEN_WRITE;
mutex_lock(&srv_sess->lock);
@@ -719,7 +714,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto reject;
}
- bdev = blkdev_get_by_path(full_path, open_flags, THIS_MODULE);
+ bdev = blkdev_get_by_path(full_path, open_flags, NULL, NULL);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
pr_err("Opening device '%s' on session %s failed, failed to open the block device, err: %d\n",
@@ -736,9 +731,9 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
goto blkdev_put;
}
- srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg,
- bdev, open_flags,
- srv_dev);
+ srv_sess_dev = rnbd_srv_create_set_sess_dev(srv_sess, open_msg, bdev,
+ open_msg->access_mode == RNBD_ACCESS_RO,
+ srv_dev);
if (IS_ERR(srv_sess_dev)) {
pr_err("Opening device '%s' on session %s failed, creating sess_dev failed, err: %ld\n",
full_path, srv_sess->sessname, PTR_ERR(srv_sess_dev));
@@ -774,7 +769,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list);
mutex_unlock(&srv_dev->lock);
- rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id);
+ rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->name);
kfree(full_path);
@@ -795,7 +790,7 @@ srv_dev_put:
}
rnbd_put_srv_dev(srv_dev);
blkdev_put:
- blkdev_put(bdev, open_flags);
+ blkdev_put(bdev, NULL);
free_path:
kfree(full_path);
reject:
@@ -808,7 +803,7 @@ static struct rtrs_srv_ctx *rtrs_ctx;
static struct rtrs_srv_ops rtrs_ops;
static int __init rnbd_srv_init_module(void)
{
- int err;
+ int err = 0;
BUILD_BUG_ON(sizeof(struct rnbd_msg_hdr) != 4);
BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info) != 36);
@@ -822,19 +817,17 @@ static int __init rnbd_srv_init_module(void)
};
rtrs_ctx = rtrs_srv_open(&rtrs_ops, port_nr);
if (IS_ERR(rtrs_ctx)) {
- err = PTR_ERR(rtrs_ctx);
pr_err("rtrs_srv_open(), err: %d\n", err);
- return err;
+ return PTR_ERR(rtrs_ctx);
}
err = rnbd_srv_create_sysfs_files();
if (err) {
pr_err("rnbd_srv_create_sysfs_files(), err: %d\n", err);
rtrs_srv_close(rtrs_ctx);
- return err;
}
- return 0;
+ return err;
}
static void __exit rnbd_srv_cleanup_module(void)
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index f5962fd31d62..1027656dedb0 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -35,7 +35,7 @@ struct rnbd_srv_dev {
struct kobject dev_kobj;
struct kobject *dev_sessions_kobj;
struct kref kref;
- char id[NAME_MAX];
+ char name[NAME_MAX];
/* List of rnbd_srv_sess_dev structs */
struct list_head sess_dev_list;
struct mutex lock;
@@ -52,7 +52,7 @@ struct rnbd_srv_sess_dev {
struct kobject kobj;
u32 device_id;
bool keep_id;
- fmode_t open_flags;
+ bool readonly;
struct kref kref;
struct completion *destroy_comp;
char pathname[NAME_MAX];
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 9fa821fa76b0..7bf4b48e2282 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -139,7 +139,7 @@ static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
* when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
* Needed to be able to install inside an ldom from an iso image.
*/
-static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
+static int vdc_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned command, unsigned long argument)
{
struct vdc_port *port = bdev->bd_disk->private_data;
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 42b4b6828690..f85b6af414b4 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -608,20 +608,18 @@ static void setup_medium(struct floppy_state *fs)
}
}
-static int floppy_open(struct block_device *bdev, fmode_t mode)
+static int floppy_open(struct gendisk *disk, blk_mode_t mode)
{
- struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct floppy_state *fs = disk->private_data;
struct swim __iomem *base = fs->swd->base;
int err;
- if (fs->ref_count == -1 || (fs->ref_count && mode & FMODE_EXCL))
+ if (fs->ref_count == -1 || (fs->ref_count && mode & BLK_OPEN_EXCL))
return -EBUSY;
-
- if (mode & FMODE_EXCL)
+ if (mode & BLK_OPEN_EXCL)
fs->ref_count = -1;
else
fs->ref_count++;
-
swim_write(base, setup, S_IBM_DRIVE | S_FCLK_DIV2);
udelay(10);
swim_drive(base, fs->location);
@@ -636,13 +634,13 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
set_capacity(fs->disk, fs->total_secs);
- if (mode & FMODE_NDELAY)
+ if (mode & BLK_OPEN_NDELAY)
return 0;
- if (mode & (FMODE_READ|FMODE_WRITE)) {
- if (bdev_check_media_change(bdev) && fs->disk_in)
+ if (mode & (BLK_OPEN_READ | BLK_OPEN_WRITE)) {
+ if (disk_check_media_change(disk) && fs->disk_in)
fs->ejected = 0;
- if ((mode & FMODE_WRITE) && fs->write_protected) {
+ if ((mode & BLK_OPEN_WRITE) && fs->write_protected) {
err = -EROFS;
goto out;
}
@@ -659,18 +657,18 @@ out:
return err;
}
-static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+static int floppy_unlocked_open(struct gendisk *disk, blk_mode_t mode)
{
int ret;
mutex_lock(&swim_mutex);
- ret = floppy_open(bdev, mode);
+ ret = floppy_open(disk, mode);
mutex_unlock(&swim_mutex);
return ret;
}
-static void floppy_release(struct gendisk *disk, fmode_t mode)
+static void floppy_release(struct gendisk *disk)
{
struct floppy_state *fs = disk->private_data;
struct swim __iomem *base = fs->swd->base;
@@ -686,7 +684,7 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
mutex_unlock(&swim_mutex);
}
-static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
struct floppy_state *fs = bdev->bd_disk->private_data;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index da811a7da03f..dc43a63b3469 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -246,10 +246,9 @@ static int grab_drive(struct floppy_state *fs, enum swim_state state,
int interruptible);
static void release_drive(struct floppy_state *fs);
static int fd_eject(struct floppy_state *fs);
-static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param);
-static int floppy_open(struct block_device *bdev, fmode_t mode);
-static void floppy_release(struct gendisk *disk, fmode_t mode);
+static int floppy_open(struct gendisk *disk, blk_mode_t mode);
static unsigned int floppy_check_events(struct gendisk *disk,
unsigned int clearing);
static int floppy_revalidate(struct gendisk *disk);
@@ -883,7 +882,7 @@ static int fd_eject(struct floppy_state *fs)
static struct floppy_struct floppy_type =
{ 2880,18,2,80,0,0x1B,0x00,0xCF,0x6C,NULL }; /* 7 1.44MB 3.5" */
-static int floppy_locked_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
struct floppy_state *fs = bdev->bd_disk->private_data;
@@ -911,7 +910,7 @@ static int floppy_locked_ioctl(struct block_device *bdev, fmode_t mode,
return -ENOTTY;
}
-static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long param)
{
int ret;
@@ -923,9 +922,9 @@ static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
-static int floppy_open(struct block_device *bdev, fmode_t mode)
+static int floppy_open(struct gendisk *disk, blk_mode_t mode)
{
- struct floppy_state *fs = bdev->bd_disk->private_data;
+ struct floppy_state *fs = disk->private_data;
struct swim3 __iomem *sw = fs->swim3;
int n, err = 0;
@@ -958,18 +957,18 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
swim3_action(fs, SETMFM);
swim3_select(fs, RELAX);
- } else if (fs->ref_count == -1 || mode & FMODE_EXCL)
+ } else if (fs->ref_count == -1 || mode & BLK_OPEN_EXCL)
return -EBUSY;
- if (err == 0 && (mode & FMODE_NDELAY) == 0
- && (mode & (FMODE_READ|FMODE_WRITE))) {
- if (bdev_check_media_change(bdev))
- floppy_revalidate(bdev->bd_disk);
+ if (err == 0 && !(mode & BLK_OPEN_NDELAY) &&
+ (mode & (BLK_OPEN_READ | BLK_OPEN_WRITE))) {
+ if (disk_check_media_change(disk))
+ floppy_revalidate(disk);
if (fs->ejected)
err = -ENXIO;
}
- if (err == 0 && (mode & FMODE_WRITE)) {
+ if (err == 0 && (mode & BLK_OPEN_WRITE)) {
if (fs->write_prot < 0)
fs->write_prot = swim3_readbit(fs, WRITE_PROT);
if (fs->write_prot)
@@ -985,7 +984,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return err;
}
- if (mode & FMODE_EXCL)
+ if (mode & BLK_OPEN_EXCL)
fs->ref_count = -1;
else
++fs->ref_count;
@@ -993,18 +992,18 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)
return 0;
}
-static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+static int floppy_unlocked_open(struct gendisk *disk, blk_mode_t mode)
{
int ret;
mutex_lock(&swim3_mutex);
- ret = floppy_open(bdev, mode);
+ ret = floppy_open(disk, mode);
mutex_unlock(&swim3_mutex);
return ret;
}
-static void floppy_release(struct gendisk *disk, fmode_t mode)
+static void floppy_release(struct gendisk *disk)
{
struct floppy_state *fs = disk->private_data;
struct swim3 __iomem *sw = fs->swim3;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 33d3298a0da1..1c823750c95a 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -43,6 +43,7 @@
#include <asm/page.h>
#include <linux/task_work.h>
#include <linux/namei.h>
+#include <linux/kref.h>
#include <uapi/linux/ublk_cmd.h>
#define UBLK_MINORS (1U << MINORBITS)
@@ -54,7 +55,8 @@
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
| UBLK_F_UNPRIVILEGED_DEV \
- | UBLK_F_CMD_IOCTL_ENCODE)
+ | UBLK_F_CMD_IOCTL_ENCODE \
+ | UBLK_F_USER_COPY)
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
@@ -62,7 +64,8 @@
struct ublk_rq_data {
struct llist_node node;
- struct callback_head work;
+
+ struct kref ref;
};
struct ublk_uring_cmd_pdu {
@@ -182,8 +185,13 @@ struct ublk_params_header {
__u32 types;
};
+static inline void __ublk_complete_rq(struct request *req);
+static void ublk_complete_rq(struct kref *ref);
+
static dev_t ublk_chr_devt;
-static struct class *ublk_chr_class;
+static const struct class ublk_chr_class = {
+ .name = "ublk-char",
+};
static DEFINE_IDR(ublk_index_idr);
static DEFINE_SPINLOCK(ublk_idr_lock);
@@ -202,6 +210,23 @@ static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
static struct miscdevice ublk_misc;
+static inline unsigned ublk_pos_to_hwq(loff_t pos)
+{
+ return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_QID_OFF) &
+ UBLK_QID_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_buf_off(loff_t pos)
+{
+ return (pos - UBLKSRV_IO_BUF_OFFSET) & UBLK_IO_BUF_BITS_MASK;
+}
+
+static inline unsigned ublk_pos_to_tag(loff_t pos)
+{
+ return ((pos - UBLKSRV_IO_BUF_OFFSET) >> UBLK_TAG_OFF) &
+ UBLK_TAG_BITS_MASK;
+}
+
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
{
struct request_queue *q = ub->ub_disk->queue;
@@ -290,12 +315,52 @@ static int ublk_apply_params(struct ublk_device *ub)
return 0;
}
-static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
+static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
- if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
- !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
- return true;
- return false;
+ return ubq->flags & UBLK_F_USER_COPY;
+}
+
+static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
+{
+ /*
+ * read()/write() is involved in user copy, so request reference
+ * has to be grabbed
+ */
+ return ublk_support_user_copy(ubq);
+}
+
+static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_init(&data->ref);
+ }
+}
+
+static inline bool ublk_get_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ return kref_get_unless_zero(&data->ref);
+ }
+
+ return true;
+}
+
+static inline void ublk_put_req_ref(const struct ublk_queue *ubq,
+ struct request *req)
+{
+ if (ublk_need_req_ref(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ kref_put(&data->ref, ublk_complete_rq);
+ } else {
+ __ublk_complete_rq(req);
+ }
}
static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
@@ -384,9 +449,9 @@ static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
*owner_gid = from_kgid(&init_user_ns, gid);
}
-static int ublk_open(struct block_device *bdev, fmode_t mode)
+static int ublk_open(struct gendisk *disk, blk_mode_t mode)
{
- struct ublk_device *ub = bdev->bd_disk->private_data;
+ struct ublk_device *ub = disk->private_data;
if (capable(CAP_SYS_ADMIN))
return 0;
@@ -421,49 +486,39 @@ static const struct block_device_operations ub_fops = {
#define UBLK_MAX_PIN_PAGES 32
-struct ublk_map_data {
- const struct request *rq;
- unsigned long ubuf;
- unsigned int len;
-};
-
struct ublk_io_iter {
struct page *pages[UBLK_MAX_PIN_PAGES];
- unsigned pg_off; /* offset in the 1st page in pages */
- int nr_pages; /* how many page pointers in pages */
struct bio *bio;
struct bvec_iter iter;
};
-static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
- unsigned max_bytes, bool to_vm)
+/* copy 'total' bytes between bio and data->pages, from 'pg_off' in the first page */
+static void ublk_copy_io_pages(struct ublk_io_iter *data,
+ size_t total, size_t pg_off, int dir)
{
- const unsigned total = min_t(unsigned, max_bytes,
- PAGE_SIZE - data->pg_off +
- ((data->nr_pages - 1) << PAGE_SHIFT));
unsigned done = 0;
unsigned pg_idx = 0;
while (done < total) {
struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
- const unsigned int bytes = min3(bv.bv_len, total - done,
- (unsigned)(PAGE_SIZE - data->pg_off));
+ unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
+ (unsigned)(PAGE_SIZE - pg_off));
void *bv_buf = bvec_kmap_local(&bv);
void *pg_buf = kmap_local_page(data->pages[pg_idx]);
- if (to_vm)
- memcpy(pg_buf + data->pg_off, bv_buf, bytes);
+ if (dir == ITER_DEST)
+ memcpy(pg_buf + pg_off, bv_buf, bytes);
else
- memcpy(bv_buf, pg_buf + data->pg_off, bytes);
+ memcpy(bv_buf, pg_buf + pg_off, bytes);
kunmap_local(pg_buf);
kunmap_local(bv_buf);
/* advance page array */
- data->pg_off += bytes;
- if (data->pg_off == PAGE_SIZE) {
+ pg_off += bytes;
+ if (pg_off == PAGE_SIZE) {
pg_idx += 1;
- data->pg_off = 0;
+ pg_off = 0;
}
done += bytes;
@@ -477,41 +532,58 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
data->iter = data->bio->bi_iter;
}
}
+}
- return done;
+static bool ublk_advance_io_iter(const struct request *req,
+ struct ublk_io_iter *iter, unsigned int offset)
+{
+ struct bio *bio = req->bio;
+
+ for_each_bio(bio) {
+ if (bio->bi_iter.bi_size > offset) {
+ iter->bio = bio;
+ iter->iter = bio->bi_iter;
+ bio_advance_iter(iter->bio, &iter->iter, offset);
+ return true;
+ }
+ offset -= bio->bi_iter.bi_size;
+ }
+ return false;
}
-static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm)
+/*
+ * Copy data between the request's pages and the iov_iter 'uiter';
+ * 'offset' is the linear byte offset into the request to start from.
+ */
+static size_t ublk_copy_user_pages(const struct request *req,
+ unsigned offset, struct iov_iter *uiter, int dir)
{
- const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
- const unsigned long start_vm = data->ubuf;
- unsigned int done = 0;
- struct ublk_io_iter iter = {
- .pg_off = start_vm & (PAGE_SIZE - 1),
- .bio = data->rq->bio,
- .iter = data->rq->bio->bi_iter,
- };
- const unsigned int nr_pages = round_up(data->len +
- (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
-
- while (done < nr_pages) {
- const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES,
- nr_pages - done);
- unsigned i, len;
-
- iter.nr_pages = get_user_pages_fast(start_vm +
- (done << PAGE_SHIFT), to_pin, gup_flags,
- iter.pages);
- if (iter.nr_pages <= 0)
- return done == 0 ? iter.nr_pages : done;
- len = ublk_copy_io_pages(&iter, data->len, to_vm);
- for (i = 0; i < iter.nr_pages; i++) {
- if (to_vm)
+ struct ublk_io_iter iter;
+ size_t done = 0;
+
+ if (!ublk_advance_io_iter(req, &iter, offset))
+ return 0;
+
+ while (iov_iter_count(uiter) && iter.bio) {
+ unsigned nr_pages;
+ ssize_t len;
+ size_t off;
+ int i;
+
+ len = iov_iter_get_pages2(uiter, iter.pages,
+ iov_iter_count(uiter),
+ UBLK_MAX_PIN_PAGES, &off);
+ if (len <= 0)
+ return done;
+
+ ublk_copy_io_pages(&iter, len, off, dir);
+ nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
+ for (i = 0; i < nr_pages; i++) {
+ if (dir == ITER_DEST)
set_page_dirty(iter.pages[i]);
put_page(iter.pages[i]);
}
- data->len -= len;
- done += iter.nr_pages;
+ done += len;
}
return done;
@@ -532,21 +604,23 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+ if (ublk_support_user_copy(ubq))
+ return rq_bytes;
+
/*
* no zero copy, we delay copy WRITE request data into ublksrv
* context and the big benefit is that pinning pages in current
* context is pretty fast, see ublk_pin_user_pages
*/
if (ublk_need_map_req(req)) {
- struct ublk_map_data data = {
- .rq = req,
- .ubuf = io->addr,
- .len = rq_bytes,
- };
+ struct iov_iter iter;
+ struct iovec iov;
+ const int dir = ITER_DEST;
- ublk_copy_user_pages(&data, true);
+ import_single_range(dir, u64_to_user_ptr(io->addr), rq_bytes,
+ &iov, &iter);
- return rq_bytes - data.len;
+ return ublk_copy_user_pages(req, 0, &iter, dir);
}
return rq_bytes;
}
@@ -557,18 +631,19 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+ if (ublk_support_user_copy(ubq))
+ return rq_bytes;
+
if (ublk_need_unmap_req(req)) {
- struct ublk_map_data data = {
- .rq = req,
- .ubuf = io->addr,
- .len = io->res,
- };
+ struct iov_iter iter;
+ struct iovec iov;
+ const int dir = ITER_SOURCE;
WARN_ON_ONCE(io->res > rq_bytes);
- ublk_copy_user_pages(&data, false);
-
- return io->res - data.len;
+ import_single_range(dir, u64_to_user_ptr(io->addr), io->res,
+ &iov, &iter);
+ return ublk_copy_user_pages(req, 0, &iter, dir);
}
return rq_bytes;
}
@@ -648,13 +723,19 @@ static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
}
/* todo: handle partial completion */
-static void ublk_complete_rq(struct request *req)
+static inline void __ublk_complete_rq(struct request *req)
{
struct ublk_queue *ubq = req->mq_hctx->driver_data;
struct ublk_io *io = &ubq->ios[req->tag];
unsigned int unmapped_bytes;
blk_status_t res = BLK_STS_OK;
+ /* called from ublk_abort_queue() code path */
+ if (io->flags & UBLK_IO_FLAG_ABORTED) {
+ res = BLK_STS_IOERR;
+ goto exit;
+ }
+
/* failed read IO if nothing is read */
if (!io->res && req_op(req) == REQ_OP_READ)
io->res = -EIO;
@@ -694,6 +775,15 @@ exit:
blk_mq_end_request(req, res);
}
+static void ublk_complete_rq(struct kref *ref)
+{
+ struct ublk_rq_data *data = container_of(ref, struct ublk_rq_data,
+ ref);
+ struct request *req = blk_mq_rq_from_pdu(data);
+
+ __ublk_complete_rq(req);
+}
+
/*
* Since __ublk_rq_task_work always fails requests immediately during
* exiting, __ublk_fail_req() is only called from abort context during
@@ -712,7 +802,7 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io,
if (ublk_queue_can_use_recovery_reissue(ubq))
blk_mq_requeue_request(req, false);
else
- blk_mq_end_request(req, BLK_STS_IOERR);
+ ublk_put_req_ref(ubq, req);
}
}
@@ -821,6 +911,7 @@ static inline void __ublk_rq_task_work(struct request *req,
mapped_bytes >> 9;
}
+ ublk_init_req_ref(ubq, req);
ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
}
@@ -852,17 +943,6 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
ublk_forward_io_cmds(ubq, issue_flags);
}
-static void ublk_rq_task_work_fn(struct callback_head *work)
-{
- struct ublk_rq_data *data = container_of(work,
- struct ublk_rq_data, work);
- struct request *req = blk_mq_rq_from_pdu(data);
- struct ublk_queue *ubq = req->mq_hctx->driver_data;
- unsigned issue_flags = IO_URING_F_UNLOCKED;
-
- ublk_forward_io_cmds(ubq, issue_flags);
-}
-
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
{
struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
@@ -886,10 +966,6 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
*/
if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
ublk_abort_io_cmds(ubq);
- } else if (ublk_can_use_task_work(ubq)) {
- if (task_work_add(ubq->ubq_daemon, &data->work,
- TWA_SIGNAL_NO_IPI))
- ublk_abort_io_cmds(ubq);
} else {
struct io_uring_cmd *cmd = io->cmd;
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
@@ -961,19 +1037,9 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
return 0;
}
-static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
- init_task_work(&data->work, ublk_rq_task_work_fn);
- return 0;
-}
-
static const struct blk_mq_ops ublk_mq_ops = {
.queue_rq = ublk_queue_rq,
.init_hctx = ublk_init_hctx,
- .init_request = ublk_init_rq,
.timeout = ublk_timeout,
};
@@ -1050,7 +1116,7 @@ static void ublk_commit_completion(struct ublk_device *ub,
req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag);
if (req && likely(!blk_should_fake_timeout(req->q)))
- ublk_complete_rq(req);
+ ublk_put_req_ref(ubq, req);
}
/*
@@ -1295,6 +1361,14 @@ static inline int ublk_check_cmd_op(u32 cmd_op)
return 0;
}
+static inline void ublk_fill_io_cmd(struct ublk_io *io,
+ struct io_uring_cmd *cmd, unsigned long buf_addr)
+{
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ io->addr = buf_addr;
+}
+
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@@ -1340,6 +1414,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
goto out;
+ if (ublk_support_user_copy(ubq) && ub_cmd->addr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = ublk_check_cmd_op(cmd_op);
if (ret)
goto out;
@@ -1358,36 +1437,41 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
*/
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
goto out;
- /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
- if (!ub_cmd->addr && !ublk_need_get_data(ubq))
- goto out;
- io->cmd = cmd;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
- io->addr = ub_cmd->addr;
+ if (!ublk_support_user_copy(ubq)) {
+ /*
+ * FETCH_RQ has to provide IO buffer if NEED GET
+ * DATA is not enabled
+ */
+ if (!ub_cmd->addr && !ublk_need_get_data(ubq))
+ goto out;
+ }
+
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_mark_io_ready(ub, ubq);
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
- /*
- * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
- * not enabled or it is Read IO.
- */
- if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
- goto out;
+
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
- io->addr = ub_cmd->addr;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
- io->cmd = cmd;
+
+ if (!ublk_support_user_copy(ubq)) {
+ /*
+ * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+ * NEED GET DATA is not enabled or it is Read IO.
+ */
+ if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+ req_op(req) == REQ_OP_READ))
+ goto out;
+ }
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_commit_completion(ub, ub_cmd);
break;
case UBLK_IO_NEED_GET_DATA:
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
- io->addr = ub_cmd->addr;
- io->cmd = cmd;
- io->flags |= UBLK_IO_FLAG_ACTIVE;
+ ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break;
default:
@@ -1402,6 +1486,36 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
return -EIOCBQUEUED;
}
+static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
+ struct ublk_queue *ubq, int tag, size_t offset)
+{
+ struct request *req;
+
+ if (!ublk_need_req_ref(ubq))
+ return NULL;
+
+ req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+ if (!req)
+ return NULL;
+
+ if (!ublk_get_req_ref(ubq, req))
+ return NULL;
+
+ if (unlikely(!blk_mq_request_started(req) || req->tag != tag))
+ goto fail_put;
+
+ if (!ublk_rq_has_data(req))
+ goto fail_put;
+
+ if (offset > blk_rq_bytes(req))
+ goto fail_put;
+
+ return req;
+fail_put:
+ ublk_put_req_ref(ubq, req);
+ return NULL;
+}
+
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
/*
@@ -1419,11 +1533,112 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
return __ublk_ch_uring_cmd(cmd, issue_flags, &ub_cmd);
}
+static inline bool ublk_check_ubuf_dir(const struct request *req,
+ int ubuf_dir)
+{
+ /* copy ubuf to request pages */
+ if (req_op(req) == REQ_OP_READ && ubuf_dir == ITER_SOURCE)
+ return true;
+
+ /* copy request pages to ubuf */
+ if (req_op(req) == REQ_OP_WRITE && ubuf_dir == ITER_DEST)
+ return true;
+
+ return false;
+}
+
+static struct request *ublk_check_and_get_req(struct kiocb *iocb,
+ struct iov_iter *iter, size_t *off, int dir)
+{
+ struct ublk_device *ub = iocb->ki_filp->private_data;
+ struct ublk_queue *ubq;
+ struct request *req;
+ size_t buf_off;
+ u16 tag, q_id;
+
+ if (!ub)
+ return ERR_PTR(-EACCES);
+
+ if (!user_backed_iter(iter))
+ return ERR_PTR(-EACCES);
+
+ if (ub->dev_info.state == UBLK_S_DEV_DEAD)
+ return ERR_PTR(-EACCES);
+
+ tag = ublk_pos_to_tag(iocb->ki_pos);
+ q_id = ublk_pos_to_hwq(iocb->ki_pos);
+ buf_off = ublk_pos_to_buf_off(iocb->ki_pos);
+
+ if (q_id >= ub->dev_info.nr_hw_queues)
+ return ERR_PTR(-EINVAL);
+
+ ubq = ublk_get_queue(ub, q_id);
+ if (!ubq)
+ return ERR_PTR(-EINVAL);
+
+ if (tag >= ubq->q_depth)
+ return ERR_PTR(-EINVAL);
+
+ req = __ublk_check_and_get_req(ub, ubq, tag, buf_off);
+ if (!req)
+ return ERR_PTR(-EINVAL);
+
+ if (!req->mq_hctx || !req->mq_hctx->driver_data)
+ goto fail;
+
+ if (!ublk_check_ubuf_dir(req, dir))
+ goto fail;
+
+ *off = buf_off;
+ return req;
+fail:
+ ublk_put_req_ref(ubq, req);
+ return ERR_PTR(-EACCES);
+}
+
+static ssize_t ublk_ch_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct ublk_queue *ubq;
+ struct request *req;
+ size_t buf_off;
+ size_t ret;
+
+ req = ublk_check_and_get_req(iocb, to, &buf_off, ITER_DEST);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ ret = ublk_copy_user_pages(req, buf_off, to, ITER_DEST);
+ ubq = req->mq_hctx->driver_data;
+ ublk_put_req_ref(ubq, req);
+
+ return ret;
+}
+
+static ssize_t ublk_ch_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct ublk_queue *ubq;
+ struct request *req;
+ size_t buf_off;
+ size_t ret;
+
+ req = ublk_check_and_get_req(iocb, from, &buf_off, ITER_SOURCE);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ ret = ublk_copy_user_pages(req, buf_off, from, ITER_SOURCE);
+ ubq = req->mq_hctx->driver_data;
+ ublk_put_req_ref(ubq, req);
+
+ return ret;
+}
+
static const struct file_operations ublk_ch_fops = {
.owner = THIS_MODULE,
.open = ublk_ch_open,
.release = ublk_ch_release,
.llseek = no_llseek,
+ .read_iter = ublk_ch_read_iter,
+ .write_iter = ublk_ch_write_iter,
.uring_cmd = ublk_ch_uring_cmd,
.mmap = ublk_ch_mmap,
};
@@ -1547,7 +1762,7 @@ static int ublk_add_chdev(struct ublk_device *ub)
dev->parent = ublk_misc.this_device;
dev->devt = MKDEV(MAJOR(ublk_chr_devt), minor);
- dev->class = ublk_chr_class;
+ dev->class = &ublk_chr_class;
dev->release = ublk_cdev_rel;
device_initialize(dev);
@@ -1818,10 +2033,12 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
*/
ub->dev_info.flags &= UBLK_F_ALL;
- if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
- ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+ ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
+ UBLK_F_URING_CMD_COMP_IN_TASK;
- ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE;
+ /* GET_DATA isn't needed any more with USER_COPY */
+ if (ub->dev_info.flags & UBLK_F_USER_COPY)
+ ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
@@ -2133,6 +2350,21 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
return ret;
}
+static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
+{
+ const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
+
+ if (header->len != UBLK_FEATURES_LEN || !header->addr)
+ return -EINVAL;
+
+ if (copy_to_user(argp, &features, UBLK_FEATURES_LEN))
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* All control commands are sent via /dev/ublk-control, so we have to check
* the destination device's permission
@@ -2213,6 +2445,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
case UBLK_CMD_GET_DEV_INFO2:
case UBLK_CMD_GET_QUEUE_AFFINITY:
case UBLK_CMD_GET_PARAMS:
+ case (_IOC_NR(UBLK_U_CMD_GET_FEATURES)):
mask = MAY_READ;
break;
case UBLK_CMD_START_DEV:
@@ -2262,6 +2495,11 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (ret)
goto out;
+ if (cmd_op == UBLK_U_CMD_GET_FEATURES) {
+ ret = ublk_ctrl_get_features(cmd);
+ goto out;
+ }
+
if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) {
ret = -ENODEV;
ub = ublk_get_device_from_id(header->dev_id);
@@ -2337,6 +2575,9 @@ static int __init ublk_init(void)
{
int ret;
+ BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
+ UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+
init_waitqueue_head(&ublk_idr_wq);
ret = misc_register(&ublk_misc);
@@ -2347,11 +2588,10 @@ static int __init ublk_init(void)
if (ret)
goto unregister_mis;
- ublk_chr_class = class_create("ublk-char");
- if (IS_ERR(ublk_chr_class)) {
- ret = PTR_ERR(ublk_chr_class);
+ ret = class_register(&ublk_chr_class);
+ if (ret)
goto free_chrdev_region;
- }
+
return 0;
free_chrdev_region:
@@ -2369,7 +2609,7 @@ static void __exit ublk_exit(void)
idr_for_each_entry(&ublk_index_idr, ub, id)
ublk_remove(ub);
- class_destroy(ublk_chr_class);
+ class_unregister(&ublk_chr_class);
misc_deregister(&ublk_misc);
idr_destroy(&ublk_index_idr);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 2b918e28acaa..b47358da92a2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -348,63 +348,33 @@ static inline void virtblk_request_done(struct request *req)
blk_mq_end_request(req, status);
}
-static void virtblk_complete_batch(struct io_comp_batch *iob)
-{
- struct request *req;
-
- rq_list_for_each(&iob->req_list, req) {
- virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
- virtblk_cleanup_cmd(req);
- }
- blk_mq_end_request_batch(iob);
-}
-
-static int virtblk_handle_req(struct virtio_blk_vq *vq,
- struct io_comp_batch *iob)
-{
- struct virtblk_req *vbr;
- int req_done = 0;
- unsigned int len;
-
- while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
- struct request *req = blk_mq_rq_from_pdu(vbr);
-
- if (likely(!blk_should_fake_timeout(req->q)) &&
- !blk_mq_complete_request_remote(req) &&
- !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
- virtblk_complete_batch))
- virtblk_request_done(req);
- req_done++;
- }
-
- return req_done;
-}
-
static void virtblk_done(struct virtqueue *vq)
{
struct virtio_blk *vblk = vq->vdev->priv;
- struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index];
- int req_done = 0;
+ bool req_done = false;
+ int qid = vq->index;
+ struct virtblk_req *vbr;
unsigned long flags;
- DEFINE_IO_COMP_BATCH(iob);
+ unsigned int len;
- spin_lock_irqsave(&vblk_vq->lock, flags);
+ spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
do {
virtqueue_disable_cb(vq);
- req_done += virtblk_handle_req(vblk_vq, &iob);
+ while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
+ struct request *req = blk_mq_rq_from_pdu(vbr);
+ if (likely(!blk_should_fake_timeout(req->q)))
+ blk_mq_complete_request(req);
+ req_done = true;
+ }
if (unlikely(virtqueue_is_broken(vq)))
break;
} while (!virtqueue_enable_cb(vq));
- if (req_done) {
- if (!rq_list_empty(iob.req_list))
- iob.complete(&iob);
-
- /* In case queue is stopped waiting for more buffers. */
+ /* In case queue is stopped waiting for more buffers. */
+ if (req_done)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
- }
- spin_unlock_irqrestore(&vblk_vq->lock, flags);
+ spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}
static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
@@ -1283,15 +1253,37 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set)
}
}
+static void virtblk_complete_batch(struct io_comp_batch *iob)
+{
+ struct request *req;
+
+ rq_list_for_each(&iob->req_list, req) {
+ virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
+ virtblk_cleanup_cmd(req);
+ }
+ blk_mq_end_request_batch(iob);
+}
+
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
+ struct virtblk_req *vbr;
unsigned long flags;
+ unsigned int len;
int found = 0;
spin_lock_irqsave(&vq->lock, flags);
- found = virtblk_handle_req(vq, iob);
+
+ while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
+ struct request *req = blk_mq_rq_from_pdu(vbr);
+
+ found++;
+ if (!blk_mq_complete_request_remote(req) &&
+ !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
+ virtblk_complete_batch))
+ virtblk_request_done(req);
+ }
if (found)
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4807af1d5805..bb66178c432b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -473,7 +473,7 @@ static void xenvbd_sysfs_delif(struct xenbus_device *dev)
static void xen_vbd_free(struct xen_vbd *vbd)
{
if (vbd->bdev)
- blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ blkdev_put(vbd->bdev, NULL);
vbd->bdev = NULL;
}
@@ -492,7 +492,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
vbd->pdevice = MKDEV(major, minor);
bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
- FMODE_READ : FMODE_WRITE, NULL);
+ BLK_OPEN_READ : BLK_OPEN_WRITE, NULL, NULL);
if (IS_ERR(bdev)) {
pr_warn("xen_vbd_create: device %08x could not be opened\n",
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c1890c8a9f6e..434fab306777 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -509,7 +509,7 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
return 0;
}
-static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
+static int blkif_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned command, unsigned long argument)
{
struct blkfront_info *info = bdev->bd_disk->private_data;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index c1e85f356e4d..11493167b0a8 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -140,16 +140,14 @@ static void get_chipram(void)
return;
}
-static int z2_open(struct block_device *bdev, fmode_t mode)
+static int z2_open(struct gendisk *disk, blk_mode_t mode)
{
- int device;
+ int device = disk->first_minor;
int max_z2_map = (Z2RAM_SIZE / Z2RAM_CHUNKSIZE) * sizeof(z2ram_map[0]);
int max_chip_map = (amiga_chip_size / Z2RAM_CHUNKSIZE) *
sizeof(z2ram_map[0]);
int rc = -ENOMEM;
- device = MINOR(bdev->bd_dev);
-
mutex_lock(&z2ram_mutex);
if (current_device != -1 && current_device != device) {
rc = -EBUSY;
@@ -290,7 +288,7 @@ err_out:
return rc;
}
-static void z2_release(struct gendisk *disk, fmode_t mode)
+static void z2_release(struct gendisk *disk)
{
mutex_lock(&z2ram_mutex);
if (current_device == -1) {
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f6d90f1ba5cf..5676e6dd5b16 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -420,7 +420,7 @@ static void reset_bdev(struct zram *zram)
return;
bdev = zram->bdev;
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(bdev, zram);
/* hope filp_close flush all of IO */
filp_close(zram->backing_dev, NULL);
zram->backing_dev = NULL;
@@ -507,8 +507,8 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- bdev = blkdev_get_by_dev(inode->i_rdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
+ bdev = blkdev_get_by_dev(inode->i_rdev, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ zram, NULL);
if (IS_ERR(bdev)) {
err = PTR_ERR(bdev);
bdev = NULL;
@@ -539,7 +539,7 @@ out:
kvfree(bitmap);
if (bdev)
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(bdev, zram);
if (backing_dev)
filp_close(backing_dev, NULL);
@@ -700,7 +700,7 @@ static ssize_t writeback_store(struct device *dev,
bio_init(&bio, zram->bdev, &bio_vec, 1,
REQ_OP_WRITE | REQ_SYNC);
bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
- bio_add_page(&bio, page, PAGE_SIZE, 0);
+ __bio_add_page(&bio, page, PAGE_SIZE, 0);
/*
* XXX: A single page IO would be inefficient for write
@@ -1753,7 +1753,7 @@ static ssize_t recompress_store(struct device *dev,
}
}
- if (threshold >= PAGE_SIZE)
+ if (threshold >= huge_class_size)
return -EINVAL;
down_read(&zram->init_lock);
@@ -2097,19 +2097,16 @@ static ssize_t reset_store(struct device *dev,
return len;
}
-static int zram_open(struct block_device *bdev, fmode_t mode)
+static int zram_open(struct gendisk *disk, blk_mode_t mode)
{
- int ret = 0;
- struct zram *zram;
+ struct zram *zram = disk->private_data;
- WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
+ WARN_ON(!mutex_is_locked(&disk->open_mutex));
- zram = bdev->bd_disk->private_data;
/* zram was claimed to reset so open request fails */
if (zram->claim)
- ret = -EBUSY;
-
- return ret;
+ return -EBUSY;
+ return 0;
}
static const struct block_device_operations zram_devops = {
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 416f723a2dbb..cc2839805983 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -264,6 +264,7 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/nospec.h>
#include <linux/slab.h>
#include <linux/cdrom.h>
#include <linux/sysctl.h>
@@ -978,15 +979,6 @@ static void cdrom_dvd_rw_close_write(struct cdrom_device_info *cdi)
cdi->media_written = 0;
}
-static int cdrom_close_write(struct cdrom_device_info *cdi)
-{
-#if 0
- return cdrom_flush_cache(cdi);
-#else
- return 0;
-#endif
-}
-
/* badly broken, I know. Is due for a fixup anytime. */
static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks)
{
@@ -1155,8 +1147,7 @@ clean_up_and_return:
* is in their own interest: device control becomes a lot easier
* this way.
*/
-int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
- fmode_t mode)
+int cdrom_open(struct cdrom_device_info *cdi, blk_mode_t mode)
{
int ret;
@@ -1165,7 +1156,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
/* if this was a O_NONBLOCK open and we should honor the flags,
* do a quick open without drive/disc integrity checks. */
cdi->use_count++;
- if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) {
+ if ((mode & BLK_OPEN_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) {
ret = cdi->ops->open(cdi, 1);
} else {
ret = open_for_data(cdi);
@@ -1173,7 +1164,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
goto err;
if (CDROM_CAN(CDC_GENERIC_PACKET))
cdrom_mmc3_profile(cdi);
- if (mode & FMODE_WRITE) {
+ if (mode & BLK_OPEN_WRITE) {
ret = -EROFS;
if (cdrom_open_write(cdi))
goto err_release;
@@ -1182,6 +1173,7 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev,
ret = 0;
cdi->media_written = 0;
}
+ cdi->opened_for_data = true;
}
if (ret)
@@ -1259,10 +1251,9 @@ static int check_for_audio_disc(struct cdrom_device_info *cdi,
return 0;
}
-void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
+void cdrom_release(struct cdrom_device_info *cdi)
{
const struct cdrom_device_ops *cdo = cdi->ops;
- int opened_for_data;
cd_dbg(CD_CLOSE, "entering cdrom_release\n");
@@ -1280,20 +1271,12 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
}
}
- opened_for_data = !(cdi->options & CDO_USE_FFLAGS) ||
- !(mode & FMODE_NDELAY);
-
- /*
- * flush cache on last write release
- */
- if (CDROM_CAN(CDC_RAM) && !cdi->use_count && cdi->for_data)
- cdrom_close_write(cdi);
-
cdo->release(cdi);
- if (cdi->use_count == 0) { /* last process that closes dev*/
- if (opened_for_data &&
- cdi->options & CDO_AUTO_EJECT && CDROM_CAN(CDC_OPEN_TRAY))
+
+ if (cdi->use_count == 0 && cdi->opened_for_data) {
+ if (cdi->options & CDO_AUTO_EJECT && CDROM_CAN(CDC_OPEN_TRAY))
cdo->tray_move(cdi, 1);
+ cdi->opened_for_data = false;
}
}
EXPORT_SYMBOL(cdrom_release);
@@ -2329,6 +2312,9 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
if (arg >= cdi->capacity)
return -EINVAL;
+ /* Prevent arg from speculatively bypassing the length check */
+ barrier_nospec();
+
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
@@ -3337,7 +3323,7 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
* ATAPI / SCSI specific code now mainly resides in mmc_ioctl().
*/
int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
- fmode_t mode, unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
int ret;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index ceded5772aac..3a46e27479ff 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -474,19 +474,19 @@ static const struct cdrom_device_ops gdrom_ops = {
CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R,
};
-static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
+static int gdrom_bdops_open(struct gendisk *disk, blk_mode_t mode)
{
int ret;
- bdev_check_media_change(bdev);
+ disk_check_media_change(disk);
mutex_lock(&gdrom_mutex);
- ret = cdrom_open(gd.cd_info, bdev, mode);
+ ret = cdrom_open(gd.cd_info);
mutex_unlock(&gdrom_mutex);
return ret;
}
-static void gdrom_bdops_release(struct gendisk *disk, fmode_t mode)
+static void gdrom_bdops_release(struct gendisk *disk)
{
mutex_lock(&gdrom_mutex);
cdrom_release(gd.cd_info, mode);
@@ -499,13 +499,13 @@ static unsigned int gdrom_bdops_check_events(struct gendisk *disk,
return cdrom_check_events(gd.cd_info, clearing);
}
-static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
+static int gdrom_bdops_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned cmd, unsigned long arg)
{
int ret;
mutex_lock(&gdrom_mutex);
- ret = cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
+ ret = cdrom_ioctl(gd.cd_info, bdev, cmd, arg);
mutex_unlock(&gdrom_mutex);
return ret;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 253f2ddb8913..3cb37760dfec 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1546,7 +1546,7 @@ const struct file_operations random_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
- .splice_read = generic_file_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
};
@@ -1557,7 +1557,7 @@ const struct file_operations urandom_fops = {
.compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
- .splice_read = generic_file_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
};
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 016814e15536..c0c8e526a1e9 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -82,7 +82,7 @@ config COMMON_CLK_MAX9485
config COMMON_CLK_RK808
tristate "Clock driver for RK805/RK808/RK809/RK817/RK818"
- depends on MFD_RK808
+ depends on MFD_RK8XX
help
This driver supports RK805, RK809 and RK817, RK808 and RK818 crystal oscillator clock.
These multi-function devices have two fixed-rate oscillators, clocked at 32KHz each.
diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c
index 32f833d732ed..f7412b137e5e 100644
--- a/drivers/clk/clk-rk808.c
+++ b/drivers/clk/clk-rk808.c
@@ -12,10 +12,9 @@
#include <linux/slab.h>
#include <linux/platform_device.h>
#include <linux/mfd/rk808.h>
-#include <linux/i2c.h>
struct rk808_clkout {
- struct rk808 *rk808;
+ struct regmap *regmap;
struct clk_hw clkout1_hw;
struct clk_hw clkout2_hw;
};
@@ -31,9 +30,8 @@ static int rk808_clkout2_enable(struct clk_hw *hw, bool enable)
struct rk808_clkout *rk808_clkout = container_of(hw,
struct rk808_clkout,
clkout2_hw);
- struct rk808 *rk808 = rk808_clkout->rk808;
- return regmap_update_bits(rk808->regmap, RK808_CLK32OUT_REG,
+ return regmap_update_bits(rk808_clkout->regmap, RK808_CLK32OUT_REG,
CLK32KOUT2_EN, enable ? CLK32KOUT2_EN : 0);
}
@@ -52,10 +50,9 @@ static int rk808_clkout2_is_prepared(struct clk_hw *hw)
struct rk808_clkout *rk808_clkout = container_of(hw,
struct rk808_clkout,
clkout2_hw);
- struct rk808 *rk808 = rk808_clkout->rk808;
uint32_t val;
- int ret = regmap_read(rk808->regmap, RK808_CLK32OUT_REG, &val);
+ int ret = regmap_read(rk808_clkout->regmap, RK808_CLK32OUT_REG, &val);
if (ret < 0)
return ret;
@@ -93,9 +90,8 @@ static int rk817_clkout2_enable(struct clk_hw *hw, bool enable)
struct rk808_clkout *rk808_clkout = container_of(hw,
struct rk808_clkout,
clkout2_hw);
- struct rk808 *rk808 = rk808_clkout->rk808;
- return regmap_update_bits(rk808->regmap, RK817_SYS_CFG(1),
+ return regmap_update_bits(rk808_clkout->regmap, RK817_SYS_CFG(1),
RK817_CLK32KOUT2_EN,
enable ? RK817_CLK32KOUT2_EN : 0);
}
@@ -115,10 +111,9 @@ static int rk817_clkout2_is_prepared(struct clk_hw *hw)
struct rk808_clkout *rk808_clkout = container_of(hw,
struct rk808_clkout,
clkout2_hw);
- struct rk808 *rk808 = rk808_clkout->rk808;
unsigned int val;
- int ret = regmap_read(rk808->regmap, RK817_SYS_CFG(1), &val);
+ int ret = regmap_read(rk808_clkout->regmap, RK817_SYS_CFG(1), &val);
if (ret < 0)
return 0;
@@ -153,18 +148,21 @@ static const struct clk_ops *rkpmic_get_ops(long variant)
static int rk808_clkout_probe(struct platform_device *pdev)
{
struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
- struct i2c_client *client = rk808->i2c;
- struct device_node *node = client->dev.of_node;
+ struct device *dev = &pdev->dev;
struct clk_init_data init = {};
struct rk808_clkout *rk808_clkout;
int ret;
- rk808_clkout = devm_kzalloc(&client->dev,
+ dev->of_node = pdev->dev.parent->of_node;
+
+ rk808_clkout = devm_kzalloc(dev,
sizeof(*rk808_clkout), GFP_KERNEL);
if (!rk808_clkout)
return -ENOMEM;
- rk808_clkout->rk808 = rk808;
+ rk808_clkout->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+ if (!rk808_clkout->regmap)
+ return -ENODEV;
init.parent_names = NULL;
init.num_parents = 0;
@@ -173,10 +171,10 @@ static int rk808_clkout_probe(struct platform_device *pdev)
rk808_clkout->clkout1_hw.init = &init;
/* optional override of the clockname */
- of_property_read_string_index(node, "clock-output-names",
+ of_property_read_string_index(dev->of_node, "clock-output-names",
0, &init.name);
- ret = devm_clk_hw_register(&client->dev, &rk808_clkout->clkout1_hw);
+ ret = devm_clk_hw_register(dev, &rk808_clkout->clkout1_hw);
if (ret)
return ret;
@@ -185,10 +183,10 @@ static int rk808_clkout_probe(struct platform_device *pdev)
rk808_clkout->clkout2_hw.init = &init;
/* optional override of the clockname */
- of_property_read_string_index(node, "clock-output-names",
+ of_property_read_string_index(dev->of_node, "clock-output-names",
1, &init.name);
- ret = devm_clk_hw_register(&client->dev, &rk808_clkout->clkout2_hw);
+ ret = devm_clk_hw_register(dev, &rk808_clkout->clkout2_hw);
if (ret)
return ret;
diff --git a/drivers/clk/imx/clk-imx1.c b/drivers/clk/imx/clk-imx1.c
index 22fc7491ba00..f6ea7e5052d5 100644
--- a/drivers/clk/imx/clk-imx1.c
+++ b/drivers/clk/imx/clk-imx1.c
@@ -10,7 +10,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <dt-bindings/clock/imx1-clock.h>
-#include <soc/imx/timer.h>
#include <asm/irq.h>
#include "clk.h"
diff --git a/drivers/clk/imx/clk-imx27.c b/drivers/clk/imx/clk-imx27.c
index 5d177125728d..99618ded0939 100644
--- a/drivers/clk/imx/clk-imx27.c
+++ b/drivers/clk/imx/clk-imx27.c
@@ -8,7 +8,6 @@
#include <linux/of_address.h>
#include <dt-bindings/clock/imx27-clock.h>
#include <soc/imx/revision.h>
-#include <soc/imx/timer.h>
#include <asm/irq.h>
#include "clk.h"
diff --git a/drivers/clk/imx/clk-imx31.c b/drivers/clk/imx/clk-imx31.c
index c44e18c6f63f..4c8d9ff0b2ad 100644
--- a/drivers/clk/imx/clk-imx31.c
+++ b/drivers/clk/imx/clk-imx31.c
@@ -11,7 +11,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <soc/imx/revision.h>
-#include <soc/imx/timer.h>
#include <asm/irq.h>
#include "clk.h"
diff --git a/drivers/clk/imx/clk-imx35.c b/drivers/clk/imx/clk-imx35.c
index 7dcbaea3fea3..3b6fdb4e0be7 100644
--- a/drivers/clk/imx/clk-imx35.c
+++ b/drivers/clk/imx/clk-imx35.c
@@ -10,7 +10,6 @@
#include <linux/of.h>
#include <linux/err.h>
#include <soc/imx/revision.h>
-#include <soc/imx/timer.h>
#include <asm/irq.h>
#include "clk.h"
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 526382dc7482..c4d671a5a13d 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -612,6 +612,15 @@ config TIMER_IMX_SYS_CTR
Enable this option to use i.MX system counter timer as a
clockevent.
+config CLKSRC_LOONGSON1_PWM
+ bool "Clocksource using Loongson1 PWM"
+ depends on MACH_LOONGSON32 || COMPILE_TEST
+ select MIPS_EXTERNAL_TIMER
+ select TIMER_OF
+ help
+ Enable this option to use Loongson1 PWM timer as clocksource
+ instead of the performance counter.
+
config CLKSRC_ST_LPC
bool "Low power clocksource found in the LPC" if COMPILE_TEST
select TIMER_OF if OF
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index f12d3987a960..5d93c9e3fc55 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -89,3 +89,4 @@ obj-$(CONFIG_MICROCHIP_PIT64B) += timer-microchip-pit64b.o
obj-$(CONFIG_MSC313E_TIMER) += timer-msc313e.o
obj-$(CONFIG_GOLDFISH_TIMER) += timer-goldfish.o
obj-$(CONFIG_GXP_TIMER) += timer-gxp.o
+obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index e09d4427f604..e733a2a1927a 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -191,22 +191,40 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
return val;
}
-static notrace u64 arch_counter_get_cntpct_stable(void)
+static noinstr u64 raw_counter_get_cntpct_stable(void)
{
return __arch_counter_get_cntpct_stable();
}
-static notrace u64 arch_counter_get_cntpct(void)
+static notrace u64 arch_counter_get_cntpct_stable(void)
+{
+ u64 val;
+ preempt_disable_notrace();
+ val = __arch_counter_get_cntpct_stable();
+ preempt_enable_notrace();
+ return val;
+}
+
+static noinstr u64 arch_counter_get_cntpct(void)
{
return __arch_counter_get_cntpct();
}
-static notrace u64 arch_counter_get_cntvct_stable(void)
+static noinstr u64 raw_counter_get_cntvct_stable(void)
{
return __arch_counter_get_cntvct_stable();
}
-static notrace u64 arch_counter_get_cntvct(void)
+static notrace u64 arch_counter_get_cntvct_stable(void)
+{
+ u64 val;
+ preempt_disable_notrace();
+ val = __arch_counter_get_cntvct_stable();
+ preempt_enable_notrace();
+ return val;
+}
+
+static noinstr u64 arch_counter_get_cntvct(void)
{
return __arch_counter_get_cntvct();
}
@@ -753,14 +771,14 @@ static int arch_timer_set_next_event_phys(unsigned long evt,
return 0;
}
-static u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
+static noinstr u64 arch_counter_get_cnt_mem(struct arch_timer *t, int offset_lo)
{
u32 cnt_lo, cnt_hi, tmp_hi;
do {
- cnt_hi = readl_relaxed(t->base + offset_lo + 4);
- cnt_lo = readl_relaxed(t->base + offset_lo);
- tmp_hi = readl_relaxed(t->base + offset_lo + 4);
+ cnt_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
+ cnt_lo = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo));
+ tmp_hi = __le32_to_cpu((__le32 __force)__raw_readl(t->base + offset_lo + 4));
} while (cnt_hi != tmp_hi);
return ((u64) cnt_hi << 32) | cnt_lo;
@@ -1060,7 +1078,7 @@ bool arch_timer_evtstrm_available(void)
return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
}
-static u64 arch_counter_get_cntvct_mem(void)
+static noinstr u64 arch_counter_get_cntvct_mem(void)
{
return arch_counter_get_cnt_mem(arch_timer_mem, CNTVCT_LO);
}
@@ -1074,6 +1092,7 @@ struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
static void __init arch_counter_register(unsigned type)
{
+ u64 (*scr)(void);
u64 start_count;
int width;
@@ -1083,21 +1102,28 @@ static void __init arch_counter_register(unsigned type)
if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) ||
arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
- if (arch_timer_counter_has_wa())
+ if (arch_timer_counter_has_wa()) {
rd = arch_counter_get_cntvct_stable;
- else
+ scr = raw_counter_get_cntvct_stable;
+ } else {
rd = arch_counter_get_cntvct;
+ scr = arch_counter_get_cntvct;
+ }
} else {
- if (arch_timer_counter_has_wa())
+ if (arch_timer_counter_has_wa()) {
rd = arch_counter_get_cntpct_stable;
- else
+ scr = raw_counter_get_cntpct_stable;
+ } else {
rd = arch_counter_get_cntpct;
+ scr = arch_counter_get_cntpct;
+ }
}
arch_timer_read_counter = rd;
clocksource_counter.vdso_clock_mode = vdso_default;
} else {
arch_timer_read_counter = arch_counter_get_cntvct_mem;
+ scr = arch_counter_get_cntvct_mem;
}
width = arch_counter_get_width();
@@ -1113,7 +1139,7 @@ static void __init arch_counter_register(unsigned type)
timecounter_init(&arch_timer_kvm_info.timecounter,
&cyclecounter, start_count);
- sched_clock_register(arch_timer_read_counter, width, arch_timer_rate);
+ sched_clock_register(scr, width, arch_timer_rate);
}
static void arch_timer_stop(struct clock_event_device *clk)
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index bcd9042a0c9f..e56307a81f4d 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -365,6 +365,20 @@ void hv_stimer_global_cleanup(void)
}
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
+static __always_inline u64 read_hv_clock_msr(void)
+{
+ /*
+ * Read the partition counter to get the current tick count. This count
+ * is set to 0 when the partition is created and is incremented in 100
+ * nanosecond units.
+ *
+ * Use hv_raw_get_register() because this function is used from
+ * noinstr. Notably, while HV_REGISTER_TIME_REF_COUNT is a synthetic
+ * register it doesn't need the GHCB path.
+ */
+ return hv_raw_get_register(HV_REGISTER_TIME_REF_COUNT);
+}
+
/*
* Code and definitions for the Hyper-V clocksources. Two
* clocksources are defined: one that reads the Hyper-V defined MSR, and
@@ -393,14 +407,20 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
}
EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-static u64 notrace read_hv_clock_tsc(void)
+static __always_inline u64 read_hv_clock_tsc(void)
{
- u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());
+ u64 cur_tsc, time;
- if (current_tick == U64_MAX)
- current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT);
+ /*
+ * The Hyper-V Top-Level Functional Spec (TLFS), section Timers,
+ * subsection Reference Counter, guarantees that the TSC and MSR
+ * times are in sync and monotonic. Therefore we can fall back
+ * to the MSR in case the TSC page indicates unavailability.
+ */
+ if (!hv_read_tsc_page_tsc(tsc_page, &cur_tsc, &time))
+ time = read_hv_clock_msr();
- return current_tick;
+ return time;
}
static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
@@ -408,7 +428,7 @@ static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg)
return read_hv_clock_tsc();
}
-static u64 notrace read_hv_sched_clock_tsc(void)
+static u64 noinstr read_hv_sched_clock_tsc(void)
{
return (read_hv_clock_tsc() - hv_sched_clock_offset) *
(NSEC_PER_SEC / HV_CLOCK_HZ);
@@ -460,30 +480,14 @@ static struct clocksource hyperv_cs_tsc = {
#endif
};
-static u64 notrace read_hv_clock_msr(void)
-{
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
-}
-
static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
{
return read_hv_clock_msr();
}
-static u64 notrace read_hv_sched_clock_msr(void)
-{
- return (read_hv_clock_msr() - hv_sched_clock_offset) *
- (NSEC_PER_SEC / HV_CLOCK_HZ);
-}
-
static struct clocksource hyperv_cs_msr = {
.name = "hyperv_clocksource_msr",
- .rating = 500,
+ .rating = 495,
.read = read_hv_clock_msr_cs,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -513,7 +517,7 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock)
static __always_inline void hv_setup_sched_clock(void *sched_clock) {}
#endif /* CONFIG_GENERIC_SCHED_CLOCK */
-static bool __init hv_init_tsc_clocksource(void)
+static void __init hv_init_tsc_clocksource(void)
{
union hv_reference_tsc_msr tsc_msr;
@@ -524,17 +528,14 @@ static bool __init hv_init_tsc_clocksource(void)
* Hyper-V Reference TSC rating, causing the generic TSC to be used.
* TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference
* TSC will be preferred over the virtualized ARM64 arch counter.
- * While the Hyper-V MSR clocksource won't be used since the
- * Reference TSC clocksource is present, change its rating as
- * well for consistency.
*/
if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
hyperv_cs_tsc.rating = 250;
- hyperv_cs_msr.rating = 250;
+ hyperv_cs_msr.rating = 245;
}
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
- return false;
+ return;
hv_read_reference_counter = read_hv_clock_tsc;
@@ -565,33 +566,34 @@ static bool __init hv_init_tsc_clocksource(void)
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- hv_sched_clock_offset = hv_read_reference_counter();
- hv_setup_sched_clock(read_hv_sched_clock_tsc);
-
- return true;
+ /*
+ * If TSC is invariant, then let it stay as the sched clock since it
+ * will be faster than reading the TSC page. But if not invariant, use
+ * the TSC page so that live migrations across hosts with different
+ * frequencies are handled correctly.
+ */
+ if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) {
+ hv_sched_clock_offset = hv_read_reference_counter();
+ hv_setup_sched_clock(read_hv_sched_clock_tsc);
+ }
}
void __init hv_init_clocksource(void)
{
/*
- * Try to set up the TSC page clocksource. If it succeeds, we're
- * done. Otherwise, set up the MSR clocksource. At least one of
- * these will always be available except on very old versions of
- * Hyper-V on x86. In that case we won't have a Hyper-V
+ * Try to set up the TSC page clocksource, then the MSR clocksource.
+ * At least one of these will always be available except on very old
+ * versions of Hyper-V on x86. In that case we won't have a Hyper-V
* clocksource, but Linux will still run with a clocksource based
* on the emulated PIT or LAPIC timer.
+ *
+ * Never use the MSR clocksource as sched clock. It's too slow.
+ * Better to use the native sched clock as the fallback.
*/
- if (hv_init_tsc_clocksource())
- return;
-
- if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
- return;
-
- hv_read_reference_counter = read_hv_clock_msr;
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+ hv_init_tsc_clocksource();
- hv_sched_clock_offset = hv_read_reference_counter();
- hv_setup_sched_clock(read_hv_sched_clock_msr);
+ if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
+ clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
}
void __init hv_remap_tsc_clocksource(void)
diff --git a/drivers/clocksource/ingenic-timer.c b/drivers/clocksource/ingenic-timer.c
index 089ce64b1c3f..154ee5f7954a 100644
--- a/drivers/clocksource/ingenic-timer.c
+++ b/drivers/clocksource/ingenic-timer.c
@@ -369,7 +369,7 @@ static int __init ingenic_tcu_probe(struct platform_device *pdev)
return 0;
}
-static int __maybe_unused ingenic_tcu_suspend(struct device *dev)
+static int ingenic_tcu_suspend(struct device *dev)
{
struct ingenic_tcu *tcu = dev_get_drvdata(dev);
unsigned int cpu;
@@ -382,7 +382,7 @@ static int __maybe_unused ingenic_tcu_suspend(struct device *dev)
return 0;
}
-static int __maybe_unused ingenic_tcu_resume(struct device *dev)
+static int ingenic_tcu_resume(struct device *dev)
{
struct ingenic_tcu *tcu = dev_get_drvdata(dev);
unsigned int cpu;
@@ -406,7 +406,7 @@ err_timer_clk_disable:
return ret;
}
-static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = {
+static const struct dev_pm_ops ingenic_tcu_pm_ops = {
/* _noirq: We want the TCU clocks to be gated last / ungated first */
.suspend_noirq = ingenic_tcu_suspend,
.resume_noirq = ingenic_tcu_resume,
@@ -415,9 +415,7 @@ static const struct dev_pm_ops __maybe_unused ingenic_tcu_pm_ops = {
static struct platform_driver ingenic_tcu_driver = {
.driver = {
.name = "ingenic-tcu-timer",
-#ifdef CONFIG_PM_SLEEP
- .pm = &ingenic_tcu_pm_ops,
-#endif
+ .pm = pm_sleep_ptr(&ingenic_tcu_pm_ops),
.of_match_table = ingenic_tcu_of_match,
},
};
diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c
index 4efd0cf3b602..0d52e28fea4d 100644
--- a/drivers/clocksource/timer-cadence-ttc.c
+++ b/drivers/clocksource/timer-cadence-ttc.c
@@ -486,10 +486,10 @@ static int __init ttc_timer_probe(struct platform_device *pdev)
* and use it. Note that the event timer uses the interrupt and it's the
* 2nd TTC hence the irq_of_parse_and_map(,1)
*/
- timer_baseaddr = of_iomap(timer, 0);
- if (!timer_baseaddr) {
+ timer_baseaddr = devm_of_iomap(&pdev->dev, timer, 0, NULL);
+ if (IS_ERR(timer_baseaddr)) {
pr_err("ERROR: invalid timer base address\n");
- return -ENXIO;
+ return PTR_ERR(timer_baseaddr);
}
irq = irq_of_parse_and_map(timer, 1);
@@ -513,20 +513,27 @@ static int __init ttc_timer_probe(struct platform_device *pdev)
clk_ce = of_clk_get(timer, clksel);
if (IS_ERR(clk_ce)) {
pr_err("ERROR: timer input clock not found\n");
- return PTR_ERR(clk_ce);
+ ret = PTR_ERR(clk_ce);
+ goto put_clk_cs;
}
ret = ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width);
if (ret)
- return ret;
+ goto put_clk_ce;
ret = ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq);
if (ret)
- return ret;
+ goto put_clk_ce;
pr_info("%pOFn #0 at %p, irq=%d\n", timer, timer_baseaddr, irq);
return 0;
+
+put_clk_ce:
+ clk_put(clk_ce);
+put_clk_cs:
+ clk_put(clk_cs);
+ return ret;
}
static const struct of_device_id ttc_timer_of_match[] = {
diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c
index ca3e4cbc80c6..28ab4f1a7c71 100644
--- a/drivers/clocksource/timer-imx-gpt.c
+++ b/drivers/clocksource/timer-imx-gpt.c
@@ -16,7 +16,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <soc/imx/timer.h>
/*
* There are 4 versions of the timer hardware on Freescale MXC hardware.
@@ -25,6 +24,12 @@
* - MX25, MX31, MX35, MX37, MX51, MX6Q(rev1.0)
* - MX6DL, MX6SX, MX6Q(rev1.1+)
*/
+enum imx_gpt_type {
+ GPT_TYPE_IMX1, /* i.MX1 */
+ GPT_TYPE_IMX21, /* i.MX21/27 */
+ GPT_TYPE_IMX31, /* i.MX31/35/25/37/51/6Q */
+ GPT_TYPE_IMX6DL, /* i.MX6DL/SX/SL */
+};
/* defines common for all i.MX */
#define MXC_TCTL 0x00
@@ -93,13 +98,11 @@ static void imx1_gpt_irq_disable(struct imx_timer *imxtm)
tmp = readl_relaxed(imxtm->base + MXC_TCTL);
writel_relaxed(tmp & ~MX1_2_TCTL_IRQEN, imxtm->base + MXC_TCTL);
}
-#define imx21_gpt_irq_disable imx1_gpt_irq_disable
static void imx31_gpt_irq_disable(struct imx_timer *imxtm)
{
writel_relaxed(0, imxtm->base + V2_IR);
}
-#define imx6dl_gpt_irq_disable imx31_gpt_irq_disable
static void imx1_gpt_irq_enable(struct imx_timer *imxtm)
{
@@ -108,13 +111,11 @@ static void imx1_gpt_irq_enable(struct imx_timer *imxtm)
tmp = readl_relaxed(imxtm->base + MXC_TCTL);
writel_relaxed(tmp | MX1_2_TCTL_IRQEN, imxtm->base + MXC_TCTL);
}
-#define imx21_gpt_irq_enable imx1_gpt_irq_enable
static void imx31_gpt_irq_enable(struct imx_timer *imxtm)
{
writel_relaxed(1<<0, imxtm->base + V2_IR);
}
-#define imx6dl_gpt_irq_enable imx31_gpt_irq_enable
static void imx1_gpt_irq_acknowledge(struct imx_timer *imxtm)
{
@@ -131,7 +132,6 @@ static void imx31_gpt_irq_acknowledge(struct imx_timer *imxtm)
{
writel_relaxed(V2_TSTAT_OF1, imxtm->base + V2_TSTAT);
}
-#define imx6dl_gpt_irq_acknowledge imx31_gpt_irq_acknowledge
static void __iomem *sched_clock_reg;
@@ -296,7 +296,6 @@ static void imx1_gpt_setup_tctl(struct imx_timer *imxtm)
tctl_val = MX1_2_TCTL_FRR | MX1_2_TCTL_CLK_PCLK1 | MXC_TCTL_TEN;
writel_relaxed(tctl_val, imxtm->base + MXC_TCTL);
}
-#define imx21_gpt_setup_tctl imx1_gpt_setup_tctl
static void imx31_gpt_setup_tctl(struct imx_timer *imxtm)
{
@@ -343,10 +342,10 @@ static const struct imx_gpt_data imx21_gpt_data = {
.reg_tstat = MX1_2_TSTAT,
.reg_tcn = MX1_2_TCN,
.reg_tcmp = MX1_2_TCMP,
- .gpt_irq_enable = imx21_gpt_irq_enable,
- .gpt_irq_disable = imx21_gpt_irq_disable,
+ .gpt_irq_enable = imx1_gpt_irq_enable,
+ .gpt_irq_disable = imx1_gpt_irq_disable,
.gpt_irq_acknowledge = imx21_gpt_irq_acknowledge,
- .gpt_setup_tctl = imx21_gpt_setup_tctl,
+ .gpt_setup_tctl = imx1_gpt_setup_tctl,
.set_next_event = mx1_2_set_next_event,
};
@@ -365,9 +364,9 @@ static const struct imx_gpt_data imx6dl_gpt_data = {
.reg_tstat = V2_TSTAT,
.reg_tcn = V2_TCN,
.reg_tcmp = V2_TCMP,
- .gpt_irq_enable = imx6dl_gpt_irq_enable,
- .gpt_irq_disable = imx6dl_gpt_irq_disable,
- .gpt_irq_acknowledge = imx6dl_gpt_irq_acknowledge,
+ .gpt_irq_enable = imx31_gpt_irq_enable,
+ .gpt_irq_disable = imx31_gpt_irq_disable,
+ .gpt_irq_acknowledge = imx31_gpt_irq_acknowledge,
.gpt_setup_tctl = imx6dl_gpt_setup_tctl,
.set_next_event = v2_set_next_event,
};
diff --git a/drivers/clocksource/timer-loongson1-pwm.c b/drivers/clocksource/timer-loongson1-pwm.c
new file mode 100644
index 000000000000..6335fee03017
--- /dev/null
+++ b/drivers/clocksource/timer-loongson1-pwm.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Clocksource driver for Loongson-1 SoC
+ *
+ * Copyright (c) 2023 Keguang Zhang <keguang.zhang@gmail.com>
+ */
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sizes.h>
+#include "timer-of.h"
+
+/* Loongson-1 PWM Timer Register Definitions */
+#define PWM_CNTR 0x0
+#define PWM_HRC 0x4
+#define PWM_LRC 0x8
+#define PWM_CTRL 0xc
+
+/* PWM Control Register Bits */
+#define INT_LRC_EN BIT(11)
+#define INT_HRC_EN BIT(10)
+#define CNTR_RST BIT(7)
+#define INT_SR BIT(6)
+#define INT_EN BIT(5)
+#define PWM_SINGLE BIT(4)
+#define PWM_OE BIT(3)
+#define CNT_EN BIT(0)
+
+#define CNTR_WIDTH 24
+
+DEFINE_RAW_SPINLOCK(ls1x_timer_lock);
+
+struct ls1x_clocksource {
+ void __iomem *reg_base;
+ unsigned long ticks_per_jiffy;
+ struct clocksource clksrc;
+};
+
+static inline struct ls1x_clocksource *to_ls1x_clksrc(struct clocksource *c)
+{
+ return container_of(c, struct ls1x_clocksource, clksrc);
+}
+
+static inline void ls1x_pwmtimer_set_period(unsigned int period,
+ struct timer_of *to)
+{
+ writel(period, timer_of_base(to) + PWM_LRC);
+ writel(period, timer_of_base(to) + PWM_HRC);
+}
+
+static inline void ls1x_pwmtimer_clear(struct timer_of *to)
+{
+ writel(0, timer_of_base(to) + PWM_CNTR);
+}
+
+static inline void ls1x_pwmtimer_start(struct timer_of *to)
+{
+ writel((INT_EN | PWM_OE | CNT_EN), timer_of_base(to) + PWM_CTRL);
+}
+
+static inline void ls1x_pwmtimer_stop(struct timer_of *to)
+{
+ writel(0, timer_of_base(to) + PWM_CTRL);
+}
+
+static inline void ls1x_pwmtimer_irq_ack(struct timer_of *to)
+{
+ int val;
+
+ val = readl(timer_of_base(to) + PWM_CTRL);
+ val |= INT_SR;
+ writel(val, timer_of_base(to) + PWM_CTRL);
+}
+
+static irqreturn_t ls1x_clockevent_isr(int irq, void *dev_id)
+{
+ struct clock_event_device *clkevt = dev_id;
+ struct timer_of *to = to_timer_of(clkevt);
+
+ ls1x_pwmtimer_irq_ack(to);
+ ls1x_pwmtimer_clear(to);
+ ls1x_pwmtimer_start(to);
+
+ clkevt->event_handler(clkevt);
+
+ return IRQ_HANDLED;
+}
+
+static int ls1x_clockevent_set_state_periodic(struct clock_event_device *clkevt)
+{
+ struct timer_of *to = to_timer_of(clkevt);
+
+ raw_spin_lock(&ls1x_timer_lock);
+ ls1x_pwmtimer_set_period(timer_of_period(to), to);
+ ls1x_pwmtimer_clear(to);
+ ls1x_pwmtimer_start(to);
+ raw_spin_unlock(&ls1x_timer_lock);
+
+ return 0;
+}
+
+static int ls1x_clockevent_tick_resume(struct clock_event_device *clkevt)
+{
+ raw_spin_lock(&ls1x_timer_lock);
+ ls1x_pwmtimer_start(to_timer_of(clkevt));
+ raw_spin_unlock(&ls1x_timer_lock);
+
+ return 0;
+}
+
+static int ls1x_clockevent_set_state_shutdown(struct clock_event_device *clkevt)
+{
+ raw_spin_lock(&ls1x_timer_lock);
+ ls1x_pwmtimer_stop(to_timer_of(clkevt));
+ raw_spin_unlock(&ls1x_timer_lock);
+
+ return 0;
+}
+
+static int ls1x_clockevent_set_next(unsigned long evt,
+ struct clock_event_device *clkevt)
+{
+ struct timer_of *to = to_timer_of(clkevt);
+
+ raw_spin_lock(&ls1x_timer_lock);
+ ls1x_pwmtimer_set_period(evt, to);
+ ls1x_pwmtimer_clear(to);
+ ls1x_pwmtimer_start(to);
+ raw_spin_unlock(&ls1x_timer_lock);
+
+ return 0;
+}
+
+static struct timer_of ls1x_to = {
+ .flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
+ .clkevt = {
+ .name = "ls1x-pwmtimer",
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT,
+ .rating = 300,
+ .set_next_event = ls1x_clockevent_set_next,
+ .set_state_periodic = ls1x_clockevent_set_state_periodic,
+ .set_state_oneshot = ls1x_clockevent_set_state_shutdown,
+ .set_state_shutdown = ls1x_clockevent_set_state_shutdown,
+ .tick_resume = ls1x_clockevent_tick_resume,
+ },
+ .of_irq = {
+ .handler = ls1x_clockevent_isr,
+ .flags = IRQF_TIMER,
+ },
+};
+
+/*
+ * Since the PWM timer overflows every two ticks, it's not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static u64 ls1x_clocksource_read(struct clocksource *cs)
+{
+ struct ls1x_clocksource *ls1x_cs = to_ls1x_clksrc(cs);
+ unsigned long flags;
+ int count;
+ u32 jifs;
+ static int old_count;
+ static u32 old_jifs;
+
+ raw_spin_lock_irqsave(&ls1x_timer_lock, flags);
+ /*
+ * Although our caller may have the read side of xtime_lock,
+ * this is now a seqlock, and we are cheating in this routine
+ * by having side effects on state that we cannot undo if
+ * there is a collision on the seqlock and our caller has to
+ * retry. (Namely, old_jifs and old_count.) So we must treat
+ * jiffies as volatile despite the lock. We read jiffies
+ * before latching the timer count to guarantee that although
+ * the jiffies value might be older than the count (that is,
+ * the counter may underflow between the last point where
+ * jiffies was incremented and the point where we latch the
+ * count), it cannot be newer.
+ */
+ jifs = jiffies;
+ /* read the count */
+ count = readl(ls1x_cs->reg_base + PWM_CNTR);
+
+ /*
+ * It's possible for count to appear to go the wrong way for this
+ * reason:
+ *
+ * The timer counter underflows, but we haven't handled the resulting
+ * interrupt and incremented jiffies yet.
+ *
+ * Previous attempts to handle these cases intelligently were buggy, so
+ * we just do the simple thing now.
+ */
+ if (count < old_count && jifs == old_jifs)
+ count = old_count;
+
+ old_count = count;
+ old_jifs = jifs;
+
+ raw_spin_unlock_irqrestore(&ls1x_timer_lock, flags);
+
+ return (u64)(jifs * ls1x_cs->ticks_per_jiffy) + count;
+}
+
+static struct ls1x_clocksource ls1x_clocksource = {
+ .clksrc = {
+ .name = "ls1x-pwmtimer",
+ .rating = 300,
+ .read = ls1x_clocksource_read,
+ .mask = CLOCKSOURCE_MASK(CNTR_WIDTH),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ },
+};
+
+static int __init ls1x_pwm_clocksource_init(struct device_node *np)
+{
+ struct timer_of *to = &ls1x_to;
+ int ret;
+
+ ret = timer_of_init(np, to);
+ if (ret)
+ return ret;
+
+ clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+ 0x1, GENMASK(CNTR_WIDTH - 1, 0));
+
+ ls1x_clocksource.reg_base = timer_of_base(to);
+ ls1x_clocksource.ticks_per_jiffy = timer_of_period(to);
+
+ return clocksource_register_hz(&ls1x_clocksource.clksrc,
+ timer_of_rate(to));
+}
+
+TIMER_OF_DECLARE(ls1x_pwm_clocksource, "loongson,ls1b-pwmtimer",
+ ls1x_pwm_clocksource_init);
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c839bd2b051..a1c51abddbc5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -38,7 +38,7 @@ choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM
- default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP
+ default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP
default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
help
This option sets which CPUFreq governor shall be loaded at
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 00476e94db90..438c9e75a04d 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -51,6 +51,23 @@ config X86_AMD_PSTATE
If in doubt, say N.
+config X86_AMD_PSTATE_DEFAULT_MODE
+ int "AMD Processor P-State default mode"
+ depends on X86_AMD_PSTATE
+ default 3 if X86_AMD_PSTATE
+ range 1 4
+ help
+ Select the default mode the amd-pstate driver will use on
+ supported hardware.
+ The value set has the following meanings:
+ 1 -> Disabled
+ 2 -> Passive
+ 3 -> Active (EPP)
+ 4 -> Guided
+
+ For details, take a look at:
+ <file:Documentation/admin-guide/pm/amd-pstate.rst>.
+
config X86_AMD_PSTATE_UT
tristate "selftest for AMD Processor P-State driver"
depends on X86 && ACPI_PROCESSOR
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ddd346a239e0..81fba0dcbee9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -62,7 +62,8 @@
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
-static int cppc_state = AMD_PSTATE_DISABLE;
+static int cppc_state = AMD_PSTATE_UNDEFINED;
+static bool cppc_enabled;
/*
* AMD Energy Preference Performance (EPP)
@@ -228,7 +229,28 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
static inline int pstate_enable(bool enable)
{
- return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
+ int ret, cpu;
+ unsigned long logical_proc_id_mask = 0;
+
+ if (enable == cppc_enabled)
+ return 0;
+
+ for_each_present_cpu(cpu) {
+ unsigned long logical_id = topology_logical_die_id(cpu);
+
+ if (test_bit(logical_id, &logical_proc_id_mask))
+ continue;
+
+ set_bit(logical_id, &logical_proc_id_mask);
+
+ ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
+ enable);
+ if (ret)
+ return ret;
+ }
+
+ cppc_enabled = enable;
+ return 0;
}
static int cppc_enable(bool enable)
@@ -236,6 +258,9 @@ static int cppc_enable(bool enable)
int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
+ if (enable == cppc_enabled)
+ return 0;
+
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
@@ -251,6 +276,7 @@ static int cppc_enable(bool enable)
}
}
+ cppc_enabled = enable;
return ret;
}
@@ -1045,6 +1071,26 @@ static const struct attribute_group amd_pstate_global_attr_group = {
.attrs = pstate_global_attributes,
};
+static bool amd_pstate_acpi_pm_profile_server(void)
+{
+ switch (acpi_gbl_FADT.preferred_profile) {
+ case PM_ENTERPRISE_SERVER:
+ case PM_SOHO_SERVER:
+ case PM_PERFORMANCE_SERVER:
+ return true;
+ }
+ return false;
+}
+
+static bool amd_pstate_acpi_pm_profile_undefined(void)
+{
+ if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
+ return true;
+ if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
+ return true;
+ return false;
+}
+
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -1102,10 +1148,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->max = policy->cpuinfo.max_freq;
/*
- * Set the policy to powersave to provide a valid fallback value in case
+ * Set the policy to provide a valid fallback value in case
* the default cpufreq governor is neither powersave nor performance.
*/
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ if (amd_pstate_acpi_pm_profile_server() ||
+ amd_pstate_acpi_pm_profile_undefined())
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1356,10 +1406,29 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
- .name = "amd_pstate_epp",
+ .name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
+static int __init amd_pstate_set_driver(int mode_idx)
+{
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
static int __init amd_pstate_init(void)
{
struct device *dev_root;
@@ -1367,15 +1436,6 @@ static int __init amd_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
- /*
- * by default the pstate driver is disabled to load
- * enable the amd_pstate passive mode driver explicitly
- * with amd_pstate=passive or other modes in kernel command line
- */
- if (cppc_state == AMD_PSTATE_DISABLE) {
- pr_info("driver load is disabled, boot with specific mode to enable this\n");
- return -ENODEV;
- }
if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
@@ -1386,6 +1446,33 @@ static int __init amd_pstate_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
+ switch (cppc_state) {
+ case AMD_PSTATE_UNDEFINED:
+ /* Disable on the following configs by default:
+ * 1. Undefined platforms
+ * 2. Server platforms
+ * 3. Shared memory designs
+ */
+ if (amd_pstate_acpi_pm_profile_undefined() ||
+ amd_pstate_acpi_pm_profile_server() ||
+ !boot_cpu_has(X86_FEATURE_CPPC)) {
+ pr_info("driver load is disabled, boot with specific mode to enable this\n");
+ return -ENODEV;
+ }
+ ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
+ if (ret)
+ return ret;
+ break;
+ case AMD_PSTATE_DISABLE:
+ return -ENODEV;
+ case AMD_PSTATE_PASSIVE:
+ case AMD_PSTATE_ACTIVE:
+ case AMD_PSTATE_GUIDED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
@@ -1438,21 +1525,7 @@ static int __init amd_pstate_param(char *str)
size = strlen(str);
mode_idx = get_mode_idx_from_str(str, size);
- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
- cppc_state = mode_idx;
- if (cppc_state == AMD_PSTATE_DISABLE)
- pr_info("driver is explicitly disabled\n");
-
- if (cppc_state == AMD_PSTATE_ACTIVE)
- current_pstate_driver = &amd_pstate_epp_driver;
-
- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
- current_pstate_driver = &amd_pstate_driver;
-
- return 0;
- }
-
- return -EINVAL;
+ return amd_pstate_set_driver(mode_idx);
}
early_param("amd_pstate", amd_pstate_param);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6b52ebe5a890..50bbc969ffe5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2828,7 +2828,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)) ||
(!driver_data->get_intermediate != !driver_data->target_intermediate) ||
- (!driver_data->online != !driver_data->offline))
+ (!driver_data->online != !driver_data->offline) ||
+ (driver_data->adjust_perf && !driver_data->fast_switch))
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2548ec92faa2..f29182512b98 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -824,6 +824,8 @@ static ssize_t store_energy_performance_preference(
err = cpufreq_start_governor(policy);
if (!ret)
ret = err;
+ } else {
+ ret = 0;
}
}
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8e929f6602ce..737a026ef58a 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -145,7 +145,7 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
instrumentation_begin();
- time_start = ns_to_ktime(local_clock());
+ time_start = ns_to_ktime(local_clock_noinstr());
tick_freeze();
/*
@@ -169,7 +169,7 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
tick_unfreeze();
start_critical_timings();
- time_end = ns_to_ktime(local_clock());
+ time_end = ns_to_ktime(local_clock_noinstr());
dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
dev->states_usage[index].s2idle_usage++;
@@ -243,7 +243,7 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
sched_idle_set_state(target_state);
trace_cpu_idle(index, dev->cpu);
- time_start = ns_to_ktime(local_clock());
+ time_start = ns_to_ktime(local_clock_noinstr());
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
@@ -276,7 +276,7 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
start_critical_timings();
sched_clock_idle_wakeup_event();
- time_end = ns_to_ktime(local_clock());
+ time_end = ns_to_ktime(local_clock_noinstr());
trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
/* The cpu is no longer idle or about to enter idle. */
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index bdcfeaecd228..9b6d90a72601 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -15,7 +15,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
{
u64 time_start;
- time_start = local_clock();
+ time_start = local_clock_noinstr();
dev->poll_time_limit = false;
@@ -32,7 +32,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
continue;
loop_count = 0;
- if (local_clock() - time_start > limit) {
+ if (local_clock_noinstr() - time_start > limit) {
dev->poll_time_limit = true;
break;
}
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
index 10fe9f73a5fb..f2dd66706c10 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c
@@ -8,7 +8,7 @@
* keysize in CBC and ECB mode.
* Add support also for DES and 3DES in CBC and ECB mode.
*
- * You could find the datasheet in Documentation/arm/sunxi.rst
+ * You could find the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include "sun4i-ss.h"
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
index 006e40133c28..51a3a7b5b985 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-core.c
@@ -6,7 +6,7 @@
*
* Core file which registers crypto algorithms supported by the SS.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c
index d28292762b32..f7893e4ac59d 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-hash.c
@@ -6,7 +6,7 @@
*
* This file add support for MD5 and SHA1.
*
- * You could find the datasheet in Documentation/arm/sunxi.rst
+ * You could find the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include "sun4i-ss.h"
#include <asm/unaligned.h>
diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
index ba59c7a48825..6c5d4aa6453c 100644
--- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
+++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h
@@ -8,7 +8,7 @@
* Support MD5 and SHA1 hash algorithms.
* Support DES and 3DES
*
- * You could find the datasheet in Documentation/arm/sunxi.rst
+ * You could find the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/clk.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
index 74b4e910a38d..c13550090785 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c
@@ -8,7 +8,7 @@
* This file add support for AES cipher with 128,192,256 bits keysize in
* CBC and ECB mode.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/bottom_half.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
index a6865ff4d400..07ea0cc82b16 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c
@@ -7,7 +7,7 @@
*
* Core file which registers crypto algorithms supported by the CryptoEngine.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index 8b5b9b9d04c3..930ad157004c 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -7,7 +7,7 @@
*
* This file add support for MD5 and SHA1/SHA224/SHA256/SHA384/SHA512.
*
- * You could find the datasheet in Documentation/arm/sunxi.rst
+ * You could find the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/bottom_half.h>
#include <linux/dma-mapping.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
index b3cc43ea6c8a..80815379f6fc 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-prng.c
@@ -7,7 +7,7 @@
*
* This file handle the PRNG
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include "sun8i-ce.h"
#include <linux/dma-mapping.h>
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
index e2b9b9104694..9c35f2a83eda 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
@@ -7,7 +7,7 @@
*
* This file handle the TRNG
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include "sun8i-ce.h"
#include <linux/dma-mapping.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 16966cc94e24..381a90fbeaff 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -8,7 +8,7 @@
* This file add support for AES cipher with 128,192,256 bits keysize in
* CBC and ECB mode.
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/bottom_half.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
index c9dc06f97857..3dd844b40ff7 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c
@@ -7,7 +7,7 @@
*
* Core file which registers crypto algorithms supported by the SecuritySystem
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/clk.h>
#include <linux/crypto.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
index 577bf636f7fb..a4b67d130d11 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c
@@ -7,7 +7,7 @@
*
* This file add support for MD5 and SHA1/SHA224/SHA256.
*
- * You could find the datasheet in Documentation/arm/sunxi.rst
+ * You could find the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include <linux/bottom_half.h>
#include <linux/dma-mapping.h>
diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
index 70c7b5d571b8..a923cfc6553f 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-prng.c
@@ -7,7 +7,7 @@
*
* This file handle the PRNG found in the SS
*
- * You could find a link for the datasheet in Documentation/arm/sunxi.rst
+ * You could find a link for the datasheet in Documentation/arch/arm/sunxi.rst
*/
#include "sun8i-ss.h"
#include <linux/dma-mapping.h>
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index ddf6e913c1c4..30e6acfc93d9 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -357,9 +357,9 @@ static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs)
u64 vfpf_mbox_base;
int err, i;
- cptpf->vfpf_mbox_wq = alloc_workqueue("cpt_vfpf_mailbox",
- WQ_UNBOUND | WQ_HIGHPRI |
- WQ_MEM_RECLAIM, 1);
+ cptpf->vfpf_mbox_wq =
+ alloc_ordered_workqueue("cpt_vfpf_mailbox",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM);
if (!cptpf->vfpf_mbox_wq)
return -ENOMEM;
@@ -453,9 +453,9 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
resource_size_t offset;
int err;
- cptpf->afpf_mbox_wq = alloc_workqueue("cpt_afpf_mailbox",
- WQ_UNBOUND | WQ_HIGHPRI |
- WQ_MEM_RECLAIM, 1);
+ cptpf->afpf_mbox_wq =
+ alloc_ordered_workqueue("cpt_afpf_mailbox",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM);
if (!cptpf->afpf_mbox_wq)
return -ENOMEM;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
index 392e9fee05e8..6023a7adb70c 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
@@ -75,9 +75,9 @@ static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf)
resource_size_t offset, size;
int ret;
- cptvf->pfvf_mbox_wq = alloc_workqueue("cpt_pfvf_mailbox",
- WQ_UNBOUND | WQ_HIGHPRI |
- WQ_MEM_RECLAIM, 1);
+ cptvf->pfvf_mbox_wq =
+ alloc_ordered_workqueue("cpt_pfvf_mailbox",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM);
if (!cptvf->pfvf_mbox_wq)
return -ENOMEM;
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 88414445adf3..245898f1a88e 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -518,6 +518,7 @@ static struct platform_driver exynos_bus_platdrv = {
};
module_platform_driver(exynos_bus_platdrv);
+MODULE_SOFTDEP("pre: exynos_ppmu");
MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c
index e5458ada5197..6354622eda65 100644
--- a/drivers/devfreq/mtk-cci-devfreq.c
+++ b/drivers/devfreq/mtk-cci-devfreq.c
@@ -127,7 +127,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev);
- struct clk *cci_pll = clk_get_parent(drv->cci_clk);
+ struct clk *cci_pll;
struct dev_pm_opp *opp;
unsigned long opp_rate;
int voltage, pre_voltage, inter_voltage, target_voltage, ret;
@@ -139,6 +139,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
return 0;
inter_voltage = drv->inter_voltage;
+ cci_pll = clk_get_parent(drv->cci_clk);
opp_rate = *freq;
opp = devfreq_recommended_opp(dev, &opp_rate, 1);
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 01f2e86f3f7c..12cf6bb2e3ce 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -12,7 +12,6 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
-#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
@@ -207,9 +206,7 @@ static long udmabuf_create(struct miscdevice *device,
struct udmabuf *ubuf;
struct dma_buf *buf;
pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
- struct page *page, *hpage = NULL;
- pgoff_t subpgoff, maxsubpgs;
- struct hstate *hpstate;
+ struct page *page;
int seals, ret = -EINVAL;
u32 i, flags;
@@ -245,7 +242,7 @@ static long udmabuf_create(struct miscdevice *device,
if (!memfd)
goto err;
mapping = memfd->f_mapping;
- if (!shmem_mapping(mapping) && !is_file_hugepages(memfd))
+ if (!shmem_mapping(mapping))
goto err;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
@@ -256,48 +253,16 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
pgoff = list[i].offset >> PAGE_SHIFT;
pgcnt = list[i].size >> PAGE_SHIFT;
- if (is_file_hugepages(memfd)) {
- hpstate = hstate_file(memfd);
- pgoff = list[i].offset >> huge_page_shift(hpstate);
- subpgoff = (list[i].offset &
- ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
- maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
- }
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
- if (is_file_hugepages(memfd)) {
- if (!hpage) {
- hpage = find_get_page_flags(mapping, pgoff,
- FGP_ACCESSED);
- if (!hpage) {
- ret = -EINVAL;
- goto err;
- }
- }
- page = hpage + subpgoff;
- get_page(page);
- subpgoff++;
- if (subpgoff == maxsubpgs) {
- put_page(hpage);
- hpage = NULL;
- subpgoff = 0;
- pgoff++;
- }
- } else {
- page = shmem_read_mapping_page(mapping,
- pgoff + pgidx);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err;
- }
+ page = shmem_read_mapping_page(mapping, pgoff + pgidx);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto err;
}
ubuf->pages[pgbuf++] = page;
}
fput(memfd);
memfd = NULL;
- if (hpage) {
- put_page(hpage);
- hpage = NULL;
- }
}
exp_info.ops = &udmabuf_ops;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 68f576700911..110e99b86a66 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -550,4 +550,15 @@ config EDAC_ZYNQMP
Xilinx ZynqMP OCM (On Chip Memory) controller. It can also be
built as a module. In that case it will be called zynqmp_edac.
+config EDAC_NPCM
+ tristate "Nuvoton NPCM DDR Memory Controller"
+ depends on (ARCH_NPCM || COMPILE_TEST)
+ help
+ Support for error detection and correction on the Nuvoton NPCM DDR
+ memory controller.
+
+ The memory controller supports single bit error correction, double bit
+ error detection (in-line ECC in which a section 1/8th of the memory
+ device used to store data is used for ECC storage).
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 9b025c5b3061..61945d3113cc 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -84,4 +84,5 @@ obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
+obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o
obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5c4292e65b96..597dae7692b1 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -975,6 +975,74 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
return csrow;
}
+/*
+ * See AMD PPR DF::LclNodeTypeMap
+ *
+ * This register gives information for nodes of the same type within a system.
+ *
+ * Reading this register from a GPU node will tell how many GPU nodes are in the
+ * system and what the lowest AMD Node ID value is for the GPU nodes. Use this
+ * info to fixup the Linux logical "Node ID" value set in the AMD NB code and EDAC.
+ */
+static struct local_node_map {
+ u16 node_count;
+ u16 base_node_id;
+} gpu_node_map;
+
+#define PCI_DEVICE_ID_AMD_MI200_DF_F1 0x14d1
+#define REG_LOCAL_NODE_TYPE_MAP 0x144
+
+/* Local Node Type Map (LNTM) fields */
+#define LNTM_NODE_COUNT GENMASK(27, 16)
+#define LNTM_BASE_NODE_ID GENMASK(11, 0)
+
+static int gpu_get_node_map(void)
+{
+ struct pci_dev *pdev;
+ int ret;
+ u32 tmp;
+
+ /*
+ * Node ID 0 is reserved for CPUs.
+ * Therefore, a non-zero Node ID means we've already cached the values.
+ */
+ if (gpu_node_map.base_node_id)
+ return 0;
+
+ pdev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F1, NULL);
+ if (!pdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+ if (ret)
+ goto out;
+
+ gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
+ gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
+
+out:
+ pci_dev_put(pdev);
+ return ret;
+}
+
+static int fixup_node_id(int node_id, struct mce *m)
+{
+ /* MCA_IPID[InstanceIdHi] give the AMD Node ID for the bank. */
+ u8 nid = (m->ipid >> 44) & 0xF;
+
+ if (smca_get_bank_type(m->extcpu, m->bank) != SMCA_UMC_V2)
+ return node_id;
+
+ /* Nodes below the GPU base node are CPU nodes and don't need a fixup. */
+ if (nid < gpu_node_map.base_node_id)
+ return node_id;
+
+ /* Convert the hardware-provided AMD Node ID to a Linux logical one. */
+ return nid - gpu_node_map.base_node_id + 1;
+}
+
/* Protect the PCI config register pairs used for DF indirect access. */
static DEFINE_MUTEX(df_indirect_mutex);
@@ -1426,12 +1494,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
int cs_mask_nr = csrow_nr;
+ u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1475,33 +1578,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
-
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
-
- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -2992,6 +3069,8 @@ static void decode_umc_error(int node_id, struct mce *m)
struct err_info err;
u64 sys_addr;
+ node_id = fixup_node_id(node_id, m);
+
mci = edac_mc_find(node_id);
if (!mci)
return;
@@ -3675,6 +3754,227 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
return 0;
}
+/*
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ *
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer to comment on gpu_get_umc_base().
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void gpu_get_err_info(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+}
+
+static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs_mode, cs = 0;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
+static void gpu_dump_misc_regs(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ gpu_debug_display_dimm_sizes(pvt, i);
+ }
+}
+
+static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+{
+ u32 nr_pages;
+ int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr);
+ nr_pages <<= 20 - PAGE_SHIFT;
+
+ edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
+
+ return nr_pages;
+}
+
+static void gpu_init_csrows(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->mtype = MEM_HBM2;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+}
+
+static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ mci->mtype_cap = MEM_FLAG_HBM2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->edac_cap = EDAC_FLAG_EC;
+ mci->mod_name = EDAC_MOD_STR;
+ mci->ctl_name = pvt->ctl_name;
+ mci->dev_name = pci_name(pvt->F3);
+ mci->ctl_page_to_phys = NULL;
+
+ gpu_init_csrows(mci);
+}
+
+/* ECC is enabled by default on GPU nodes */
+static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
+{
+ return true;
+}
+
+static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes channels are selected in 6th nibble
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes channels are selected in 3rd nibble
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
+static void gpu_read_mc_regs(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = gpu_get_umc_base(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
+static void gpu_read_base_mask(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = gpu_get_umc_base(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = gpu_get_umc_base(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static int gpu_hw_info_get(struct amd64_pvt *pvt)
+{
+ int ret;
+
+ ret = gpu_get_node_map();
+ if (ret)
+ return ret;
+
+ pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ if (!pvt->umc)
+ return -ENOMEM;
+
+ gpu_prep_chip_selects(pvt);
+ gpu_read_base_mask(pvt);
+ gpu_read_mc_regs(pvt);
+
+ return 0;
+}
+
static void hw_info_put(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
@@ -3690,6 +3990,14 @@ static struct low_ops umc_ops = {
.get_err_info = umc_get_err_info,
};
+static struct low_ops gpu_ops = {
+ .hw_info_get = gpu_hw_info_get,
+ .ecc_enabled = gpu_ecc_enabled,
+ .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
+ .dump_misc_regs = gpu_dump_misc_regs,
+ .get_err_info = gpu_get_err_info,
+};
+
/* Use Family 16h versions for defaults and adjust as needed below. */
static struct low_ops dct_ops = {
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -3813,9 +4121,27 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x20 ... 0x2f:
pvt->ctl_name = "F19h_M20h";
break;
+ case 0x30 ... 0x3f:
+ if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
+ pvt->ctl_name = "MI200";
+ pvt->max_mcs = 4;
+ pvt->ops = &gpu_ops;
+ } else {
+ pvt->ctl_name = "F19h_M30h";
+ pvt->max_mcs = 8;
+ }
+ break;
case 0x50 ... 0x5f:
pvt->ctl_name = "F19h_M50h";
break;
+ case 0x60 ... 0x6f:
+ pvt->ctl_name = "F19h_M60h";
+ pvt->flags.zn_regs_v2 = 1;
+ break;
+ case 0x70 ... 0x7f:
+ pvt->ctl_name = "F19h_M70h";
+ pvt->flags.zn_regs_v2 = 1;
+ break;
case 0xa0 ... 0xaf:
pvt->ctl_name = "F19h_MA0h";
pvt->max_mcs = 12;
@@ -3846,11 +4172,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
struct edac_mc_layer layers[2];
int ret = -ENOMEM;
+ /*
+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
+ * be swapped to fit into the layers.
+ */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = pvt->max_mcs;
+ layers[1].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->csels[0].b_cnt : pvt->max_mcs;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
@@ -4074,8 +4406,6 @@ static int __init amd64_edac_init(void)
amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif
- printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
-
return 0;
err_pci:
@@ -4121,7 +4451,7 @@ module_exit(amd64_edac_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD");
-MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " EDAC_AMD64_VERSION);
+MODULE_DESCRIPTION("MC support for AMD64 memory controllers");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index e84fe0d4120a..5a4e4a59682b 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/edac.h>
+#include <linux/bitfield.h>
#include <asm/cpu_device_id.h>
#include <asm/msr.h>
#include "edac_module.h"
@@ -85,7 +86,6 @@
* sections 3.5.4 and 3.5.5 for more information.
*/
-#define EDAC_AMD64_VERSION "3.5.0"
#define EDAC_MOD_STR "amd64_edac"
/* Extended Model from CPUID, for CPU Revision numbers */
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63feb26a..9215c06783df 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1186,7 +1186,8 @@ static void decode_smca_error(struct mce *m)
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
+ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
+ xec == 0 && decode_dram_ecc)
decode_dram_ecc(topology_die_id(m->extcpu), m);
}
diff --git a/drivers/edac/npcm_edac.c b/drivers/edac/npcm_edac.c
new file mode 100644
index 000000000000..12b95be3e989
--- /dev/null
+++ b/drivers/edac/npcm_edac.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2022 Nuvoton Technology Corporation
+
+#include <linux/debugfs.h>
+#include <linux/iopoll.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include "edac_module.h"
+
+#define EDAC_MOD_NAME "npcm-edac"
+#define EDAC_MSG_SIZE 256
+
+/* chip serials */
+#define NPCM7XX_CHIP BIT(0)
+#define NPCM8XX_CHIP BIT(1)
+
+/* syndrome values */
+#define UE_SYNDROME 0x03
+
+/* error injection */
+#define ERROR_TYPE_CORRECTABLE 0
+#define ERROR_TYPE_UNCORRECTABLE 1
+#define ERROR_LOCATION_DATA 0
+#define ERROR_LOCATION_CHECKCODE 1
+#define ERROR_BIT_DATA_MAX 63
+#define ERROR_BIT_CHECKCODE_MAX 7
+
+/*
+ * Syndrome table used by force_ecc_error(): entry N is the value written to
+ * the XOR check bits when a correctable error is injected into data bit N
+ * (0..ERROR_BIT_DATA_MAX).
+ *
+ * Stored as const u8 so that entries above 0x7f can never sign-extend when
+ * widened to u32, regardless of the signedness of plain char, and so the
+ * table cannot be modified at run time.
+ */
+static const u8 data_synd[] = {
+	0xf4, 0xf1, 0xec, 0xea, 0xe9, 0xe6, 0xe5, 0xe3,
+	0xdc, 0xda, 0xd9, 0xd6, 0xd5, 0xd3, 0xce, 0xcb,
+	0xb5, 0xb0, 0xad, 0xab, 0xa8, 0xa7, 0xa4, 0xa2,
+	0x9d, 0x9b, 0x98, 0x97, 0x94, 0x92, 0x8f, 0x8a,
+	0x75, 0x70, 0x6d, 0x6b, 0x68, 0x67, 0x64, 0x62,
+	0x5e, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x4f, 0x4a,
+	0x34, 0x31, 0x2c, 0x2a, 0x29, 0x26, 0x25, 0x23,
+	0x1c, 0x1a, 0x19, 0x16, 0x15, 0x13, 0x0e, 0x0b
+};
+
+static struct regmap *npcm_regmap;
+
+/*
+ * Per-SoC description of the memory controller: register offsets (ctl_*)
+ * plus the masks and shifts used to decode or program them. One constant
+ * instance exists per supported chip and is attached to the OF match table;
+ * fields a chip's instance does not initialise are zero.
+ */
+struct npcm_platform_data {
+ /* chip serials */
+ int chip;
+
+ /* memory controller registers */
+ u32 ctl_ecc_en;
+ u32 ctl_int_status;
+ u32 ctl_int_ack;
+ u32 ctl_int_mask_master;
+ u32 ctl_int_mask_ecc;
+ u32 ctl_ce_addr_l;
+ u32 ctl_ce_addr_h;
+ u32 ctl_ce_data_l;
+ u32 ctl_ce_data_h;
+ u32 ctl_ce_synd;
+ u32 ctl_ue_addr_l;
+ u32 ctl_ue_addr_h;
+ u32 ctl_ue_data_l;
+ u32 ctl_ue_data_h;
+ u32 ctl_ue_synd;
+ u32 ctl_source_id;
+ u32 ctl_controller_busy;
+ u32 ctl_xor_check_bits;
+
+ /* masks and shifts */
+ u32 ecc_en_mask;
+ u32 int_status_ce_mask;
+ u32 int_status_ue_mask;
+ u32 int_ack_ce_mask;
+ u32 int_ack_ue_mask;
+ u32 int_mask_master_non_ecc_mask;
+ u32 int_mask_master_global_mask;
+ u32 int_mask_ecc_non_event_mask;
+ u32 ce_addr_h_mask;
+ u32 ce_synd_mask;
+ u32 ce_synd_shift;
+ u32 ue_addr_h_mask;
+ u32 ue_synd_mask;
+ u32 ue_synd_shift;
+ u32 source_id_ce_mask;
+ u32 source_id_ce_shift;
+ u32 source_id_ue_mask;
+ u32 source_id_ue_shift;
+ u32 controller_busy_mask;
+ u32 xor_check_bits_mask;
+ u32 xor_check_bits_shift;
+ u32 writeback_en_mask;
+ u32 fwc_mask;
+};
+
+/*
+ * Driver-private state, allocated by edac_mc_alloc() as mci->pvt_info.
+ */
+struct priv_data {
+ /* mapped controller registers, as returned by the ioremap in probe */
+ void __iomem *reg;
+ /* scratch buffer for the message handed to edac_mc_handle_error() */
+ char message[EDAC_MSG_SIZE];
+ /* register layout of the matched chip */
+ const struct npcm_platform_data *pdata;
+
+ /* error injection */
+ struct dentry *debugfs;
+ /* ERROR_TYPE_*, set through the "error_type" debugfs node */
+ u8 error_type;
+ /* ERROR_LOCATION_*, set through the "location" debugfs node */
+ u8 location;
+ /* bit number to corrupt, set through the "bit" debugfs node */
+ u8 bit;
+};
+
+/*
+ * Report a correctable error: collect the captured address, data, source ID
+ * and syndrome from the controller and hand them to the EDAC core.
+ */
+static void handle_ce(struct mem_ctl_info *mci)
+{
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+	bool has_high_words = pdata->chip == NPCM8XX_CHIP;
+	u32 hi = 0, lo, src_id, syndrome;
+	u64 addr, data;
+
+	/* captured address; only NPCM8XX provides an upper word */
+	regmap_read(npcm_regmap, pdata->ctl_ce_addr_l, &lo);
+	if (has_high_words) {
+		regmap_read(npcm_regmap, pdata->ctl_ce_addr_h, &hi);
+		hi &= pdata->ce_addr_h_mask;
+	}
+	addr = ((u64)hi << 32) | lo;
+
+	/* captured data word(s) */
+	regmap_read(npcm_regmap, pdata->ctl_ce_data_l, &lo);
+	if (has_high_words)
+		regmap_read(npcm_regmap, pdata->ctl_ce_data_h, &hi);
+	data = ((u64)hi << 32) | lo;
+
+	regmap_read(npcm_regmap, pdata->ctl_source_id, &src_id);
+	src_id &= pdata->source_id_ce_mask;
+	src_id >>= pdata->source_id_ce_shift;
+
+	regmap_read(npcm_regmap, pdata->ctl_ce_synd, &syndrome);
+	syndrome &= pdata->ce_synd_mask;
+	syndrome >>= pdata->ce_synd_shift;
+
+	snprintf(priv->message, EDAC_MSG_SIZE,
+		 "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, src_id);
+
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+			     addr >> PAGE_SHIFT, addr & ~PAGE_MASK, syndrome,
+			     0, 0, -1, priv->message, "");
+}
+
+/*
+ * Report an uncorrectable error: collect the captured address, data, source
+ * ID and syndrome from the controller and hand them to the EDAC core.
+ */
+static void handle_ue(struct mem_ctl_info *mci)
+{
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+	bool has_high_words = pdata->chip == NPCM8XX_CHIP;
+	u32 hi = 0, lo, src_id, syndrome;
+	u64 addr, data;
+
+	/* captured address; only NPCM8XX provides an upper word */
+	regmap_read(npcm_regmap, pdata->ctl_ue_addr_l, &lo);
+	if (has_high_words) {
+		regmap_read(npcm_regmap, pdata->ctl_ue_addr_h, &hi);
+		hi &= pdata->ue_addr_h_mask;
+	}
+	addr = ((u64)hi << 32) | lo;
+
+	/* captured data word(s) */
+	regmap_read(npcm_regmap, pdata->ctl_ue_data_l, &lo);
+	if (has_high_words)
+		regmap_read(npcm_regmap, pdata->ctl_ue_data_h, &hi);
+	data = ((u64)hi << 32) | lo;
+
+	regmap_read(npcm_regmap, pdata->ctl_source_id, &src_id);
+	src_id &= pdata->source_id_ue_mask;
+	src_id >>= pdata->source_id_ue_shift;
+
+	regmap_read(npcm_regmap, pdata->ctl_ue_synd, &syndrome);
+	syndrome &= pdata->ue_synd_mask;
+	syndrome >>= pdata->ue_synd_shift;
+
+	snprintf(priv->message, EDAC_MSG_SIZE,
+		 "addr = 0x%llx, data = 0x%llx, id = 0x%x", addr, data, src_id);
+
+	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+			     addr >> PAGE_SHIFT, addr & ~PAGE_MASK, syndrome,
+			     0, 0, -1, priv->message, "");
+}
+
+/*
+ * ECC interrupt handler. Services one event class per invocation: a pending
+ * correctable error takes precedence over an uncorrectable one. The handled
+ * class is then acknowledged. An interrupt with neither class pending is
+ * unexpected and triggers a one-time warning.
+ */
+static irqreturn_t edac_ecc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+	u32 pending, ack_mask;
+
+	regmap_read(npcm_regmap, pdata->ctl_int_status, &pending);
+
+	if (pending & pdata->int_status_ce_mask) {
+		handle_ce(mci);
+		ack_mask = pdata->int_ack_ce_mask;
+	} else if (pending & pdata->int_status_ue_mask) {
+		handle_ue(mci);
+		ack_mask = pdata->int_ack_ue_mask;
+	} else {
+		WARN_ON_ONCE(1);
+		return IRQ_NONE;
+	}
+
+	/* acknowledge the interrupt class that was just reported */
+	regmap_write(npcm_regmap, pdata->ctl_int_ack, ack_mask);
+	return IRQ_HANDLED;
+}
+
+/*
+ * debugfs write handler for "force_ecc_error": inject the ECC error described
+ * by the error_type/location/bit debugfs nodes.
+ *
+ * The written data itself is ignored; any write triggers an injection. The
+ * request is validated before the hardware is touched: an unknown error type
+ * or location is rejected rather than forcing a write check with a stale
+ * syndrome or shifting by an unchecked bit number. Every outcome is logged
+ * and reported to user space as a full-length write.
+ */
+static ssize_t force_ecc_error(struct file *file, const char __user *data,
+			       size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+	/* snapshot the debugfs knobs so the validated values are the ones used */
+	u8 type = priv->error_type;
+	u8 location = priv->location;
+	u8 bit = priv->bit;
+	u32 val, syndrome;
+	int ret;
+
+	edac_printk(KERN_INFO, EDAC_MOD_NAME,
+		    "force an ECC error, type = %d, location = %d, bit = %d\n",
+		    type, location, bit);
+
+	/* work out the syndrome for XOR_CHECK_BITS, rejecting bad requests */
+	switch (type) {
+	case ERROR_TYPE_CORRECTABLE:
+		if (location == ERROR_LOCATION_DATA) {
+			if (bit > ERROR_BIT_DATA_MAX) {
+				edac_printk(KERN_INFO, EDAC_MOD_NAME,
+					    "data bit should not exceed %d (%d)\n",
+					    ERROR_BIT_DATA_MAX, bit);
+				return count;
+			}
+			syndrome = data_synd[bit];
+		} else if (location == ERROR_LOCATION_CHECKCODE) {
+			if (bit > ERROR_BIT_CHECKCODE_MAX) {
+				edac_printk(KERN_INFO, EDAC_MOD_NAME,
+					    "checkcode bit should not exceed %d (%d)\n",
+					    ERROR_BIT_CHECKCODE_MAX, bit);
+				return count;
+			}
+			syndrome = 1U << bit;
+		} else {
+			edac_printk(KERN_INFO, EDAC_MOD_NAME,
+				    "location should be %d or %d (%d)\n",
+				    ERROR_LOCATION_DATA,
+				    ERROR_LOCATION_CHECKCODE, location);
+			return count;
+		}
+		break;
+	case ERROR_TYPE_UNCORRECTABLE:
+		syndrome = UE_SYNDROME;
+		break;
+	default:
+		edac_printk(KERN_INFO, EDAC_MOD_NAME,
+			    "error type should be %d or %d (%d)\n",
+			    ERROR_TYPE_CORRECTABLE, ERROR_TYPE_UNCORRECTABLE,
+			    type);
+		return count;
+	}
+
+	/* ensure no pending writes */
+	ret = regmap_read_poll_timeout(npcm_regmap, pdata->ctl_controller_busy,
+				       val, !(val & pdata->controller_busy_mask),
+				       1000, 10000);
+	if (ret) {
+		edac_printk(KERN_INFO, EDAC_MOD_NAME,
+			    "wait pending writes timeout\n");
+		return count;
+	}
+
+	/* write syndrome to XOR_CHECK_BITS */
+	regmap_read(npcm_regmap, pdata->ctl_xor_check_bits, &val);
+	val &= ~pdata->xor_check_bits_mask;
+	val |= syndrome << pdata->xor_check_bits_shift;
+	/* only the correctable path sets the writeback-enable bit */
+	if (type == ERROR_TYPE_CORRECTABLE)
+		val |= pdata->writeback_en_mask;
+	regmap_write(npcm_regmap, pdata->ctl_xor_check_bits, val);
+
+	/* force write check */
+	regmap_update_bits(npcm_regmap, pdata->ctl_xor_check_bits,
+			   pdata->fwc_mask, pdata->fwc_mask);
+
+	return count;
+}
+
+/* File operations of the write-only "force_ecc_error" debugfs trigger. */
+static const struct file_operations force_ecc_error_fops = {
+ .open = simple_open,
+ .write = force_ecc_error,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Setup debugfs for error injection.
+ *
+ * Nodes:
+ *   error_type      - 0: CE, 1: UE
+ *   location        - 0: data, 1: checkcode
+ *   bit             - 0 ~ 63 for data and 0 ~ 7 for checkcode
+ *   force_ecc_error - trigger
+ *
+ * Examples:
+ *   1. Inject a correctable error (CE) at checkcode bit 7.
+ *      ~# echo 0 > /sys/kernel/debug/edac/npcm-edac/error_type
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/location
+ *      ~# echo 7 > /sys/kernel/debug/edac/npcm-edac/bit
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error
+ *
+ *   2. Inject an uncorrectable error (UE).
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/error_type
+ *      ~# echo 1 > /sys/kernel/debug/edac/npcm-edac/force_ecc_error
+ *
+ * Failure to create the directory is not fatal: the driver simply runs
+ * without the injection interface.
+ */
+static void setup_debugfs(struct mem_ctl_info *mci)
+{
+	struct priv_data *priv = mci->pvt_info;
+
+	priv->debugfs = edac_debugfs_create_dir(mci->mod_name);
+	/*
+	 * debugfs reports failure through an error pointer rather than NULL,
+	 * so a plain NULL test would never notice it; accept either form.
+	 */
+	if (IS_ERR_OR_NULL(priv->debugfs))
+		return;
+
+	edac_debugfs_create_x8("error_type", 0644, priv->debugfs, &priv->error_type);
+	edac_debugfs_create_x8("location", 0644, priv->debugfs, &priv->location);
+	edac_debugfs_create_x8("bit", 0644, priv->debugfs, &priv->bit);
+	edac_debugfs_create_file("force_ecc_error", 0200, priv->debugfs,
+				 &mci->dev, &force_ecc_error_fops);
+}
+
+/*
+ * Look up the controller interrupt, install edac_ecc_isr() as a managed
+ * handler with @mci as its cookie, then program the interrupt masks.
+ *
+ * Returns 0 on success or the negative error code from the IRQ lookup or
+ * request.
+ */
+static int setup_irq(struct mem_ctl_info *mci, struct platform_device *pdev)
+{
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+	struct device *dev = &pdev->dev;
+	int irq, err;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "IRQ not defined in DTS\n");
+		return irq;
+	}
+
+	err = devm_request_irq(dev, irq, edac_ecc_isr, 0, dev_name(dev), mci);
+	if (err < 0) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "failed to request IRQ\n");
+		return err;
+	}
+
+	/* enable the functional group of ECC and mask the others */
+	regmap_write(npcm_regmap, pdata->ctl_int_mask_master,
+		     pdata->int_mask_master_non_ecc_mask);
+
+	/* the separate ECC mask register is only programmed on NPCM8XX */
+	if (pdata->chip == NPCM8XX_CHIP)
+		regmap_write(npcm_regmap, pdata->ctl_int_mask_ecc,
+			     pdata->int_mask_ecc_non_event_mask);
+
+	return 0;
+}
+
+/* regmap layout for the MMIO window: 32-bit registers at 4-byte stride. */
+static const struct regmap_config npcm_regmap_cfg = {
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+};
+
+/*
+ * Bind to a memory controller: map its registers, refuse to load when ECC is
+ * not enabled, then allocate and register a single-layer EDAC memory
+ * controller driven by the ECC interrupt. On NPCM8XX with CONFIG_EDAC_DEBUG
+ * the error-injection debugfs nodes are created as well.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int edac_probe(struct platform_device *pdev)
+{
+	const struct npcm_platform_data *pdata;
+	struct device *dev = &pdev->dev;
+	struct edac_mc_layer layers[1];
+	struct mem_ctl_info *mci;
+	struct priv_data *priv;
+	void __iomem *reg;
+	u32 val;
+	int rc;
+
+	reg = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	npcm_regmap = devm_regmap_init_mmio(dev, reg, &npcm_regmap_cfg);
+	if (IS_ERR(npcm_regmap))
+		return PTR_ERR(npcm_regmap);
+
+	pdata = of_device_get_match_data(dev);
+	if (!pdata)
+		return -EINVAL;
+
+	/* bail out if ECC is not enabled */
+	regmap_read(npcm_regmap, pdata->ctl_ecc_en, &val);
+	if (!(val & pdata->ecc_en_mask)) {
+		edac_printk(KERN_ERR, EDAC_MOD_NAME, "ECC is not enabled\n");
+		return -EPERM;
+	}
+
+	edac_op_state = EDAC_OPSTATE_INT;
+
+	layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+	layers[0].size = 1;
+
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct priv_data));
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	priv = mci->pvt_info;
+	priv->reg = reg;
+	priv->pdata = pdata;
+	platform_set_drvdata(pdev, mci);
+
+	mci->mtype_cap = MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+	mci->scrub_cap = SCRUB_FLAG_HW_SRC;
+	mci->scrub_mode = SCRUB_HW_SRC;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->ctl_name = "npcm_ddr_controller";
+	mci->dev_name = dev_name(&pdev->dev);
+	mci->mod_name = EDAC_MOD_NAME;
+	mci->ctl_page_to_phys = NULL;
+
+	rc = setup_irq(mci, pdev);
+	if (rc)
+		goto free_edac_mc;
+
+	rc = edac_mc_add_mc(mci);
+	if (rc)
+		goto mask_irq;
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP)
+		setup_debugfs(mci);
+
+	return 0;
+
+mask_irq:
+	/*
+	 * setup_irq() unmasked the ECC interrupts and its managed handler
+	 * stays registered, with mci as cookie, until probe has unwound.
+	 * Mask everything again so the handler cannot run on the memory
+	 * freed below.
+	 */
+	regmap_write(npcm_regmap, pdata->ctl_int_mask_master,
+		     pdata->int_mask_master_global_mask);
+free_edac_mc:
+	edac_mc_free(mci);
+	return rc;
+}
+
+/*
+ * Unbind from the memory controller: mask its interrupts, tear down the
+ * debugfs nodes and the EDAC memory controller, then clear the ECC enable
+ * bits.
+ */
+static int edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+	struct priv_data *priv = mci->pvt_info;
+	const struct npcm_platform_data *pdata = priv->pdata;
+
+	/*
+	 * The managed interrupt handler is only released after this function
+	 * returns and uses mci as its cookie, so mask the interrupts before
+	 * mci is freed rather than after.
+	 */
+	regmap_write(npcm_regmap, pdata->ctl_int_mask_master,
+		     pdata->int_mask_master_global_mask);
+
+	if (IS_ENABLED(CONFIG_EDAC_DEBUG) && pdata->chip == NPCM8XX_CHIP)
+		edac_debugfs_remove_recursive(priv->debugfs);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+
+	/* pdata points at constant match data, so it is still valid here */
+	regmap_update_bits(npcm_regmap, pdata->ctl_ecc_en, pdata->ecc_en_mask, 0);
+
+	return 0;
+}
+
+/*
+ * NPCM750 register map. Only the low address/data words are described; the
+ * high-word, ECC event mask and error-injection fields are left at zero.
+ */
+static const struct npcm_platform_data npcm750_edac = {
+ .chip = NPCM7XX_CHIP,
+
+ /* memory controller registers */
+ .ctl_ecc_en = 0x174,
+ .ctl_int_status = 0x1d0,
+ .ctl_int_ack = 0x1d4,
+ .ctl_int_mask_master = 0x1d8,
+ .ctl_ce_addr_l = 0x188,
+ .ctl_ce_data_l = 0x190,
+ .ctl_ce_synd = 0x18c,
+ .ctl_ue_addr_l = 0x17c,
+ .ctl_ue_data_l = 0x184,
+ .ctl_ue_synd = 0x180,
+ .ctl_source_id = 0x194,
+
+ /* masks and shifts */
+ .ecc_en_mask = BIT(24),
+ .int_status_ce_mask = GENMASK(4, 3),
+ .int_status_ue_mask = GENMASK(6, 5),
+ .int_ack_ce_mask = GENMASK(4, 3),
+ .int_ack_ue_mask = GENMASK(6, 5),
+ .int_mask_master_non_ecc_mask = GENMASK(30, 7) | GENMASK(2, 0),
+ .int_mask_master_global_mask = BIT(31),
+ .ce_synd_mask = GENMASK(6, 0),
+ .ce_synd_shift = 0,
+ .ue_synd_mask = GENMASK(6, 0),
+ .ue_synd_shift = 0,
+ .source_id_ce_mask = GENMASK(29, 16),
+ .source_id_ce_shift = 16,
+ .source_id_ue_mask = GENMASK(13, 0),
+ .source_id_ue_shift = 0,
+};
+
+/*
+ * NPCM845 register map. In addition to the basic error registers it fills in
+ * the upper address/data words, the ECC event mask and the registers used
+ * for error injection. The syndrome registers share their offsets with the
+ * upper address words (0x190 and 0x180); the masks below tell the fields
+ * apart.
+ */
+static const struct npcm_platform_data npcm845_edac = {
+ .chip = NPCM8XX_CHIP,
+
+ /* memory controller registers */
+ .ctl_ecc_en = 0x16c,
+ .ctl_int_status = 0x228,
+ .ctl_int_ack = 0x244,
+ .ctl_int_mask_master = 0x220,
+ .ctl_int_mask_ecc = 0x260,
+ .ctl_ce_addr_l = 0x18c,
+ .ctl_ce_addr_h = 0x190,
+ .ctl_ce_data_l = 0x194,
+ .ctl_ce_data_h = 0x198,
+ .ctl_ce_synd = 0x190,
+ .ctl_ue_addr_l = 0x17c,
+ .ctl_ue_addr_h = 0x180,
+ .ctl_ue_data_l = 0x184,
+ .ctl_ue_data_h = 0x188,
+ .ctl_ue_synd = 0x180,
+ .ctl_source_id = 0x19c,
+ .ctl_controller_busy = 0x20c,
+ .ctl_xor_check_bits = 0x174,
+
+ /* masks and shifts */
+ .ecc_en_mask = GENMASK(17, 16),
+ .int_status_ce_mask = GENMASK(1, 0),
+ .int_status_ue_mask = GENMASK(3, 2),
+ .int_ack_ce_mask = GENMASK(1, 0),
+ .int_ack_ue_mask = GENMASK(3, 2),
+ .int_mask_master_non_ecc_mask = GENMASK(30, 3) | GENMASK(1, 0),
+ .int_mask_master_global_mask = BIT(31),
+ .int_mask_ecc_non_event_mask = GENMASK(8, 4),
+ .ce_addr_h_mask = GENMASK(1, 0),
+ .ce_synd_mask = GENMASK(15, 8),
+ .ce_synd_shift = 8,
+ .ue_addr_h_mask = GENMASK(1, 0),
+ .ue_synd_mask = GENMASK(15, 8),
+ .ue_synd_shift = 8,
+ .source_id_ce_mask = GENMASK(29, 16),
+ .source_id_ce_shift = 16,
+ .source_id_ue_mask = GENMASK(13, 0),
+ .source_id_ue_shift = 0,
+ .controller_busy_mask = BIT(0),
+ .xor_check_bits_mask = GENMASK(23, 16),
+ .xor_check_bits_shift = 16,
+ .writeback_en_mask = BIT(24),
+ .fwc_mask = BIT(8),
+};
+
+/* Device-tree match table; .data selects the register map for the chip. */
+static const struct of_device_id npcm_edac_of_match[] = {
+ {
+ .compatible = "nuvoton,npcm750-memory-controller",
+ .data = &npcm750_edac
+ },
+ {
+ .compatible = "nuvoton,npcm845-memory-controller",
+ .data = &npcm845_edac
+ },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, npcm_edac_of_match);
+
+/* Platform driver glue: binds edac_probe()/edac_remove() to matching nodes. */
+static struct platform_driver npcm_edac_driver = {
+ .driver = {
+ .name = "npcm-edac",
+ .of_match_table = npcm_edac_of_match,
+ },
+ .probe = edac_probe,
+ .remove = edac_remove,
+};
+
+module_platform_driver(npcm_edac_driver);
+
+MODULE_AUTHOR("Medad CChien <medadyoung@gmail.com>");
+MODULE_AUTHOR("Marvin Lin <kflin@nuvoton.com>");
+MODULE_DESCRIPTION("Nuvoton NPCM EDAC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 0bcd9f02c84a..b9c5772da959 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -481,7 +481,7 @@ static int thunderx_create_debugfs_nodes(struct dentry *parent,
ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
parent, data, &attrs[i]->fops);
- if (!ent)
+ if (IS_ERR(ent))
break;
}
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 043ca31c114e..231f1c70d1db 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -269,6 +269,20 @@ config EFI_COCO_SECRET
virt/coco/efi_secret module to access the secrets, which in turn
allows userspace programs to access the injected secrets.
+config UNACCEPTED_MEMORY
+ bool
+ depends on EFI_STUB
+ help
+ Some Virtual Machine platforms, such as Intel TDX, require
+ some memory to be "accepted" by the guest before it can be used.
+ This mechanism helps prevent malicious hosts from making changes
+ to guest memory.
+
+ UEFI specification v2.9 introduced EFI_UNACCEPTED_MEMORY memory type.
+
+ This option adds support for unaccepted memory and makes such memory
+ usable by the kernel.
+
config EFI_EMBEDDED_FIRMWARE
bool
select CRYPTO_LIB_SHA256
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index b51f2a4c821e..e489fefd23da 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -41,3 +41,4 @@ obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
obj-$(CONFIG_EFI_EARLYCON) += earlycon.o
obj-$(CONFIG_UEFI_CPER_ARM) += cper-arm.o
obj-$(CONFIG_UEFI_CPER_X86) += cper-x86.o
+obj-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index abeff7dc0b58..3a6ee7bb06f1 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -50,6 +50,9 @@ struct efi __read_mostly efi = {
#ifdef CONFIG_EFI_COCO_SECRET
.coco_secret = EFI_INVALID_TABLE_ADDR,
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ .unaccepted = EFI_INVALID_TABLE_ADDR,
+#endif
};
EXPORT_SYMBOL(efi);
@@ -361,24 +364,6 @@ static void __init efi_debugfs_init(void)
static inline void efi_debugfs_init(void) {}
#endif
-static void refresh_nv_rng_seed(struct work_struct *work)
-{
- u8 seed[EFI_RANDOM_SEED_SIZE];
-
- get_random_bytes(seed, sizeof(seed));
- efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID,
- EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed);
- memzero_explicit(seed, sizeof(seed));
-}
-static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data)
-{
- static DECLARE_WORK(work, refresh_nv_rng_seed);
- schedule_work(&work);
- return NOTIFY_DONE;
-}
-static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification };
-
/*
* We register the efi subsystem with the firmware subsystem and the
* efivars subsystem with the efi subsystem, if the system was booted with
@@ -451,9 +436,6 @@ static int __init efisubsys_init(void)
platform_device_register_simple("efi_secret", 0, NULL, 0);
#endif
- if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
- execute_with_initialized_rng(&refresh_nv_rng_seed_nb);
-
return 0;
err_remove_group:
@@ -605,6 +587,9 @@ static const efi_config_table_type_t common_tables[] __initconst = {
#ifdef CONFIG_EFI_COCO_SECRET
{LINUX_EFI_COCO_SECRET_AREA_GUID, &efi.coco_secret, "CocoSecret" },
#endif
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ {LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID, &efi.unaccepted, "Unaccepted" },
+#endif
#ifdef CONFIG_EFI_GENERIC_STUB
{LINUX_EFI_SCREEN_INFO_TABLE_GUID, &screen_info_table },
#endif
@@ -759,6 +744,25 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables,
}
}
+ if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) &&
+ efi.unaccepted != EFI_INVALID_TABLE_ADDR) {
+ struct efi_unaccepted_memory *unaccepted;
+
+ unaccepted = early_memremap(efi.unaccepted, sizeof(*unaccepted));
+ if (unaccepted) {
+ unsigned long size;
+
+ if (unaccepted->version == 1) {
+ size = sizeof(*unaccepted) + unaccepted->size;
+ memblock_reserve(efi.unaccepted, size);
+ } else {
+ efi.unaccepted = EFI_INVALID_TABLE_ADDR;
+ }
+
+ early_memunmap(unaccepted, sizeof(*unaccepted));
+ }
+ }
+
return 0;
}
@@ -843,6 +847,7 @@ static __initdata char memory_type_name[][13] = {
"MMIO Port",
"PAL Code",
"Persistent",
+ "Unaccepted",
};
char * __init efi_md_typeattr_format(char *buf, size_t size,
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 3abb2b357482..16d64a34d1e1 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -96,6 +96,8 @@ CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
zboot-obj-$(CONFIG_RISCV) := lib-clz_ctz.o lib-ashldi3.o
lib-$(CONFIG_EFI_ZBOOT) += zboot.o $(zboot-obj-y)
+lib-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o bitmap.o find.o
+
extra-y := $(lib-y)
lib-y := $(patsubst %.o,%.stub.o,$(lib-y))
diff --git a/drivers/firmware/efi/libstub/bitmap.c b/drivers/firmware/efi/libstub/bitmap.c
new file mode 100644
index 000000000000..5c9bba0d549b
--- /dev/null
+++ b/drivers/firmware/efi/libstub/bitmap.c
@@ -0,0 +1,41 @@
+#include <linux/bitmap.h>
+
+/*
+ * Set @len consecutive bits in @map, starting at bit number @start.
+ *
+ * The range is processed one word at a time: a partial mask for the first
+ * word, all-ones masks for any further complete words, and a final mask
+ * trimmed to the end of the range when it stops inside a word.
+ */
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ /* loop while the remaining length reaches the end of the current word */
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ /* leftover bits that end inside the current word */
+ if (len) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+/*
+ * Clear @len consecutive bits in @map, starting at bit number @start.
+ *
+ * Mirrors __bitmap_set(): the same per-word masks are built, but they are
+ * inverted and ANDed into the map instead of ORed.
+ */
+void __bitmap_clear(unsigned long *map, unsigned int start, int len)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const unsigned int size = start + len;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ /* loop while the remaining length reaches the end of the current word */
+ while (len - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ /* leftover bits that end inside the current word */
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 54a2822cae77..6aa38a1bf126 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -1136,4 +1136,10 @@ void efi_remap_image(unsigned long image_base, unsigned alloc_size,
asmlinkage efi_status_t __efiapi
efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);
+efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
+ struct efi_boot_memmap *map);
+void process_unaccepted_memory(u64 start, u64 end);
+void accept_memory(phys_addr_t start, phys_addr_t end);
+void arch_accept_memory(phys_addr_t start, phys_addr_t end);
+
#endif
diff --git a/drivers/firmware/efi/libstub/find.c b/drivers/firmware/efi/libstub/find.c
new file mode 100644
index 000000000000..4e7740d28987
--- /dev/null
+++ b/drivers/firmware/efi/libstub/find.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/bitmap.h>
+#include <linux/math.h>
+#include <linux/minmax.h>
+
+/*
+ * Common helper for find_next_bit() function family
+ * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
+ * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ *
+ * @FETCH is re-evaluated for every word and may refer to 'idx', the index of
+ * the word currently being examined. The macro evaluates to the number of
+ * the first matching bit at or after @start, or to @size when there is none.
+ */
+#define FIND_NEXT_BIT(FETCH, MUNGE, size, start) \
+({ \
+ unsigned long mask, idx, tmp, sz = (size), __start = (start); \
+ \
+ if (unlikely(__start >= sz)) \
+ goto out; \
+ \
+ mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start)); \
+ idx = __start / BITS_PER_LONG; \
+ \
+ for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) { \
+ if ((idx + 1) * BITS_PER_LONG >= sz) \
+ goto out; \
+ idx++; \
+ } \
+ \
+ sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz); \
+out: \
+ sz; \
+})
+
+/*
+ * Return the number of the first set bit in @addr at or after @start, or
+ * @nbits if no such bit exists below @nbits.
+ */
+unsigned long _find_next_bit(const unsigned long *addr, unsigned long nbits, unsigned long start)
+{
+ return FIND_NEXT_BIT(addr[idx], /* nop */, nbits, start);
+}
+
+/*
+ * Return the number of the first clear bit in @addr at or after @start, or
+ * @nbits if no such bit exists below @nbits. Each word is inverted before
+ * the search, so clear bits are found as set ones.
+ */
+unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
+ unsigned long start)
+{
+ return FIND_NEXT_BIT(~addr[idx], /* nop */, nbits, start);
+}
diff --git a/drivers/firmware/efi/libstub/unaccepted_memory.c b/drivers/firmware/efi/libstub/unaccepted_memory.c
new file mode 100644
index 000000000000..ca61f4733ea5
--- /dev/null
+++ b/drivers/firmware/efi/libstub/unaccepted_memory.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include "efistub.h"
+
+struct efi_unaccepted_memory *unaccepted_table;
+
+/*
+ * Make sure an unaccepted-memory configuration table exists if the memory
+ * map contains EFI_UNACCEPTED_MEMORY.
+ *
+ * @nr_desc: number of descriptors in @map
+ * @map:     EFI memory map to scan
+ *
+ * An already installed table is reused when its version is 1. Otherwise a
+ * zeroed bitmap covering the span from the lowest to the highest unaccepted
+ * address is allocated and installed; nothing is done when there is no
+ * unaccepted memory.
+ *
+ * Returns EFI_SUCCESS, EFI_UNSUPPORTED for an unknown table version, or the
+ * status of the failing boot service. If installing the table fails, the
+ * allocation is released and unaccepted_table is reset to NULL so that no
+ * stale pointer is left behind.
+ */
+efi_status_t allocate_unaccepted_bitmap(__u32 nr_desc,
+					struct efi_boot_memmap *map)
+{
+	efi_guid_t unaccepted_table_guid = LINUX_EFI_UNACCEPTED_MEM_TABLE_GUID;
+	u64 unaccepted_start = ULLONG_MAX, unaccepted_end = 0, bitmap_size;
+	efi_status_t status;
+	u32 i;
+
+	/* Check if the table is already installed */
+	unaccepted_table = get_efi_config_table(unaccepted_table_guid);
+	if (unaccepted_table) {
+		if (unaccepted_table->version != 1) {
+			efi_err("Unknown version of unaccepted memory table\n");
+			return EFI_UNSUPPORTED;
+		}
+		return EFI_SUCCESS;
+	}
+
+	/* Check if there's any unaccepted memory and find the max address */
+	for (i = 0; i < nr_desc; i++) {
+		efi_memory_desc_t *d;
+		unsigned long m = (unsigned long)map->map;
+
+		d = efi_early_memdesc_ptr(m, map->desc_size, i);
+		if (d->type != EFI_UNACCEPTED_MEMORY)
+			continue;
+
+		unaccepted_start = min(unaccepted_start, d->phys_addr);
+		unaccepted_end = max(unaccepted_end,
+				     d->phys_addr + d->num_pages * PAGE_SIZE);
+	}
+
+	if (unaccepted_start == ULLONG_MAX)
+		return EFI_SUCCESS;
+
+	unaccepted_start = round_down(unaccepted_start,
+				      EFI_UNACCEPTED_UNIT_SIZE);
+	unaccepted_end = round_up(unaccepted_end, EFI_UNACCEPTED_UNIT_SIZE);
+
+	/*
+	 * If unaccepted memory is present, allocate a bitmap to track what
+	 * memory has to be accepted before access.
+	 *
+	 * One bit in the bitmap represents 2MiB in the address space:
+	 * A 4k bitmap can track 64GiB of physical address space.
+	 *
+	 * In the worst case scenario -- a huge hole in the middle of the
+	 * address space -- It needs 256MiB to handle 4PiB of the address
+	 * space.
+	 *
+	 * The bitmap will be populated in setup_e820() according to the memory
+	 * map after efi_exit_boot_services().
+	 */
+	bitmap_size = DIV_ROUND_UP(unaccepted_end - unaccepted_start,
+				   EFI_UNACCEPTED_UNIT_SIZE * BITS_PER_BYTE);
+
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA,
+			     sizeof(*unaccepted_table) + bitmap_size,
+			     (void **)&unaccepted_table);
+	if (status != EFI_SUCCESS) {
+		efi_err("Failed to allocate unaccepted memory config table\n");
+		return status;
+	}
+
+	unaccepted_table->version = 1;
+	unaccepted_table->unit_size = EFI_UNACCEPTED_UNIT_SIZE;
+	unaccepted_table->phys_base = unaccepted_start;
+	unaccepted_table->size = bitmap_size;
+	memset(unaccepted_table->bitmap, 0, bitmap_size);
+
+	status = efi_bs_call(install_configuration_table,
+			     &unaccepted_table_guid, unaccepted_table);
+	if (status != EFI_SUCCESS) {
+		efi_bs_call(free_pool, unaccepted_table);
+		/* do not leave the global pointing at the freed pool memory */
+		unaccepted_table = NULL;
+		efi_err("Failed to install unaccepted memory config table!\n");
+	}
+
+	return status;
+}
+
+/*
+ * The accepted memory bitmap only works at unit_size granularity. Take
+ * unaligned start/end addresses and either:
+ * 1. Accepts the memory immediately and in its entirety
+ * 2. Accepts unaligned parts, and marks *some* aligned part unaccepted
+ *
+ * Parts of the range that lie outside the span covered by the bitmap (below
+ * phys_base or beyond its last bit) cannot be recorded and are accepted
+ * immediately as well.
+ *
+ * The function will never reach the bitmap_set() with zero bits to set.
+ */
+void process_unaccepted_memory(u64 start, u64 end)
+{
+	u64 unit_size = unaccepted_table->unit_size;
+	u64 unit_mask = unaccepted_table->unit_size - 1;
+	u64 bitmap_size = unaccepted_table->size;
+	/* number of bytes of address space the bitmap can describe */
+	u64 bitmap_span = bitmap_size * unit_size * BITS_PER_BYTE;
+
+	/*
+	 * Ensure that at least one bit will be set in the bitmap by
+	 * immediately accepting all regions under 2*unit_size. This is
+	 * imprecise and may immediately accept some areas that could
+	 * have been represented in the bitmap. But, results in simpler
+	 * code below
+	 *
+	 * Consider case like this (assuming unit_size == 2MB):
+	 *
+	 * | 4k | 2044k |    2048k   |
+	 * ^ 0x0        ^ 2MB        ^ 4MB
+	 *
+	 * Only the first 4k has been accepted. The 0MB->2MB region can not be
+	 * represented in the bitmap. The 2MB->4MB region can be represented in
+	 * the bitmap. But, the 0MB->4MB region is <2*unit_size and will be
+	 * immediately accepted in its entirety.
+	 */
+	if (end - start < 2 * unit_size) {
+		arch_accept_memory(start, end);
+		return;
+	}
+
+	/*
+	 * No matter how the start and end are aligned, at least one unaccepted
+	 * unit_size area will remain to be marked in the bitmap.
+	 */
+
+	/* Immediately accept a <unit_size piece at the start: */
+	if (start & unit_mask) {
+		arch_accept_memory(start, round_up(start, unit_size));
+		start = round_up(start, unit_size);
+	}
+
+	/* Immediately accept a <unit_size piece at the end: */
+	if (end & unit_mask) {
+		arch_accept_memory(round_down(end, unit_size), end);
+		end = round_down(end, unit_size);
+	}
+
+	/*
+	 * Accept the part of the range that lies before phys_base and cannot
+	 * be recorded in the bitmap.
+	 */
+	if (start < unaccepted_table->phys_base) {
+		arch_accept_memory(start,
+				   min(unaccepted_table->phys_base, end));
+		start = unaccepted_table->phys_base;
+	}
+
+	/* Nothing to record */
+	if (end < unaccepted_table->phys_base)
+		return;
+
+	/* Translate to offsets from the beginning of the bitmap */
+	start -= unaccepted_table->phys_base;
+	end -= unaccepted_table->phys_base;
+
+	/*
+	 * Accept memory that doesn't fit into bitmap. Only the part of *this*
+	 * range beyond the bitmap is accepted: when the range starts past the
+	 * end of the bitmap, the gap before 'start' is left alone.
+	 */
+	if (end > bitmap_span) {
+		u64 tail_start = max(start, bitmap_span);
+
+		arch_accept_memory(tail_start + unaccepted_table->phys_base,
+				   end + unaccepted_table->phys_base);
+		end = bitmap_span;
+	}
+
+	/*
+	 * Everything was accepted above (or the range is empty): there is
+	 * nothing left to record, and 'end - start' must not underflow.
+	 */
+	if (start >= end)
+		return;
+
+	/*
+	 * 'start' and 'end' are now both unit_size-aligned.
+	 * Record the range as being unaccepted:
+	 */
+	bitmap_set(unaccepted_table->bitmap,
+		   start / unit_size, (end - start) / unit_size);
+}
+
+/*
+ * Accept every still-unaccepted unit that overlaps [start, end) and clear
+ * the corresponding bits in the bitmap. Does nothing when no unaccepted
+ * memory table is in use; parts of the range outside the bitmap's coverage
+ * are ignored.
+ */
+void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	unsigned long first_bit, last_bit;
+	unsigned long end_bit;
+	u64 unit_size;
+
+	if (!unaccepted_table)
+		return;
+
+	unit_size = unaccepted_table->unit_size;
+
+	/* A range that ends below the bitmap's base has nothing recorded. */
+	if (end < unaccepted_table->phys_base)
+		return;
+
+	/* Ignore whatever precedes the part represented in the bitmap. */
+	if (start < unaccepted_table->phys_base)
+		start = unaccepted_table->phys_base;
+
+	/* From here on work with offsets relative to the bitmap's base. */
+	start -= unaccepted_table->phys_base;
+	end -= unaccepted_table->phys_base;
+
+	/* Clamp the end so the scan cannot run past the bitmap. */
+	if (end > unaccepted_table->size * unit_size * BITS_PER_BYTE)
+		end = unaccepted_table->size * unit_size * BITS_PER_BYTE;
+
+	first_bit = start / unit_size;
+	end_bit = DIV_ROUND_UP(end, unit_size);
+
+	/* Walk each run of set bits, accept it, then mark it accepted. */
+	for_each_set_bitrange_from(first_bit, last_bit,
+				   unaccepted_table->bitmap, end_bit) {
+		unsigned long run_start, run_end;
+
+		run_start = first_bit * unit_size + unaccepted_table->phys_base;
+		run_end = last_bit * unit_size + unaccepted_table->phys_base;
+
+		arch_accept_memory(run_start, run_end);
+		bitmap_clear(unaccepted_table->bitmap,
+			     first_bit, last_bit - first_bit);
+	}
+}
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index a0bfd31358ba..220be75a5cdc 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -26,6 +26,17 @@ const efi_dxe_services_table_t *efi_dxe_table;
u32 image_offset __section(".data");
static efi_loaded_image_t *image = NULL;
+/*
+ * Layout of the protocol located through
+ * OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID. The first member is the native
+ * view with a real function pointer; 'mixed_mode' overlays the same slot as
+ * a 32-bit value.
+ */
+typedef union sev_memory_acceptance_protocol sev_memory_acceptance_protocol_t;
+union sev_memory_acceptance_protocol {
+ struct {
+ efi_status_t (__efiapi * allow_unaccepted_memory)(
+ sev_memory_acceptance_protocol_t *);
+ };
+ struct {
+ u32 allow_unaccepted_memory;
+ } mixed_mode;
+};
+
static efi_status_t
preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
{
@@ -310,6 +321,29 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size)
#endif
}
+/*
+ * If the firmware exposes the memory acceptance protocol, call its
+ * allow_unaccepted_memory() method. A missing protocol is silently ignored;
+ * a failing call is only reported.
+ */
+static void setup_unaccepted_memory(void)
+{
+	efi_guid_t guid = OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID;
+	sev_memory_acceptance_protocol_t *proto;
+	efi_status_t status;
+
+	if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
+		return;
+
+	/*
+	 * Enable unaccepted memory before calling exit boot services in order
+	 * for the UEFI to not accept all memory on EBS.
+	 */
+	status = efi_bs_call(locate_protocol, &guid, NULL, (void **)&proto);
+	if (status == EFI_SUCCESS) {
+		status = efi_call_proto(proto, allow_unaccepted_memory);
+		if (status != EFI_SUCCESS)
+			efi_err("Memory acceptance protocol failed\n");
+	}
+}
+
static const efi_char16_t apple[] = L"Apple";
static void setup_quirks(struct boot_params *boot_params,
@@ -613,6 +647,16 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s
e820_type = E820_TYPE_PMEM;
break;
+ case EFI_UNACCEPTED_MEMORY:
+ if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) {
+ efi_warn_once(
+"The system has unaccepted memory, but kernel does not support it\nConsider enabling CONFIG_UNACCEPTED_MEMORY\n");
+ continue;
+ }
+ e820_type = E820_TYPE_RAM;
+ process_unaccepted_memory(d->phys_addr,
+ d->phys_addr + PAGE_SIZE * d->num_pages);
+ break;
default:
continue;
}
@@ -681,28 +725,27 @@ static efi_status_t allocate_e820(struct boot_params *params,
struct setup_data **e820ext,
u32 *e820ext_size)
{
- unsigned long map_size, desc_size, map_key;
+ struct efi_boot_memmap *map;
efi_status_t status;
- __u32 nr_desc, desc_version;
-
- /* Only need the size of the mem map and size of each mem descriptor */
- map_size = 0;
- status = efi_bs_call(get_memory_map, &map_size, NULL, &map_key,
- &desc_size, &desc_version);
- if (status != EFI_BUFFER_TOO_SMALL)
- return (status != EFI_SUCCESS) ? status : EFI_UNSUPPORTED;
+ __u32 nr_desc;
- nr_desc = map_size / desc_size + EFI_MMAP_NR_SLACK_SLOTS;
+ status = efi_get_memory_map(&map, false);
+ if (status != EFI_SUCCESS)
+ return status;
- if (nr_desc > ARRAY_SIZE(params->e820_table)) {
- u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
+ nr_desc = map->map_size / map->desc_size;
+ if (nr_desc > ARRAY_SIZE(params->e820_table) - EFI_MMAP_NR_SLACK_SLOTS) {
+ u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table) +
+ EFI_MMAP_NR_SLACK_SLOTS;
status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
- if (status != EFI_SUCCESS)
- return status;
}
- return EFI_SUCCESS;
+ if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY) && status == EFI_SUCCESS)
+ status = allocate_unaccepted_bitmap(nr_desc, map);
+
+ efi_bs_call(free_pool, map);
+ return status;
}
struct exit_boot_struct {
@@ -899,6 +942,8 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
+ setup_unaccepted_memory();
+
status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
efi_err("exit_boot() failed!\n");
diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c
new file mode 100644
index 000000000000..853f7dc3c21d
--- /dev/null
+++ b/drivers/firmware/efi/unaccepted_memory.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/efi.h>
+#include <linux/memblock.h>
+#include <linux/spinlock.h>
+#include <asm/unaccepted_memory.h>
+
+/* Protects unaccepted memory bitmap */
+static DEFINE_SPINLOCK(unaccepted_memory_lock);
+
+/*
+ * accept_memory() -- Consult bitmap and accept the memory if needed.
+ *
+ * Only memory that is explicitly marked as unaccepted in the bitmap requires
+ * an action. All the remaining memory is implicitly accepted and doesn't need
+ * acceptance.
+ *
+ * No need to accept:
+ * - anything if the system has no unaccepted table;
+ * - memory that is below phys_base;
+ * - memory that is above the memory that addressable by the bitmap;
+ */
+void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+ struct efi_unaccepted_memory *unaccepted;
+ unsigned long range_start, range_end;
+ unsigned long flags;
+ u64 unit_size;
+
+ unaccepted = efi_get_unaccepted_table();
+ if (!unaccepted)
+ return;
+
+ unit_size = unaccepted->unit_size;
+
+ /*
+ * Only care for the part of the range that is represented
+ * in the bitmap.
+ */
+ if (start < unaccepted->phys_base)
+ start = unaccepted->phys_base;
+ if (end < unaccepted->phys_base)
+ return;
+
+ /* Translate to offsets from the beginning of the bitmap */
+ start -= unaccepted->phys_base;
+ end -= unaccepted->phys_base;
+
+ /*
+ * load_unaligned_zeropad() can lead to unwanted loads across page
+ * boundaries. The unwanted loads are typically harmless. But, they
+ * might be made to totally unrelated or even unmapped memory.
+ * load_unaligned_zeropad() relies on exception fixup (#PF, #GP and now
+ * #VE) to recover from these unwanted loads.
+ *
+ * But, this approach does not work for unaccepted memory. For TDX, a
+ * load from unaccepted memory will not lead to a recoverable exception
+ * within the guest. The guest will exit to the VMM where the only
+ * recourse is to terminate the guest.
+ *
+ * There are two parts to fix this issue and comprehensively avoid
+ * access to unaccepted memory. Together these ensure that an extra
+ * "guard" page is accepted in addition to the memory that needs to be
+ * used:
+ *
+ * 1. Implicitly extend the range_contains_unaccepted_memory(start, end)
+ * checks up to end+unit_size if 'end' is aligned on a unit_size
+ * boundary.
+ *
+ * 2. Implicitly extend accept_memory(start, end) to end+unit_size if
+ * 'end' is aligned on a unit_size boundary. (immediately following
+ * this comment)
+ */
+ if (!(end % unit_size))
+ end += unit_size;
+
+ /* Make sure not to overrun the bitmap */
+ if (end > unaccepted->size * unit_size * BITS_PER_BYTE)
+ end = unaccepted->size * unit_size * BITS_PER_BYTE;
+
+ range_start = start / unit_size;
+
+ spin_lock_irqsave(&unaccepted_memory_lock, flags);
+ for_each_set_bitrange_from(range_start, range_end, unaccepted->bitmap,
+ DIV_ROUND_UP(end, unit_size)) {
+ unsigned long phys_start, phys_end;
+ unsigned long len = range_end - range_start;
+
+ phys_start = range_start * unit_size + unaccepted->phys_base;
+ phys_end = range_end * unit_size + unaccepted->phys_base;
+
+ arch_accept_memory(phys_start, phys_end);
+ bitmap_clear(unaccepted->bitmap, range_start, len);
+ }
+ spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
+}
+
+bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end)
+{
+ struct efi_unaccepted_memory *unaccepted;
+ unsigned long flags;
+ bool ret = false;
+ u64 unit_size;
+
+ unaccepted = efi_get_unaccepted_table();
+ if (!unaccepted)
+ return false;
+
+ unit_size = unaccepted->unit_size;
+
+ /*
+ * Only care for the part of the range that is represented
+ * in the bitmap.
+ */
+ if (start < unaccepted->phys_base)
+ start = unaccepted->phys_base;
+ if (end < unaccepted->phys_base)
+ return false;
+
+ /* Translate to offsets from the beginning of the bitmap */
+ start -= unaccepted->phys_base;
+ end -= unaccepted->phys_base;
+
+ /*
+ * Also consider the unaccepted state of the *next* page. See fix #1 in
+ * the comment on load_unaligned_zeropad() in accept_memory().
+ */
+ if (!(end % unit_size))
+ end += unit_size;
+
+ /* Make sure not to overrun the bitmap */
+ if (end > unaccepted->size * unit_size * BITS_PER_BYTE)
+ end = unaccepted->size * unit_size * BITS_PER_BYTE;
+
+ spin_lock_irqsave(&unaccepted_memory_lock, flags);
+ while (start < end) {
+ if (test_bit(start / unit_size, unaccepted->bitmap)) {
+ ret = true;
+ break;
+ }
+
+ start += unit_size;
+ }
+ spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
+
+ return ret;
+}
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 94b49ccd23ac..71f51303c2ba 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -42,8 +42,6 @@ static const struct {
};
#define IBFT_SIGN_LEN 4
-#define IBFT_START 0x80000 /* 512kB */
-#define IBFT_END 0x100000 /* 1MB */
#define VGA_MEM 0xA0000 /* VGA buffer */
#define VGA_SIZE 0x20000 /* 128kB */
@@ -52,9 +50,9 @@ static const struct {
*/
void __init reserve_ibft_region(void)
{
- unsigned long pos;
+ unsigned long pos, virt_pos = 0;
unsigned int len = 0;
- void *virt;
+ void *virt = NULL;
int i;
ibft_phys_addr = 0;
@@ -70,13 +68,20 @@ void __init reserve_ibft_region(void)
* so skip that area */
if (pos == VGA_MEM)
pos += VGA_SIZE;
- virt = isa_bus_to_virt(pos);
+
+ /* Map page by page */
+ if (offset_in_page(pos) == 0) {
+ if (virt)
+ early_memunmap(virt, PAGE_SIZE);
+ virt = early_memremap_ro(pos, PAGE_SIZE);
+ virt_pos = pos;
+ }
for (i = 0; i < ARRAY_SIZE(ibft_signs); i++) {
- if (memcmp(virt, ibft_signs[i].sign, IBFT_SIGN_LEN) ==
- 0) {
+ if (memcmp(virt + (pos - virt_pos), ibft_signs[i].sign,
+ IBFT_SIGN_LEN) == 0) {
unsigned long *addr =
- (unsigned long *)isa_bus_to_virt(pos + 4);
+ (unsigned long *)(virt + pos - virt_pos + 4);
len = *addr;
/* if the length of the table extends past 1M,
* the table cannot be valid. */
@@ -84,9 +89,12 @@ void __init reserve_ibft_region(void)
ibft_phys_addr = pos;
memblock_reserve(ibft_phys_addr, PAGE_ALIGN(len));
pr_info("iBFT found at %pa.\n", &ibft_phys_addr);
- return;
+ goto out;
}
}
}
}
+
+out:
+ early_memunmap(virt, PAGE_SIZE);
}
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index f2253fd5ab4b..8ff5f4ff5958 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -100,13 +100,23 @@ static const struct regmap_irq dio48e_regmap_irqs[] = {
DIO48E_REGMAP_IRQ(0), DIO48E_REGMAP_IRQ(1),
};
-static int dio48e_handle_mask_sync(struct regmap *const map, const int index,
+/**
+ * struct dio48e_gpio - GPIO device private data structure
+ * @map: Regmap for the device
+ * @irq_mask: Current IRQ mask state on the device
+ */
+struct dio48e_gpio {
+ struct regmap *map;
+ unsigned int irq_mask;
+};
+
+static int dio48e_handle_mask_sync(const int index,
const unsigned int mask_buf_def,
const unsigned int mask_buf,
void *const irq_drv_data)
{
- unsigned int *const irq_mask = irq_drv_data;
- const unsigned int prev_mask = *irq_mask;
+ struct dio48e_gpio *const dio48egpio = irq_drv_data;
+ const unsigned int prev_mask = dio48egpio->irq_mask;
int err;
unsigned int val;
@@ -115,19 +125,19 @@ static int dio48e_handle_mask_sync(struct regmap *const map, const int index,
return 0;
/* remember the current mask for the next mask sync */
- *irq_mask = mask_buf;
+ dio48egpio->irq_mask = mask_buf;
/* if all previously masked, enable interrupts when unmasking */
if (prev_mask == mask_buf_def) {
- err = regmap_write(map, DIO48E_CLEAR_INTERRUPT, 0x00);
+ err = regmap_write(dio48egpio->map, DIO48E_CLEAR_INTERRUPT, 0x00);
if (err)
return err;
- return regmap_write(map, DIO48E_ENABLE_INTERRUPT, 0x00);
+ return regmap_write(dio48egpio->map, DIO48E_ENABLE_INTERRUPT, 0x00);
}
/* if all are currently masked, disable interrupts */
if (mask_buf == mask_buf_def)
- return regmap_read(map, DIO48E_DISABLE_INTERRUPT, &val);
+ return regmap_read(dio48egpio->map, DIO48E_DISABLE_INTERRUPT, &val);
return 0;
}
@@ -168,7 +178,7 @@ static int dio48e_probe(struct device *dev, unsigned int id)
struct regmap *map;
int err;
struct regmap_irq_chip *chip;
- unsigned int irq_mask;
+ struct dio48e_gpio *dio48egpio;
struct regmap_irq_chip_data *chip_data;
if (!devm_request_region(dev, base[id], DIO48E_EXTENT, name)) {
@@ -186,12 +196,14 @@ static int dio48e_probe(struct device *dev, unsigned int id)
return dev_err_probe(dev, PTR_ERR(map),
"Unable to initialize register map\n");
- chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
- if (!chip)
+ dio48egpio = devm_kzalloc(dev, sizeof(*dio48egpio), GFP_KERNEL);
+ if (!dio48egpio)
return -ENOMEM;
- chip->irq_drv_data = devm_kzalloc(dev, sizeof(irq_mask), GFP_KERNEL);
- if (!chip->irq_drv_data)
+ dio48egpio->map = map;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
return -ENOMEM;
chip->name = name;
@@ -202,6 +214,7 @@ static int dio48e_probe(struct device *dev, unsigned int id)
chip->irqs = dio48e_regmap_irqs;
chip->num_irqs = ARRAY_SIZE(dio48e_regmap_irqs);
chip->handle_mask_sync = dio48e_handle_mask_sync;
+ chip->irq_drv_data = dio48egpio;
/* Initialize to prevent spurious interrupts before we're ready */
err = dio48e_irq_init_hw(map);
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index 98939cd4a71e..745e5f67254e 100644
--- a/drivers/gpio/gpio-sifive.c
+++ b/drivers/gpio/gpio-sifive.c
@@ -221,8 +221,12 @@ static int sifive_gpio_probe(struct platform_device *pdev)
return -ENODEV;
}
- for (i = 0; i < ngpio; i++)
- chip->irq_number[i] = platform_get_irq(pdev, i);
+ for (i = 0; i < ngpio; i++) {
+ ret = platform_get_irq(pdev, i);
+ if (ret < 0)
+ return ret;
+ chip->irq_number[i] = ret;
+ }
ret = bgpio_init(&chip->gc, dev, 4,
chip->base + SIFIVE_GPIO_INPUT_VAL,
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a7220e04a93e..5be8ad61523e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1745,7 +1745,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
}
/* Remove all IRQ mappings and delete the domain */
- if (gc->irq.domain) {
+ if (!gc->irq.domain_is_allocated_externally && gc->irq.domain) {
unsigned int irq;
for (offset = 0; offset < gc->ngpio; offset++) {
@@ -1791,6 +1791,15 @@ int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
gc->to_irq = gpiochip_to_irq;
gc->irq.domain = domain;
+ gc->irq.domain_is_allocated_externally = true;
+
+ /*
+ * Using barrier() here to prevent compiler from reordering
+ * gc->irq.initialized before adding irqdomain.
+ */
+ barrier();
+
+ gc->irq.initialized = true;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 1c5d9388ad0b..5f610e9a5f0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1509,7 +1509,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios)
str = CSTR(idx);
if (*str != '\0') {
pr_info("ATOM BIOS: %s\n", str);
- strlcpy(ctx->vbios_version, str, sizeof(ctx->vbios_version));
+ strscpy(ctx->vbios_version, str, sizeof(ctx->vbios_version));
}
atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base);
diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
index d3fe149d8476..81fb4e5dd804 100644
--- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
+++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
@@ -794,7 +794,7 @@ void amdgpu_add_thermal_controller(struct amdgpu_device *adev)
struct i2c_board_info info = { };
const char *name = pp_lib_thermal_controller_names[controller->ucType];
info.addr = controller->ucI2cAddress >> 1;
- strlcpy(info.type, name, sizeof(info.type));
+ strscpy(info.type, name, sizeof(info.type));
i2c_new_client_device(&adev->pm.i2c_bus->adapter, &info);
}
} else {
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 16565a0a5da6..e6a78fd32380 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -2103,7 +2103,7 @@ int drm_dp_aux_register(struct drm_dp_aux *aux)
aux->ddc.owner = THIS_MODULE;
aux->ddc.dev.parent = aux->dev;
- strlcpy(aux->ddc.name, aux->name ? aux->name : dev_name(aux->dev),
+ strscpy(aux->ddc.name, aux->name ? aux->name : dev_name(aux->dev),
sizeof(aux->ddc.name));
ret = drm_dp_aux_register_devnode(aux);
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index 38dab76ae69e..943a00db77d4 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3404,7 +3404,7 @@ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr,
/* Skip failed payloads */
if (payload->vc_start_slot == -1) {
- drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
+ drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n",
payload->port->connector->name);
return -EIO;
}
@@ -5702,7 +5702,7 @@ static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port)
aux->ddc.dev.parent = parent_dev;
aux->ddc.dev.of_node = parent_dev->of_node;
- strlcpy(aux->ddc.name, aux->name ? aux->name : dev_name(parent_dev),
+ strscpy(aux->ddc.name, aux->name ? aux->name : dev_name(parent_dev),
sizeof(aux->ddc.name));
return i2c_add_adapter(&aux->ddc);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 1a5a2cd0d4ec..78dcae201cc6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -496,13 +496,13 @@ int drm_gem_create_mmap_offset(struct drm_gem_object *obj)
EXPORT_SYMBOL(drm_gem_create_mmap_offset);
/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
+ * Move folios to appropriate lru and release the folios, decrementing the
+ * ref count of those folios.
*/
-static void drm_gem_check_release_pagevec(struct pagevec *pvec)
+static void drm_gem_check_release_batch(struct folio_batch *fbatch)
{
- check_move_unevictable_pages(pvec);
- __pagevec_release(pvec);
+ check_move_unevictable_folios(fbatch);
+ __folio_batch_release(fbatch);
cond_resched();
}
@@ -534,10 +534,10 @@ static void drm_gem_check_release_pagevec(struct pagevec *pvec)
struct page **drm_gem_get_pages(struct drm_gem_object *obj)
{
struct address_space *mapping;
- struct page *p, **pages;
- struct pagevec pvec;
- int i, npages;
-
+ struct page **pages;
+ struct folio *folio;
+ struct folio_batch fbatch;
+ int i, j, npages;
if (WARN_ON(!obj->filp))
return ERR_PTR(-EINVAL);
@@ -559,11 +559,14 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
mapping_set_unevictable(mapping);
- for (i = 0; i < npages; i++) {
- p = shmem_read_mapping_page(mapping, i);
- if (IS_ERR(p))
+ i = 0;
+ while (i < npages) {
+ folio = shmem_read_folio_gfp(mapping, i,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
goto fail;
- pages[i] = p;
+ for (j = 0; j < folio_nr_pages(folio); j++, i++)
+ pages[i] = folio_file_page(folio, i);
/* Make sure shmem keeps __GFP_DMA32 allocated pages in the
* correct region during swapin. Note that this requires
@@ -571,23 +574,26 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj)
* so shmem can relocate pages during swapin if required.
*/
BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) &&
- (page_to_pfn(p) >= 0x00100000UL));
+ (folio_pfn(folio) >= 0x00100000UL));
}
return pages;
fail:
mapping_clear_unevictable(mapping);
- pagevec_init(&pvec);
- while (i--) {
- if (!pagevec_add(&pvec, pages[i]))
- drm_gem_check_release_pagevec(&pvec);
+ folio_batch_init(&fbatch);
+ j = 0;
+ while (j < i) {
+ struct folio *f = page_folio(pages[j]);
+ if (!folio_batch_add(&fbatch, f))
+ drm_gem_check_release_batch(&fbatch);
+ j += folio_nr_pages(f);
}
- if (pagevec_count(&pvec))
- drm_gem_check_release_pagevec(&pvec);
+ if (fbatch.nr)
+ drm_gem_check_release_batch(&fbatch);
kvfree(pages);
- return ERR_CAST(p);
+ return ERR_CAST(folio);
}
EXPORT_SYMBOL(drm_gem_get_pages);
@@ -603,7 +609,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
{
int i, npages;
struct address_space *mapping;
- struct pagevec pvec;
+ struct folio_batch fbatch;
mapping = file_inode(obj->filp)->i_mapping;
mapping_clear_unevictable(mapping);
@@ -616,23 +622,27 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
npages = obj->size >> PAGE_SHIFT;
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
for (i = 0; i < npages; i++) {
+ struct folio *folio;
+
if (!pages[i])
continue;
+ folio = page_folio(pages[i]);
if (dirty)
- set_page_dirty(pages[i]);
+ folio_mark_dirty(folio);
if (accessed)
- mark_page_accessed(pages[i]);
+ folio_mark_accessed(folio);
/* Undo the reference we took when populating the table */
- if (!pagevec_add(&pvec, pages[i]))
- drm_gem_check_release_pagevec(&pvec);
+ if (!folio_batch_add(&fbatch, folio))
+ drm_gem_check_release_batch(&fbatch);
+ i += folio_nr_pages(folio) - 1;
}
- if (pagevec_count(&pvec))
- drm_gem_check_release_pagevec(&pvec);
+ if (folio_batch_count(&fbatch))
+ drm_gem_check_release_batch(&fbatch);
kvfree(pages);
}
diff --git a/drivers/gpu/drm/drm_managed.c b/drivers/gpu/drm/drm_managed.c
index c21c3f623033..5423ad883729 100644
--- a/drivers/gpu/drm/drm_managed.c
+++ b/drivers/gpu/drm/drm_managed.c
@@ -49,10 +49,10 @@ struct drmres {
* Some archs want to perform DMA into kmalloc caches
* and need a guaranteed alignment larger than
* the alignment of a 64-bit integer.
- * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
- * buffer alignment as if it was allocated by plain kmalloc().
+ * Thus we use ARCH_DMA_MINALIGN for data[] which will force the same
+ * alignment for struct drmres when allocated by kmalloc().
*/
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+ u8 __aligned(ARCH_DMA_MINALIGN) data[];
};
static void free_dr(struct drmres *dr)
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 3fd6c733ff4e..6252ac01e945 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -223,7 +223,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
device_set_node(&dsi->dev, of_fwnode_handle(info->node));
dsi->channel = info->channel;
- strlcpy(dsi->name, info->type, sizeof(dsi->name));
+ strscpy(dsi->name, info->type, sizeof(dsi->name));
ret = mipi_dsi_device_add(dsi);
if (ret) {
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index db5c9343a3d2..0918d80672bb 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -1951,7 +1951,7 @@ static int tda998x_create(struct device *dev)
* offset.
*/
memset(&cec_info, 0, sizeof(cec_info));
- strlcpy(cec_info.type, "tda9950", sizeof(cec_info.type));
+ strscpy(cec_info.type, "tda9950", sizeof(cec_info.type));
cec_info.addr = priv->cec_addr;
cec_info.platform_data = &priv->cec_glue;
cec_info.irq = client->irq;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..adf1154c0e10 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -19,13 +19,13 @@
#include "i915_trace.h"
/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
+ * Move folios to appropriate lru and release the batch, decrementing the
+ * ref count of those folios.
*/
-static void check_release_pagevec(struct pagevec *pvec)
+static void check_release_folio_batch(struct folio_batch *fbatch)
{
- check_move_unevictable_pages(pvec);
- __pagevec_release(pvec);
+ check_move_unevictable_folios(fbatch);
+ __folio_batch_release(fbatch);
cond_resched();
}
@@ -33,24 +33,29 @@ void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
bool dirty, bool backup)
{
struct sgt_iter sgt_iter;
- struct pagevec pvec;
+ struct folio_batch fbatch;
+ struct folio *last = NULL;
struct page *page;
mapping_clear_unevictable(mapping);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
for_each_sgt_page(page, sgt_iter, st) {
- if (dirty)
- set_page_dirty(page);
+ struct folio *folio = page_folio(page);
+ if (folio == last)
+ continue;
+ last = folio;
+ if (dirty)
+ folio_mark_dirty(folio);
if (backup)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
- if (!pagevec_add(&pvec, page))
- check_release_pagevec(&pvec);
+ if (!folio_batch_add(&fbatch, folio))
+ check_release_folio_batch(&fbatch);
}
- if (pagevec_count(&pvec))
- check_release_pagevec(&pvec);
+ if (fbatch.nr)
+ check_release_folio_batch(&fbatch);
sg_free_table(st);
}
@@ -63,8 +68,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
unsigned int page_count; /* restricted by sg_alloc_table */
unsigned long i;
struct scatterlist *sg;
- struct page *page;
- unsigned long last_pfn = 0; /* suppress gcc warning */
+ unsigned long next_pfn = 0; /* suppress gcc warning */
gfp_t noreclaim;
int ret;
@@ -95,6 +99,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
sg = st->sgl;
st->nents = 0;
for (i = 0; i < page_count; i++) {
+ struct folio *folio;
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
0,
@@ -103,12 +108,12 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
do {
cond_resched();
- page = shmem_read_mapping_page_gfp(mapping, i, gfp);
- if (!IS_ERR(page))
+ folio = shmem_read_folio_gfp(mapping, i, gfp);
+ if (!IS_ERR(folio))
break;
if (!*s) {
- ret = PTR_ERR(page);
+ ret = PTR_ERR(folio);
goto err_sg;
}
@@ -147,19 +152,21 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
if (!i ||
sg->length >= max_segment ||
- page_to_pfn(page) != last_pfn + 1) {
+ folio_pfn(folio) != next_pfn) {
if (i)
sg = sg_next(sg);
st->nents++;
- sg_set_page(sg, page, PAGE_SIZE, 0);
+ sg_set_folio(sg, folio, folio_size(folio), 0);
} else {
- sg->length += PAGE_SIZE;
+ /* XXX: could overflow? */
+ sg->length += folio_size(folio);
}
- last_pfn = page_to_pfn(page);
+ next_pfn = folio_pfn(folio) + folio_nr_pages(folio);
+ i += folio_nr_pages(folio) - 1;
/* Check that the i965g/gm workaround works. */
- GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
+ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
}
if (sg) /* loop terminated early; short sg table */
sg_mark_end(sg);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 56279908ed30..01e271b6ad21 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -1681,7 +1681,9 @@ static int igt_mmap_gpu(void *arg)
static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
{
- if (!pte_present(*pte) || pte_none(*pte)) {
+ pte_t ptent = ptep_get(pte);
+
+ if (!pte_present(ptent) || pte_none(ptent)) {
pr_err("missing PTE:%lx\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;
@@ -1692,7 +1694,9 @@ static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
{
- if (pte_present(*pte) && !pte_none(*pte)) {
+ pte_t ptent = ptep_get(pte);
+
+ if (pte_present(ptent) && !pte_none(ptent)) {
pr_err("present PTE:%lx; expected to be revoked\n",
(addr - (unsigned long)data) >> PAGE_SHIFT);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f020c0086fbc..35f70bb8e4fb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -187,64 +187,64 @@ i915_error_printer(struct drm_i915_error_state_buf *e)
}
/* single threaded page allocator with a reserved stash for emergencies */
-static void pool_fini(struct pagevec *pv)
+static void pool_fini(struct folio_batch *fbatch)
{
- pagevec_release(pv);
+ folio_batch_release(fbatch);
}
-static int pool_refill(struct pagevec *pv, gfp_t gfp)
+static int pool_refill(struct folio_batch *fbatch, gfp_t gfp)
{
- while (pagevec_space(pv)) {
- struct page *p;
+ while (folio_batch_space(fbatch)) {
+ struct folio *folio;
- p = alloc_page(gfp);
- if (!p)
+ folio = folio_alloc(gfp, 0);
+ if (!folio)
return -ENOMEM;
- pagevec_add(pv, p);
+ folio_batch_add(fbatch, folio);
}
return 0;
}
-static int pool_init(struct pagevec *pv, gfp_t gfp)
+static int pool_init(struct folio_batch *fbatch, gfp_t gfp)
{
int err;
- pagevec_init(pv);
+ folio_batch_init(fbatch);
- err = pool_refill(pv, gfp);
+ err = pool_refill(fbatch, gfp);
if (err)
- pool_fini(pv);
+ pool_fini(fbatch);
return err;
}
-static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
+static void *pool_alloc(struct folio_batch *fbatch, gfp_t gfp)
{
- struct page *p;
+ struct folio *folio;
- p = alloc_page(gfp);
- if (!p && pagevec_count(pv))
- p = pv->pages[--pv->nr];
+ folio = folio_alloc(gfp, 0);
+ if (!folio && folio_batch_count(fbatch))
+ folio = fbatch->folios[--fbatch->nr];
- return p ? page_address(p) : NULL;
+ return folio ? folio_address(folio) : NULL;
}
-static void pool_free(struct pagevec *pv, void *addr)
+static void pool_free(struct folio_batch *fbatch, void *addr)
{
- struct page *p = virt_to_page(addr);
+ struct folio *folio = virt_to_folio(addr);
- if (pagevec_space(pv))
- pagevec_add(pv, p);
+ if (folio_batch_space(fbatch))
+ folio_batch_add(fbatch, folio);
else
- __free_page(p);
+ folio_put(folio);
}
#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
struct i915_vma_compress {
- struct pagevec pool;
+ struct folio_batch pool;
struct z_stream_s zstream;
void *tmp;
};
@@ -381,7 +381,7 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
#else
struct i915_vma_compress {
- struct pagevec pool;
+ struct folio_batch pool;
};
static bool compress_init(struct i915_vma_compress *c)
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
index 2fc9214ffa82..4d39ea0a05ca 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi_ddc.c
@@ -295,7 +295,7 @@ static int mtk_hdmi_ddc_probe(struct platform_device *pdev)
return ret;
}
- strlcpy(ddc->adap.name, "mediatek-hdmi-ddc", sizeof(ddc->adap.name));
+ strscpy(ddc->adap.name, "mediatek-hdmi-ddc", sizeof(ddc->adap.name));
ddc->adap.owner = THIS_MODULE;
ddc->adap.class = I2C_CLASS_DDC;
ddc->adap.algo = &mtk_hdmi_ddc_algorithm;
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 4ad5a328d920..bf3c411a55c5 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2105,7 +2105,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
const char *name = thermal_controller_names[power_info->info.
ucOverdriveThermalController];
info.addr = power_info->info.ucOverdriveControllerAddress >> 1;
- strlcpy(info.type, name, sizeof(info.type));
+ strscpy(info.type, name, sizeof(info.type));
i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
}
@@ -2355,7 +2355,7 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r
struct i2c_board_info info = { };
const char *name = pp_lib_thermal_controller_names[controller->ucType];
info.addr = controller->ucI2cAddress >> 1;
- strlcpy(info.type, name, sizeof(info.type));
+ strscpy(info.type, name, sizeof(info.type));
i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
} else {
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 783a6b8802d5..795c3667f6d6 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -2702,7 +2702,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
struct i2c_board_info info = { };
const char *name = thermal_controller_names[thermal_controller];
info.addr = i2c_addr >> 1;
- strlcpy(info.type, name, sizeof(info.type));
+ strscpy(info.type, name, sizeof(info.type));
i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
}
}
@@ -2719,7 +2719,7 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
struct i2c_board_info info = { };
const char *name = "f75375";
info.addr = 0x28;
- strlcpy(info.type, name, sizeof(info.type));
+ strscpy(info.type, name, sizeof(info.type));
i2c_new_client_device(&rdev->pm.i2c_bus->adapter, &info);
DRM_INFO("Possible %s thermal controller at 0x%02x\n",
name, info.addr);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 2220cdf6a3f6..3a9db030f98f 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -359,7 +359,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm
struct page **pages = ttm->pages + pinned;
r = get_user_pages(userptr, num_pages, write ? FOLL_WRITE : 0,
- pages, NULL);
+ pages);
if (r < 0)
goto release_pages;
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index f51774866f41..9afb889963c1 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -797,7 +797,7 @@ static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi)
adap->dev.parent = hdmi->dev;
adap->dev.of_node = hdmi->dev->of_node;
adap->algo = &inno_hdmi_algorithm;
- strlcpy(adap->name, "Inno HDMI", sizeof(adap->name));
+ strscpy(adap->name, "Inno HDMI", sizeof(adap->name));
i2c_set_adapdata(adap, hdmi);
ret = i2c_add_adapter(adap);
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index 90145ad96984..b5d042ee052f 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -730,7 +730,7 @@ static struct i2c_adapter *rk3066_hdmi_i2c_adapter(struct rk3066_hdmi *hdmi)
adap->dev.parent = hdmi->dev;
adap->dev.of_node = hdmi->dev->of_node;
adap->algo = &rk3066_hdmi_algorithm;
- strlcpy(adap->name, "RK3066 HDMI", sizeof(adap->name));
+ strscpy(adap->name, "RK3066 HDMI", sizeof(adap->name));
i2c_set_adapdata(adap, hdmi);
ret = i2c_add_adapter(adap);
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
index c7d7e9fff91c..d1a65a921f5a 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
@@ -304,7 +304,7 @@ int sun4i_hdmi_i2c_create(struct device *dev, struct sun4i_hdmi *hdmi)
adap->owner = THIS_MODULE;
adap->class = I2C_CLASS_DDC;
adap->algo = &sun4i_hdmi_i2c_algorithm;
- strlcpy(adap->name, "sun4i_hdmi_i2c adapter", sizeof(adap->name));
+ strscpy(adap->name, "sun4i_hdmi_i2c adapter", sizeof(adap->name));
i2c_set_adapdata(adap, hdmi);
ret = i2c_add_adapter(adap);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h
index 0b74ca2dfb7b..23899d743a90 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg_x86.h
@@ -105,10 +105,14 @@
flags, magic, bp, \
eax, ebx, ecx, edx, si, di) \
({ \
- asm volatile ("push %%rbp;" \
+ asm volatile ( \
+ UNWIND_HINT_SAVE \
+ "push %%rbp;" \
+ UNWIND_HINT_UNDEFINED \
"mov %12, %%rbp;" \
VMWARE_HYPERCALL_HB_OUT \
- "pop %%rbp;" : \
+ "pop %%rbp;" \
+ UNWIND_HINT_RESTORE : \
"=a"(eax), \
"=b"(ebx), \
"=c"(ecx), \
@@ -130,10 +134,14 @@
flags, magic, bp, \
eax, ebx, ecx, edx, si, di) \
({ \
- asm volatile ("push %%rbp;" \
+ asm volatile ( \
+ UNWIND_HINT_SAVE \
+ "push %%rbp;" \
+ UNWIND_HINT_UNDEFINED \
"mov %12, %%rbp;" \
VMWARE_HYPERCALL_HB_IN \
- "pop %%rbp" : \
+ "pop %%rbp;" \
+ UNWIND_HINT_RESTORE : \
"=a"(eax), \
"=b"(ebx), \
"=c"(ecx), \
diff --git a/drivers/greybus/connection.c b/drivers/greybus/connection.c
index e3799a53a193..9c88861986c8 100644
--- a/drivers/greybus/connection.c
+++ b/drivers/greybus/connection.c
@@ -187,8 +187,8 @@ _gb_connection_create(struct gb_host_device *hd, int hd_cport_id,
spin_lock_init(&connection->lock);
INIT_LIST_HEAD(&connection->operations);
- connection->wq = alloc_workqueue("%s:%d", WQ_UNBOUND, 1,
- dev_name(&hd->dev), hd_cport_id);
+ connection->wq = alloc_ordered_workqueue("%s:%d", 0, dev_name(&hd->dev),
+ hd_cport_id);
if (!connection->wq) {
ret = -ENOMEM;
goto err_free_connection;
diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c
index 16cced80867a..0d7e749174a4 100644
--- a/drivers/greybus/svc.c
+++ b/drivers/greybus/svc.c
@@ -1318,7 +1318,7 @@ struct gb_svc *gb_svc_create(struct gb_host_device *hd)
if (!svc)
return NULL;
- svc->wq = alloc_workqueue("%s:svc", WQ_UNBOUND, 1, dev_name(&hd->dev));
+ svc->wq = alloc_ordered_workqueue("%s:svc", 0, dev_name(&hd->dev));
if (!svc->wq) {
kfree(svc);
return NULL;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 007f26d5f1a4..2f4d09ce027a 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void)
if (completion_done(&vmbus_connection.unload_event))
goto completed;
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
+ /*
+ * In a CoCo VM the synic_message_page is not allocated
+ * in hv_synic_alloc(). Instead it is set/cleared in
+ * hv_synic_enable_regs() and hv_synic_disable_regs()
+ * such that it is set only when the CPU is online. If
+ * not all present CPUs are online, the message page
+ * might be NULL, so skip such CPUs.
+ */
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr
+ VMBUS_MESSAGE_SINT;
@@ -867,11 +878,14 @@ completed:
* maybe-pending messages on all CPUs to be able to receive new
* messages after we reconnect.
*/
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
msg->header.message_type = HVMSG_NONE;
}
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 64f9ceca887b..542a1d53b303 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu)
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
- if (!(*inputarg))
- return -ENOMEM;
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ /*
+ * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
+ * allocated if this CPU was previously online and then taken offline
+ */
+ if (!*inputarg) {
+ *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
+ if (!(*inputarg))
+ return -ENOMEM;
+
+ if (hv_root_partition) {
+ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ }
}
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
@@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu)
int hv_common_cpu_die(unsigned int cpu)
{
- unsigned long flags;
- void **inputarg, **outputarg;
- void *mem;
-
- local_irq_save(flags);
-
- inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- mem = *inputarg;
- *inputarg = NULL;
-
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = NULL;
- }
-
- local_irq_restore(flags);
-
- kfree(mem);
+ /*
+ * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
+ * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
+ * may be used by the Hyper-V vPCI driver in reassigning interrupts
+ * as part of the offlining process. The interrupt reassignment
+ * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
+ * called this function.
+ *
+ * If a previously offlined CPU is brought back online again, the
+ * originally allocated memory is reused in hv_common_cpu_init().
+ */
return 0;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 1c65a6dfb9fa..67f95a29aeca 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void)
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_cpuhp;
+ goto err_alloc;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
@@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
- hv_synic_free();
err_alloc:
+ hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 1fc4fd79a1c6..1bab91ce8e95 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -218,7 +218,7 @@ static inline void set_trbe_enabled(struct trbe_cpudata *cpudata, u64 trblimitr)
* Enable the TRBE without clearing LIMITPTR which
* might be required for fetching the buffer limits.
*/
- trblimitr |= TRBLIMITR_ENABLE;
+ trblimitr |= TRBLIMITR_EL1_E;
write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
/* Synchronize the TRBE enable event */
@@ -236,7 +236,7 @@ static inline void set_trbe_disabled(struct trbe_cpudata *cpudata)
* Disable the TRBE without clearing LIMITPTR which
* might be required for fetching the buffer limits.
*/
- trblimitr &= ~TRBLIMITR_ENABLE;
+ trblimitr &= ~TRBLIMITR_EL1_E;
write_sysreg_s(trblimitr, SYS_TRBLIMITR_EL1);
if (trbe_needs_drain_after_disable(cpudata))
@@ -582,12 +582,12 @@ static void clr_trbe_status(void)
u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
WARN_ON(is_trbe_enabled());
- trbsr &= ~TRBSR_IRQ;
- trbsr &= ~TRBSR_TRG;
- trbsr &= ~TRBSR_WRAP;
- trbsr &= ~(TRBSR_EC_MASK << TRBSR_EC_SHIFT);
- trbsr &= ~(TRBSR_BSC_MASK << TRBSR_BSC_SHIFT);
- trbsr &= ~TRBSR_STOP;
+ trbsr &= ~TRBSR_EL1_IRQ;
+ trbsr &= ~TRBSR_EL1_TRG;
+ trbsr &= ~TRBSR_EL1_WRAP;
+ trbsr &= ~TRBSR_EL1_EC_MASK;
+ trbsr &= ~TRBSR_EL1_BSC_MASK;
+ trbsr &= ~TRBSR_EL1_S;
write_sysreg_s(trbsr, SYS_TRBSR_EL1);
}
@@ -596,13 +596,13 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
unsigned long addr = buf->trbe_limit;
- WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_LIMIT_SHIFT)));
+ WARN_ON(!IS_ALIGNED(addr, (1UL << TRBLIMITR_EL1_LIMIT_SHIFT)));
WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
- trblimitr &= ~TRBLIMITR_NVM;
- trblimitr &= ~(TRBLIMITR_FILL_MODE_MASK << TRBLIMITR_FILL_MODE_SHIFT);
- trblimitr &= ~(TRBLIMITR_TRIG_MODE_MASK << TRBLIMITR_TRIG_MODE_SHIFT);
- trblimitr &= ~(TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT);
+ trblimitr &= ~TRBLIMITR_EL1_nVM;
+ trblimitr &= ~TRBLIMITR_EL1_FM_MASK;
+ trblimitr &= ~TRBLIMITR_EL1_TM_MASK;
+ trblimitr &= ~TRBLIMITR_EL1_LIMIT_MASK;
/*
* Fill trace buffer mode is used here while configuring the
@@ -613,14 +613,15 @@ static void set_trbe_limit_pointer_enabled(struct trbe_buf *buf)
* trace data in the interrupt handler, before reconfiguring
* the TRBE.
*/
- trblimitr |= (TRBE_FILL_MODE_FILL & TRBLIMITR_FILL_MODE_MASK) << TRBLIMITR_FILL_MODE_SHIFT;
+ trblimitr |= (TRBLIMITR_EL1_FM_FILL << TRBLIMITR_EL1_FM_SHIFT) &
+ TRBLIMITR_EL1_FM_MASK;
/*
* Trigger mode is not used here while configuring the TRBE for
* the trace capture. Hence just keep this in the ignore mode.
*/
- trblimitr |= (TRBE_TRIG_MODE_IGNORE & TRBLIMITR_TRIG_MODE_MASK) <<
- TRBLIMITR_TRIG_MODE_SHIFT;
+ trblimitr |= (TRBLIMITR_EL1_TM_IGNR << TRBLIMITR_EL1_TM_SHIFT) &
+ TRBLIMITR_EL1_TM_MASK;
trblimitr |= (addr & PAGE_MASK);
set_trbe_enabled(buf->cpudata, trblimitr);
}
diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h
index 98ff1b17ad07..77cbb5c63878 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.h
+++ b/drivers/hwtracing/coresight/coresight-trbe.h
@@ -30,7 +30,7 @@ static inline bool is_trbe_enabled(void)
{
u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
- return trblimitr & TRBLIMITR_ENABLE;
+ return trblimitr & TRBLIMITR_EL1_E;
}
#define TRBE_EC_OTHERS 0
@@ -39,7 +39,7 @@ static inline bool is_trbe_enabled(void)
static inline int get_trbe_ec(u64 trbsr)
{
- return (trbsr >> TRBSR_EC_SHIFT) & TRBSR_EC_MASK;
+ return (trbsr & TRBSR_EL1_EC_MASK) >> TRBSR_EL1_EC_SHIFT;
}
#define TRBE_BSC_NOT_STOPPED 0
@@ -48,63 +48,55 @@ static inline int get_trbe_ec(u64 trbsr)
static inline int get_trbe_bsc(u64 trbsr)
{
- return (trbsr >> TRBSR_BSC_SHIFT) & TRBSR_BSC_MASK;
+ return (trbsr & TRBSR_EL1_BSC_MASK) >> TRBSR_EL1_BSC_SHIFT;
}
static inline void clr_trbe_irq(void)
{
u64 trbsr = read_sysreg_s(SYS_TRBSR_EL1);
- trbsr &= ~TRBSR_IRQ;
+ trbsr &= ~TRBSR_EL1_IRQ;
write_sysreg_s(trbsr, SYS_TRBSR_EL1);
}
static inline bool is_trbe_irq(u64 trbsr)
{
- return trbsr & TRBSR_IRQ;
+ return trbsr & TRBSR_EL1_IRQ;
}
static inline bool is_trbe_trg(u64 trbsr)
{
- return trbsr & TRBSR_TRG;
+ return trbsr & TRBSR_EL1_TRG;
}
static inline bool is_trbe_wrap(u64 trbsr)
{
- return trbsr & TRBSR_WRAP;
+ return trbsr & TRBSR_EL1_WRAP;
}
static inline bool is_trbe_abort(u64 trbsr)
{
- return trbsr & TRBSR_ABORT;
+ return trbsr & TRBSR_EL1_EA;
}
static inline bool is_trbe_running(u64 trbsr)
{
- return !(trbsr & TRBSR_STOP);
+ return !(trbsr & TRBSR_EL1_S);
}
-#define TRBE_TRIG_MODE_STOP 0
-#define TRBE_TRIG_MODE_IRQ 1
-#define TRBE_TRIG_MODE_IGNORE 3
-
-#define TRBE_FILL_MODE_FILL 0
-#define TRBE_FILL_MODE_WRAP 1
-#define TRBE_FILL_MODE_CIRCULAR_BUFFER 3
-
static inline bool get_trbe_flag_update(u64 trbidr)
{
- return trbidr & TRBIDR_FLAG;
+ return trbidr & TRBIDR_EL1_F;
}
static inline bool is_trbe_programmable(u64 trbidr)
{
- return !(trbidr & TRBIDR_PROG);
+ return !(trbidr & TRBIDR_EL1_P);
}
static inline int get_trbe_address_align(u64 trbidr)
{
- return (trbidr >> TRBIDR_ALIGN_SHIFT) & TRBIDR_ALIGN_MASK;
+ return (trbidr & TRBIDR_EL1_Align_MASK) >> TRBIDR_EL1_Align_SHIFT;
}
static inline unsigned long get_trbe_write_pointer(void)
@@ -121,7 +113,7 @@ static inline void set_trbe_write_pointer(unsigned long addr)
static inline unsigned long get_trbe_limit_pointer(void)
{
u64 trblimitr = read_sysreg_s(SYS_TRBLIMITR_EL1);
- unsigned long addr = trblimitr & (TRBLIMITR_LIMIT_MASK << TRBLIMITR_LIMIT_SHIFT);
+ unsigned long addr = trblimitr & TRBLIMITR_EL1_LIMIT_MASK;
WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
return addr;
@@ -130,7 +122,7 @@ static inline unsigned long get_trbe_limit_pointer(void)
static inline unsigned long get_trbe_base_pointer(void)
{
u64 trbbaser = read_sysreg_s(SYS_TRBBASER_EL1);
- unsigned long addr = trbbaser & (TRBBASER_BASE_MASK << TRBBASER_BASE_SHIFT);
+ unsigned long addr = trbbaser & TRBBASER_EL1_BASE_MASK;
WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
return addr;
@@ -139,7 +131,7 @@ static inline unsigned long get_trbe_base_pointer(void)
static inline void set_trbe_base_pointer(unsigned long addr)
{
WARN_ON(is_trbe_enabled());
- WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_BASE_SHIFT)));
+ WARN_ON(!IS_ALIGNED(addr, (1UL << TRBBASER_EL1_BASE_SHIFT)));
WARN_ON(!IS_ALIGNED(addr, PAGE_SIZE));
write_sysreg_s(addr, SYS_TRBBASER_EL1);
}
diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 1af0a637d7f1..4d24ceb57ee7 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -201,8 +201,8 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx)
/* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */
static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
{
- u8 prescale, filt, sethold, clkhi, clklo, datavd;
- unsigned int clk_rate, clk_cycle;
+ u8 prescale, filt, sethold, datavd;
+ unsigned int clk_rate, clk_cycle, clkhi, clklo;
enum lpi2c_imx_pincfg pincfg;
unsigned int temp;
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 2e153f2f71b6..78682388e02e 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1752,16 +1752,21 @@ nodma:
if (!clk_freq || clk_freq > I2C_MAX_FAST_MODE_PLUS_FREQ) {
dev_err(qup->dev, "clock frequency not supported %d\n",
clk_freq);
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail_dma;
}
qup->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(qup->base))
- return PTR_ERR(qup->base);
+ if (IS_ERR(qup->base)) {
+ ret = PTR_ERR(qup->base);
+ goto fail_dma;
+ }
qup->irq = platform_get_irq(pdev, 0);
- if (qup->irq < 0)
- return qup->irq;
+ if (qup->irq < 0) {
+ ret = qup->irq;
+ goto fail_dma;
+ }
if (has_acpi_companion(qup->dev)) {
ret = device_property_read_u32(qup->dev,
@@ -1775,13 +1780,15 @@ nodma:
qup->clk = devm_clk_get(qup->dev, "core");
if (IS_ERR(qup->clk)) {
dev_err(qup->dev, "Could not get core clock\n");
- return PTR_ERR(qup->clk);
+ ret = PTR_ERR(qup->clk);
+ goto fail_dma;
}
qup->pclk = devm_clk_get(qup->dev, "iface");
if (IS_ERR(qup->pclk)) {
dev_err(qup->dev, "Could not get iface clock\n");
- return PTR_ERR(qup->pclk);
+ ret = PTR_ERR(qup->pclk);
+ goto fail_dma;
}
qup_i2c_enable_clocks(qup);
src_clk_freq = clk_get_rate(qup->clk);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index aa2d19db2b1d..34201d7ef33e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
+static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ raw_safe_halt();
+ raw_local_irq_disable();
+ return index;
+}
+
+/**
+ * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the HLT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * Must be called under local_irq_disable().
+ */
+static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ return __intel_idle_hlt(dev, drv, index);
+}
+
+static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
+
+ raw_local_irq_enable();
+ ret = __intel_idle_hlt(dev, drv, index);
+ raw_local_irq_disable();
+
+ return ret;
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
+static struct cpuidle_state vmguest_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
+ .exit_latency = 5,
+ .target_residency = 10,
+ .enter = &intel_idle_hlt, },
+ {
+ .name = "C1L",
+ .desc = "Long HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 5,
+ .target_residency = 200,
+ .enter = &intel_idle_hlt, },
+ {
+ .enter = NULL }
+};
+
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
+static void state_update_enter_method(struct cpuidle_state *state, int cstate)
+{
+ if (state->enter == intel_idle_hlt) {
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_hlt_irq_on;
+ }
+ return;
+ }
+ if (state->enter == intel_idle_hlt_irq_on)
+ return; /* no update scenarios */
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
+ /*
+ * Combining XSTATE with the IBRS or IRQ_ENABLE flags
+ * is not currently supported by this driver.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_xstate;
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ state->flags & CPUIDLE_FLAG_IBRS) {
+ /*
+ * IBRS mitigation requires that C-states are entered
+ * with interrupts disabled.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_ibrs;
+ return;
+ }
+
+ if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
+ state->enter = intel_idle_irq;
+ return;
+ }
+
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_irq;
+ }
+}
+
+/*
+ * For mwait based states, we want to verify the cpuid data to see if the state
+ * is actually supported by this specific CPU.
+ * For non-mwait based states, this check should be skipped.
+ */
+static bool should_verify_mwait(struct cpuidle_state *state)
+{
+ if (state->enter == intel_idle_hlt)
+ return false;
+ if (state->enter == intel_idle_hlt_irq_on)
+ return false;
+
+ return true;
+}
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
- if (!intel_idle_verify_cstate(mwait_hint))
+ if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
state = &drv->states[drv->state_count];
- if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
- /*
- * Combining with XSTATE with IBRS or IRQ_ENABLE flags
- * is not currently supported but this driver.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_xstate;
- } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
- state->flags & CPUIDLE_FLAG_IBRS) {
- /*
- * IBRS mitigation requires that C-states are entered
- * with interrupts disabled.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_ibrs;
- } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
- state->enter = intel_idle_irq;
- } else if (force_irq_on) {
- pr_info("forced intel_idle_irq for state %d\n", cstate);
- state->enter = intel_idle_irq;
- }
+ state_update_enter_method(state, cstate);
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
@@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+/*
+ * Match up the latency and break even point of the bare metal (cpu based)
+ * states with the deepest VM available state.
+ *
+ * We only want to do this for the deepest states, the ones that have
+ * the TLB_FLUSHED flag set.
+ *
+ * All our short idle states are dominated by vmexit/vmenter latencies,
+ * not the underlying hardware latencies so we keep our values for these.
+ */
+static void matchup_vm_state_with_baremetal(void)
+{
+ int cstate;
+
+ for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+ int matching_cstate;
+
+ if (intel_idle_max_cstate_reached(cstate))
+ break;
+
+ if (!cpuidle_state_table[cstate].enter)
+ break;
+
+ if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
+ continue;
+
+ for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
+ if (!icpu->state_table[matching_cstate].enter)
+ break;
+ if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
+ cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
+ cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
+ }
+ }
+
+ }
+}
+
+
+static int __init intel_idle_vminit(const struct x86_cpu_id *id)
+{
+ int retval;
+
+ cpuidle_state_table = vmguest_cstates;
+
+ icpu = (const struct idle_cpu *)id->driver_data;
+
+ pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
+ boot_cpu_data.x86_model);
+
+ intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (!intel_idle_cpuidle_devices)
+ return -ENOMEM;
+
+ /*
+ * We don't know exactly what the host will do when we go idle, but as a worst-case
+ * estimate we can assume that the exit latency of the deepest host state will be hit
+ * for our deep (long duration) guest idle state.
+ * The same logic applies to the break-even point for the long duration guest idle state.
+ * So let's copy these two properties from the table we found for the host CPU type.
+ */
+ matchup_vm_state_with_baremetal();
+
+ intel_idle_cpuidle_driver_init(&intel_idle_driver);
+
+ retval = cpuidle_register_driver(&intel_idle_driver);
+ if (retval) {
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
+ drv ? drv->name : "none");
+ goto init_driver_fail;
+ }
+
+ retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
+ intel_idle_cpu_online, NULL);
+ if (retval < 0)
+ goto hp_setup_fail;
+
+ return 0;
+hp_setup_fail:
+ intel_idle_cpuidle_devices_uninit();
+ cpuidle_unregister_driver(&intel_idle_driver);
+init_driver_fail:
+ free_percpu(intel_idle_cpuidle_devices);
+ return retval;
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index ddcfc116b19a..c7a4ee896121 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_mpesw(peer_dev))
+ *num_ports += peer_num_ports;
+ else
+ /* Only 1 ib port is the representor for all uplinks */
+ *num_ports += peer_num_ports - 1;
+ }
+}
+
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+ struct mlx5_core_dev *lag_master = dev;
const struct mlx5_ib_profile *profile;
struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
- int second_uplink = false;
- u32 peer_num_ports;
+ int new_uplink = false;
int vport_index;
int ret;
+ int i;
vport_index = rep->vport_index;
if (mlx5_lag_is_shared_fdb(dev)) {
- peer_dev = mlx5_lag_get_peer_mdev(dev);
- peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
if (mlx5_lag_is_master(dev)) {
- if (mlx5_lag_is_mpesw(dev))
- num_ports += peer_num_ports;
- else
- num_ports += peer_num_ports - 1;
-
+ mlx5_ib_num_ports_update(dev, &num_ports);
} else {
if (rep->vport == MLX5_VPORT_UPLINK) {
if (!mlx5_lag_is_mpesw(dev))
return 0;
- second_uplink = true;
+ new_uplink = true;
}
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_master(peer_dev))
+ lag_master = peer_dev;
+ else if (!mlx5_lag_is_mpesw(dev))
+ /* Only 1 ib port is the representor for all uplinks */
+ peer_n_ports--;
- vport_index += peer_num_ports;
- dev = peer_dev;
+ if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+ vport_index += peer_n_ports;
+ }
}
}
- if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink)
+ if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep, vport_index);
+ return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
@@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
- ibdev->mdev = dev;
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
+ ibdev->mdev = lag_master;
ibdev->num_ports = num_ports;
ret = __mlx5_ib_add(ibdev, profile);
@@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
- if (mlx5_lag_is_shared_fdb(dev))
- mlx5_ib_register_peer_vport_reps(dev);
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_register_peer_vport_reps(lag_master);
return 0;
@@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
int vport_index = rep->vport_index;
struct mlx5_ib_port *port;
+ int i;
if (WARN_ON(!mdev))
return;
+ if (!dev)
+ return;
+
if (mlx5_lag_is_shared_fdb(mdev) &&
!mlx5_lag_is_master(mdev)) {
- struct mlx5_core_dev *peer_mdev;
-
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
return;
- peer_mdev = mlx5_lag_get_peer_mdev(mdev);
- vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].rep == rep)
+ break;
+ }
+ if (WARN_ON(i == dev->num_ports))
+ return;
+ vport_index = i;
}
- if (!dev)
- return;
-
port = &dev->port[vport_index];
write_lock(&port->roce.netdev_lock);
port->roce.netdev = NULL;
@@ -143,13 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
port->rep = NULL;
if (rep->vport == MLX5_VPORT_UPLINK) {
- struct mlx5_core_dev *peer_mdev;
- struct mlx5_eswitch *esw;
+
+ if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+ return;
if (mlx5_lag_is_shared_fdb(mdev)) {
- peer_mdev = mlx5_lag_get_peer_mdev(mdev);
- esw = peer_mdev->priv.eswitch;
- mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
}
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
@@ -163,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
- struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ struct mlx5_core_dev *peer_mdev;
struct mlx5_eswitch *esw;
+ int i;
- if (!peer_mdev)
- return;
-
- esw = peer_mdev->priv.eswitch;
- mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ }
}
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index f693bc753b6b..1bb7507325bc 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -111,7 +111,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
ret = pin_user_pages(start_page + got * PAGE_SIZE,
num_pages - got,
FOLL_LONGTERM | FOLL_WRITE,
- p + got, NULL);
+ p + got);
if (ret < 0) {
mmap_read_unlock(current->mm);
goto bail_release;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 2a5cac2658ec..84e0f41e7dfa 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -140,7 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
ret = pin_user_pages(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof(struct page *)),
- gup_flags, page_list, NULL);
+ gup_flags, page_list);
if (ret < 0)
goto out;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 4d8f6b8051ff..83093e16b6c6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1357,7 +1357,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (cleanup_err)
rxe_err_mr(mr, "cleanup failed, err = %d", cleanup_err);
- kfree_rcu(mr);
+ kfree_rcu_mightsleep(mr);
return 0;
err_out:
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index f51ab2ccf151..e6e25f15567d 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -422,7 +422,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
umem->page_chunk[i].plist = plist;
while (nents) {
rv = pin_user_pages(first_page_va, nents, foll_flags,
- plist, NULL);
+ plist);
if (rv < 0)
goto out_sem_up;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 4b292e0504f1..7c7a51d36d0c 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -312,7 +312,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
}
/*
- * 0copy TCP transmit interface: Use do_tcp_sendpages.
+ * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES.
*
* Using sendpage to push page by page appears to be less efficient
* than using sendmsg, even if data are copied.
@@ -323,20 +323,26 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
static int siw_tcp_sendpages(struct socket *s, struct page **page, int offset,
size_t size)
{
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SPLICE_PAGES),
+ };
struct sock *sk = s->sk;
- int i = 0, rv = 0, sent = 0,
- flags = MSG_MORE | MSG_DONTWAIT | MSG_SENDPAGE_NOTLAST;
+ int i = 0, rv = 0, sent = 0;
while (size) {
size_t bytes = min_t(size_t, PAGE_SIZE - offset, size);
if (size + offset <= PAGE_SIZE)
- flags = MSG_MORE | MSG_DONTWAIT;
+ msg.msg_flags &= ~MSG_MORE;
tcp_rate_check_app_limited(sk);
+ bvec_set_page(&bvec, page[i], bytes, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
try_page_again:
lock_sock(sk);
- rv = do_tcp_sendpages(sk, page[i], offset, bytes, flags);
+ rv = tcp_sendmsg_locked(sk, &msg, size);
release_sock(sk);
if (rv > 0) {
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 81a54a59e13c..8a320e6218e3 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -609,7 +609,7 @@ config INPUT_PWM_VIBRA
config INPUT_RK805_PWRKEY
tristate "Rockchip RK805 PMIC power key support"
- depends on MFD_RK808
+ depends on MFD_RK8XX
help
Select this option to enable power key driver for RK805.
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c
index 577c75c83e25..bb3c6072fc82 100644
--- a/drivers/input/touchscreen/sun4i-ts.c
+++ b/drivers/input/touchscreen/sun4i-ts.c
@@ -22,7 +22,7 @@
* in the kernel). So this driver offers straight forward, reliable single
* touch functionality only.
*
- * s.a. A20 User Manual "1.15 TP" (Documentation/arm/sunxi.rst)
+ * s.a. A20 User Manual "1.15 TP" (Documentation/arch/arm/sunxi.rst)
* (looks like the description in the A20 User Manual v1.3 is better
* than the one in the A10 User Manual v.1.5)
*/
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 4d800601e8ec..2b12b583ef4b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -152,6 +152,7 @@ config IOMMU_DMA
select IOMMU_IOVA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
+ select NEED_SG_DMA_FLAGS if SWIOTLB
# Shared Virtual Addressing
config IOMMU_SVA
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 2ddbda3a4374..ab8aa8f77cc4 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -986,8 +986,13 @@ union irte_ga_hi {
};
struct irte_ga {
- union irte_ga_lo lo;
- union irte_ga_hi hi;
+ union {
+ struct {
+ union irte_ga_lo lo;
+ union irte_ga_hi hi;
+ };
+ u128 irte;
+ };
};
struct irq_2_irte {
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index dc1ec6849775..9ea40960978b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2078,10 +2078,6 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
int mode = DEFAULT_PGTABLE_LEVEL;
int ret;
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
/*
* Force IOMMU v1 page table when iommu=pt and
* when allocating domain for pass-through devices.
@@ -2097,6 +2093,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
return NULL;
}
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
switch (pgtable) {
case AMD_IOMMU_V1:
ret = protection_domain_init_v1(domain, mode);
@@ -3023,10 +3023,10 @@ out:
static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
struct irte_ga *irte, struct amd_ir_data *data)
{
- bool ret;
struct irq_remap_table *table;
- unsigned long flags;
struct irte_ga *entry;
+ unsigned long flags;
+ u128 old;
table = get_irq_table(iommu, devid);
if (!table)
@@ -3037,16 +3037,14 @@ static int modify_irte_ga(struct amd_iommu *iommu, u16 devid, int index,
entry = (struct irte_ga *)table->table;
entry = &entry[index];
- ret = cmpxchg_double(&entry->lo.val, &entry->hi.val,
- entry->lo.val, entry->hi.val,
- irte->lo.val, irte->hi.val);
/*
* We use cmpxchg16 to atomically update the 128-bit IRTE,
* and it cannot be updated by the hardware or other processors
* behind us, so the return value of cmpxchg16 should be the
* same as the old value.
*/
- WARN_ON(!ret);
+ old = entry->irte;
+ WARN_ON(!try_cmpxchg128(&entry->irte, &old, irte->irte));
if (data)
data->ref = entry;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 7a9f0b0bddbd..e86ae462cade 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -520,9 +520,38 @@ static bool dev_is_untrusted(struct device *dev)
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}
-static bool dev_use_swiotlb(struct device *dev)
+static bool dev_use_swiotlb(struct device *dev, size_t size,
+ enum dma_data_direction dir)
{
- return IS_ENABLED(CONFIG_SWIOTLB) && dev_is_untrusted(dev);
+ return IS_ENABLED(CONFIG_SWIOTLB) &&
+ (dev_is_untrusted(dev) ||
+ dma_kmalloc_needs_bounce(dev, size, dir));
+}
+
+static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir)
+{
+ struct scatterlist *s;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_SWIOTLB))
+ return false;
+
+ if (dev_is_untrusted(dev))
+ return true;
+
+ /*
+ * If kmalloc() buffers are not DMA-safe for this device and
+ * direction, check the individual lengths in the sg list. If any
+ * element is deemed unsafe, use the swiotlb for bouncing.
+ */
+ if (!dma_kmalloc_safe(dev, dir)) {
+ for_each_sg(sg, s, nents, i)
+ if (!dma_kmalloc_size_aligned(s->length))
+ return true;
+ }
+
+ return false;
}
/**
@@ -922,7 +951,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
{
phys_addr_t phys;
- if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -938,7 +967,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
{
phys_addr_t phys;
- if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev))
+ if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
return;
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
@@ -956,7 +985,7 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg;
int i;
- if (dev_use_swiotlb(dev))
+ if (sg_dma_is_swiotlb(sgl))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
sg->length, dir);
@@ -972,7 +1001,7 @@ static void iommu_dma_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
- if (dev_use_swiotlb(dev))
+ if (sg_dma_is_swiotlb(sgl))
for_each_sg(sgl, sg, nelems, i)
iommu_dma_sync_single_for_device(dev,
sg_dma_address(sg),
@@ -998,7 +1027,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
* If both the physical buffer start address and size are
* page aligned, we don't need to use a bounce page.
*/
- if (dev_use_swiotlb(dev) && iova_offset(iovad, phys | size)) {
+ if (dev_use_swiotlb(dev, size, dir) &&
+ iova_offset(iovad, phys | size)) {
void *padding_start;
size_t padding_size, aligned_size;
@@ -1080,7 +1110,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
sg_dma_address(s) = DMA_MAPPING_ERROR;
sg_dma_len(s) = 0;
- if (sg_is_dma_bus_address(s)) {
+ if (sg_dma_is_bus_address(s)) {
if (i > 0)
cur = sg_next(cur);
@@ -1136,7 +1166,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
int i;
for_each_sg(sg, s, nents, i) {
- if (sg_is_dma_bus_address(s)) {
+ if (sg_dma_is_bus_address(s)) {
sg_dma_unmark_bus_address(s);
} else {
if (sg_dma_address(s) != DMA_MAPPING_ERROR)
@@ -1166,6 +1196,8 @@ static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
struct scatterlist *s;
int i;
+ sg_dma_mark_swiotlb(sg);
+
for_each_sg(sg, s, nents, i) {
sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
s->offset, s->length, dir, attrs);
@@ -1210,7 +1242,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
goto out;
}
- if (dev_use_swiotlb(dev))
+ if (dev_use_sg_swiotlb(dev, sg, nents, dir))
return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
@@ -1315,7 +1347,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
struct scatterlist *tmp;
int i;
- if (dev_use_swiotlb(dev)) {
+ if (sg_dma_is_swiotlb(sg)) {
iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
return;
}
@@ -1329,7 +1361,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
* just have to be determined.
*/
for_each_sg(sg, tmp, nents, i) {
- if (sg_is_dma_bus_address(tmp)) {
+ if (sg_dma_is_bus_address(tmp)) {
sg_dma_unmark_bus_address(tmp);
continue;
}
@@ -1343,7 +1375,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
nents -= i;
for_each_sg(tmp, tmp, nents, i) {
- if (sg_is_dma_bus_address(tmp)) {
+ if (sg_dma_is_bus_address(tmp)) {
sg_dma_unmark_bus_address(tmp);
continue;
}
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index a1b987335b31..08f56326e2f8 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -175,18 +175,14 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
irte = &iommu->ir_table->base[index];
if ((irte->pst == 1) || (irte_modified->pst == 1)) {
- bool ret;
-
- ret = cmpxchg_double(&irte->low, &irte->high,
- irte->low, irte->high,
- irte_modified->low, irte_modified->high);
/*
* We use cmpxchg16 to atomically update the 128-bit IRTE,
* and it cannot be updated by the hardware or other processors
* behind us, so the return value of cmpxchg16 should be the
* same as the old value.
*/
- WARN_ON(!ret);
+ u128 old = irte->irte;
+ WARN_ON(!try_cmpxchg128(&irte->irte, &old, irte_modified->irte));
} else {
WRITE_ONCE(irte->low, irte_modified->low);
WRITE_ONCE(irte->high, irte_modified->high);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f1dcfa3f1a1b..eb620552967b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2567,7 +2567,7 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
len = 0;
}
- if (sg_is_dma_bus_address(sg))
+ if (sg_dma_is_bus_address(sg))
goto next;
if (len) {
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 3c47846cc5ef..412ca96be128 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -786,7 +786,7 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
user->locked = 1;
}
rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
- user->gup_flags, user->upages, NULL,
+ user->gup_flags, user->upages,
&user->locked);
}
if (rc <= 0) {
@@ -1799,7 +1799,7 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
rc = pin_user_pages_remote(
pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page,
- NULL, NULL);
+ NULL);
mmap_read_unlock(pages->source_mm);
if (rc != 1) {
if (WARN_ON(rc >= 0))
diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c
index 77ebe7e47e0e..e731e0784f7e 100644
--- a/drivers/irqchip/irq-clps711x.c
+++ b/drivers/irqchip/irq-clps711x.c
@@ -212,12 +212,6 @@ out_kfree:
return err;
}
-void __init clps711x_intc_init(phys_addr_t base, resource_size_t size)
-{
- BUG_ON(_clps711x_intc_init(NULL, base, size));
-}
-
-#ifdef CONFIG_IRQCHIP
static int __init clps711x_intc_init_dt(struct device_node *np,
struct device_node *parent)
{
@@ -231,4 +225,3 @@ static int __init clps711x_intc_init_dt(struct device_node *np,
return _clps711x_intc_init(np, res.start, resource_size(&res));
}
IRQCHIP_DECLARE(clps711x, "cirrus,ep7209-intc", clps711x_intc_init_dt);
-#endif
diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index 46a3aa60e50e..359efc1d1be7 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -125,7 +125,7 @@ static struct irq_chip ft010_irq_chip = {
/* Local static for the IRQ entry call */
static struct ft010_irq_data firq;
-asmlinkage void __exception_irq_entry ft010_irqchip_handle_irq(struct pt_regs *regs)
+static asmlinkage void __exception_irq_entry ft010_irqchip_handle_irq(struct pt_regs *regs)
{
struct ft010_irq_data *f = &firq;
int irq;
@@ -162,7 +162,7 @@ static const struct irq_domain_ops ft010_irqdomain_ops = {
.xlate = irq_domain_xlate_onetwocell,
};
-int __init ft010_of_init_irq(struct device_node *node,
+static int __init ft010_of_init_irq(struct device_node *node,
struct device_node *parent)
{
struct ft010_irq_data *f = &firq;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0ec2b1e1df75..1994541eaef8 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3585,6 +3585,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
irqd = irq_get_irq_data(virq + i);
irqd_set_single_target(irqd);
irqd_set_affinity_on_activate(irqd);
+ irqd_set_resend_when_in_progress(irqd);
pr_debug("ID:%d pID:%d vID:%d\n",
(int)(hwirq + i - its_dev->event_map.lpi_base),
(int)(hwirq + i), virq + i);
@@ -4523,6 +4524,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
irq_domain_set_hwirq_and_chip(domain, virq + i, i,
irqchip, vm->vpes[i]);
set_bit(i, bitmap);
+ irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i));
}
if (err) {
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a605aa79435a..0c6c1af9a5b7 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -40,6 +40,7 @@
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
#define FLAGS_WORKAROUND_MTK_GICR_SAVE (1ULL << 2)
+#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 3)
#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
@@ -656,10 +657,16 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
return 0;
}
-static u64 gic_mpidr_to_affinity(unsigned long mpidr)
+static u64 gic_cpu_to_affinity(int cpu)
{
+ u64 mpidr = cpu_logical_map(cpu);
u64 aff;
+ /* ASR8601 needs to have its affinities shifted down... */
+ if (unlikely(gic_data.flags & FLAGS_WORKAROUND_ASR_ERRATUM_8601001))
+ mpidr = (MPIDR_AFFINITY_LEVEL(mpidr, 1) |
+ (MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8));
+
aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
@@ -914,7 +921,7 @@ static void __init gic_dist_init(void)
* Set all global interrupts to the boot CPU only. ARE must be
* enabled.
*/
- affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id()));
+ affinity = gic_cpu_to_affinity(smp_processor_id());
for (i = 32; i < GIC_LINE_NR; i++)
gic_write_irouter(affinity, base + GICD_IROUTER + i * 8);
@@ -963,7 +970,7 @@ static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr)
{
- unsigned long mpidr = cpu_logical_map(smp_processor_id());
+ unsigned long mpidr;
u64 typer;
u32 aff;
@@ -971,6 +978,8 @@ static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr)
* Convert affinity to a 32bit value that can be matched to
* GICR_TYPER bits [63:32].
*/
+ mpidr = gic_cpu_to_affinity(smp_processor_id());
+
aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
@@ -1084,7 +1093,7 @@ static inline bool gic_dist_security_disabled(void)
static void gic_cpu_sys_reg_init(void)
{
int i, cpu = smp_processor_id();
- u64 mpidr = cpu_logical_map(cpu);
+ u64 mpidr = gic_cpu_to_affinity(cpu);
u64 need_rss = MPIDR_RS(mpidr);
bool group0;
u32 pribits;
@@ -1183,11 +1192,11 @@ static void gic_cpu_sys_reg_init(void)
for_each_online_cpu(i) {
bool have_rss = per_cpu(has_rss, i) && per_cpu(has_rss, cpu);
- need_rss |= MPIDR_RS(cpu_logical_map(i));
+ need_rss |= MPIDR_RS(gic_cpu_to_affinity(i));
if (need_rss && (!have_rss))
pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n",
cpu, (unsigned long)mpidr,
- i, (unsigned long)cpu_logical_map(i));
+ i, (unsigned long)gic_cpu_to_affinity(i));
}
/**
@@ -1263,9 +1272,11 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
unsigned long cluster_id)
{
int next_cpu, cpu = *base_cpu;
- unsigned long mpidr = cpu_logical_map(cpu);
+ unsigned long mpidr;
u16 tlist = 0;
+ mpidr = gic_cpu_to_affinity(cpu);
+
while (cpu < nr_cpu_ids) {
tlist |= 1 << (mpidr & 0xf);
@@ -1274,7 +1285,7 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
goto out;
cpu = next_cpu;
- mpidr = cpu_logical_map(cpu);
+ mpidr = gic_cpu_to_affinity(cpu);
if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) {
cpu--;
@@ -1319,7 +1330,7 @@ static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask)
dsb(ishst);
for_each_cpu(cpu, mask) {
- u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu));
+ u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(gic_cpu_to_affinity(cpu));
u16 tlist;
tlist = gic_compute_target_list(&cpu, mask, cluster_id);
@@ -1377,7 +1388,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
offset = convert_offset_index(d, GICD_IROUTER, &index);
reg = gic_dist_base(d) + offset + (index * 8);
- val = gic_mpidr_to_affinity(cpu_logical_map(cpu));
+ val = gic_cpu_to_affinity(cpu);
gic_write_irouter(val, reg);
@@ -1796,6 +1807,15 @@ static bool gic_enable_quirk_nvidia_t241(void *data)
return true;
}
+static bool gic_enable_quirk_asr8601(void *data)
+{
+ struct gic_chip_data *d = data;
+
+ d->flags |= FLAGS_WORKAROUND_ASR_ERRATUM_8601001;
+
+ return true;
+}
+
static const struct gic_quirk gic_quirks[] = {
{
.desc = "GICv3: Qualcomm MSM8996 broken firmware",
@@ -1803,6 +1823,11 @@ static const struct gic_quirk gic_quirks[] = {
.init = gic_enable_quirk_msm8996,
},
{
+ .desc = "GICv3: ASR erratum 8601001",
+ .compatible = "asr,asr8601-gic-v3",
+ .init = gic_enable_quirk_asr8601,
+ },
+ {
.desc = "GICv3: Mediatek Chromebook GICR save problem",
.property = "mediatek,broken-save-restore-fw",
.init = gic_enable_quirk_mtk_gicr,
diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
index 5f47d8ee4ae3..b9dcc8e78c75 100644
--- a/drivers/irqchip/irq-jcore-aic.c
+++ b/drivers/irqchip/irq-jcore-aic.c
@@ -68,6 +68,7 @@ static int __init aic_irq_of_init(struct device_node *node,
unsigned min_irq = JCORE_AIC2_MIN_HWIRQ;
unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1;
struct irq_domain *domain;
+ int ret;
pr_info("Initializing J-Core AIC\n");
@@ -100,6 +101,12 @@ static int __init aic_irq_of_init(struct device_node *node,
jcore_aic.irq_unmask = noop;
jcore_aic.name = "AIC";
+ ret = irq_alloc_descs(-1, min_irq, dom_sz - min_irq,
+ of_node_to_nid(node));
+
+ if (ret < 0)
+ return ret;
+
domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq,
&jcore_aic_irqdomain_ops,
&jcore_aic);
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 71ef19f77a5a..92d8aa28bdf5 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -36,6 +36,7 @@ static int nr_pics;
struct eiointc_priv {
u32 node;
+ u32 vec_count;
nodemask_t node_map;
cpumask_t cpuspan_map;
struct fwnode_handle *domain_handle;
@@ -153,18 +154,18 @@ static int eiointc_router_init(unsigned int cpu)
if ((cpu_logical_map(cpu) % CORES_PER_EIO_NODE) == 0) {
eiointc_enable();
- for (i = 0; i < VEC_COUNT / 32; i++) {
+ for (i = 0; i < eiointc_priv[0]->vec_count / 32; i++) {
data = (((1 << (i * 2 + 1)) << 16) | (1 << (i * 2)));
iocsr_write32(data, EIOINTC_REG_NODEMAP + i * 4);
}
- for (i = 0; i < VEC_COUNT / 32 / 4; i++) {
+ for (i = 0; i < eiointc_priv[0]->vec_count / 32 / 4; i++) {
bit = BIT(1 + index); /* Route to IP[1 + index] */
data = bit | (bit << 8) | (bit << 16) | (bit << 24);
iocsr_write32(data, EIOINTC_REG_IPMAP + i * 4);
}
- for (i = 0; i < VEC_COUNT / 4; i++) {
+ for (i = 0; i < eiointc_priv[0]->vec_count / 4; i++) {
/* Route to Node-0 Core-0 */
if (index == 0)
bit = BIT(cpu_logical_map(0));
@@ -175,7 +176,7 @@ static int eiointc_router_init(unsigned int cpu)
iocsr_write32(data, EIOINTC_REG_ROUTE + i * 4);
}
- for (i = 0; i < VEC_COUNT / 32; i++) {
+ for (i = 0; i < eiointc_priv[0]->vec_count / 32; i++) {
data = 0xffffffff;
iocsr_write32(data, EIOINTC_REG_ENABLE + i * 4);
iocsr_write32(data, EIOINTC_REG_BOUNCE + i * 4);
@@ -195,7 +196,7 @@ static void eiointc_irq_dispatch(struct irq_desc *desc)
chained_irq_enter(chip, desc);
- for (i = 0; i < VEC_REG_COUNT; i++) {
+ for (i = 0; i < eiointc_priv[0]->vec_count / VEC_COUNT_PER_REG; i++) {
pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3));
iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3));
while (pending) {
@@ -310,11 +311,11 @@ static void eiointc_resume(void)
eiointc_router_init(0);
for (i = 0; i < nr_pics; i++) {
- for (j = 0; j < VEC_COUNT; j++) {
+ for (j = 0; j < eiointc_priv[0]->vec_count; j++) {
desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j);
if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) {
raw_spin_lock(&desc->lock);
- irq_data = &desc->irq_data;
+ irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc));
eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0);
raw_spin_unlock(&desc->lock);
}
@@ -375,11 +376,47 @@ static int __init acpi_cascade_irqdomain_init(void)
return 0;
}
+static int __init eiointc_init(struct eiointc_priv *priv, int parent_irq,
+ u64 node_map)
+{
+ int i;
+
+ node_map = node_map ? node_map : -1ULL;
+ for_each_possible_cpu(i) {
+ if (node_map & (1ULL << (cpu_to_eio_node(i)))) {
+ node_set(cpu_to_eio_node(i), priv->node_map);
+ cpumask_or(&priv->cpuspan_map, &priv->cpuspan_map,
+ cpumask_of(i));
+ }
+ }
+
+ priv->eiointc_domain = irq_domain_create_linear(priv->domain_handle,
+ priv->vec_count,
+ &eiointc_domain_ops,
+ priv);
+ if (!priv->eiointc_domain) {
+ pr_err("loongson-extioi: cannot add IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ eiointc_priv[nr_pics++] = priv;
+ eiointc_router_init(0);
+ irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
+
+ if (nr_pics == 1) {
+ register_syscore_ops(&eiointc_syscore_ops);
+ cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
+ "irqchip/loongarch/intc:starting",
+ eiointc_router_init, NULL);
+ }
+
+ return 0;
+}
+
int __init eiointc_acpi_init(struct irq_domain *parent,
struct acpi_madt_eio_pic *acpi_eiointc)
{
- int i, ret, parent_irq;
- unsigned long node_map;
+ int parent_irq, ret;
struct eiointc_priv *priv;
int node;
@@ -394,37 +431,14 @@ int __init eiointc_acpi_init(struct irq_domain *parent,
goto out_free_priv;
}
+ priv->vec_count = VEC_COUNT;
priv->node = acpi_eiointc->node;
- node_map = acpi_eiointc->node_map ? : -1ULL;
-
- for_each_possible_cpu(i) {
- if (node_map & (1ULL << cpu_to_eio_node(i))) {
- node_set(cpu_to_eio_node(i), priv->node_map);
- cpumask_or(&priv->cpuspan_map, &priv->cpuspan_map, cpumask_of(i));
- }
- }
-
- /* Setup IRQ domain */
- priv->eiointc_domain = irq_domain_create_linear(priv->domain_handle, VEC_COUNT,
- &eiointc_domain_ops, priv);
- if (!priv->eiointc_domain) {
- pr_err("loongson-eiointc: cannot add IRQ domain\n");
- goto out_free_handle;
- }
-
- eiointc_priv[nr_pics++] = priv;
-
- eiointc_router_init(0);
parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade);
- irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
- if (nr_pics == 1) {
- register_syscore_ops(&eiointc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
- "irqchip/loongarch/intc:starting",
- eiointc_router_init, NULL);
- }
+ ret = eiointc_init(priv, parent_irq, acpi_eiointc->node_map);
+ if (ret < 0)
+ goto out_free_handle;
if (cpu_has_flatmode)
node = cpu_to_node(acpi_eiointc->node * CORES_PER_EIO_NODE);
@@ -432,7 +446,10 @@ int __init eiointc_acpi_init(struct irq_domain *parent,
node = acpi_eiointc->node;
acpi_set_vec_parent(node, priv->eiointc_domain, pch_group);
acpi_set_vec_parent(node, priv->eiointc_domain, msi_group);
+
ret = acpi_cascade_irqdomain_init();
+ if (ret < 0)
+ goto out_free_handle;
return ret;
@@ -444,3 +461,49 @@ out_free_priv:
return -ENOMEM;
}
+
+static int __init eiointc_of_init(struct device_node *of_node,
+ struct device_node *parent)
+{
+ int parent_irq, ret;
+ struct eiointc_priv *priv;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ parent_irq = irq_of_parse_and_map(of_node, 0);
+ if (parent_irq <= 0) {
+ ret = -ENODEV;
+ goto out_free_priv;
+ }
+
+ ret = irq_set_handler_data(parent_irq, priv);
+ if (ret < 0)
+ goto out_free_priv;
+
+ /*
+ * In particular, the number of devices supported by the LS2K0500
+ * extended I/O interrupt vector is 128.
+ */
+ if (of_device_is_compatible(of_node, "loongson,ls2k0500-eiointc"))
+ priv->vec_count = 128;
+ else
+ priv->vec_count = VEC_COUNT;
+
+ priv->node = 0;
+ priv->domain_handle = of_node_to_fwnode(of_node);
+
+ ret = eiointc_init(priv, parent_irq, 0);
+ if (ret < 0)
+ goto out_free_priv;
+
+ return 0;
+
+out_free_priv:
+ kfree(priv);
+ return ret;
+}
+
+IRQCHIP_DECLARE(loongson_ls2k0500_eiointc, "loongson,ls2k0500-eiointc", eiointc_of_init);
+IRQCHIP_DECLARE(loongson_ls2k2000_eiointc, "loongson,ls2k2000-eiointc", eiointc_of_init);
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index 8d00a9ad5b00..e4b33aed1c97 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -32,6 +32,10 @@
#define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04)
#define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08)
#define LIOINTC_REG_INTC_DISABLE (LIOINTC_INTC_CHIP_START + 0x0c)
+/*
+ * LIOINTC_REG_INTC_POL register is only valid for Loongson-2K series, and
+ * Loongson-3 series behave as noops.
+ */
#define LIOINTC_REG_INTC_POL (LIOINTC_INTC_CHIP_START + 0x10)
#define LIOINTC_REG_INTC_EDGE (LIOINTC_INTC_CHIP_START + 0x14)
@@ -116,19 +120,19 @@ static int liointc_set_type(struct irq_data *data, unsigned int type)
switch (type) {
case IRQ_TYPE_LEVEL_HIGH:
liointc_set_bit(gc, LIOINTC_REG_INTC_EDGE, mask, false);
- liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, true);
+ liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, false);
break;
case IRQ_TYPE_LEVEL_LOW:
liointc_set_bit(gc, LIOINTC_REG_INTC_EDGE, mask, false);
- liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, false);
+ liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, true);
break;
case IRQ_TYPE_EDGE_RISING:
liointc_set_bit(gc, LIOINTC_REG_INTC_EDGE, mask, true);
- liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, true);
+ liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, false);
break;
case IRQ_TYPE_EDGE_FALLING:
liointc_set_bit(gc, LIOINTC_REG_INTC_EDGE, mask, true);
- liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, false);
+ liointc_set_bit(gc, LIOINTC_REG_INTC_POL, mask, true);
break;
default:
irq_gc_unlock_irqrestore(gc, flags);
@@ -291,6 +295,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
ct->chip.irq_mask = irq_gc_mask_disable_reg;
ct->chip.irq_mask_ack = irq_gc_mask_disable_reg;
ct->chip.irq_set_type = liointc_set_type;
+ ct->chip.flags = IRQCHIP_SKIP_SET_WAKE;
gc->mask_cache = 0;
priv->gc = gc;
diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
index e5fe4d50be05..93a71f66efeb 100644
--- a/drivers/irqchip/irq-loongson-pch-pic.c
+++ b/drivers/irqchip/irq-loongson-pch-pic.c
@@ -164,7 +164,7 @@ static int pch_pic_domain_translate(struct irq_domain *d,
if (fwspec->param_count < 2)
return -EINVAL;
- *hwirq = fwspec->param[0] + priv->ht_vec_base;
+ *hwirq = fwspec->param[0];
*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
} else {
if (fwspec->param_count < 1)
@@ -196,7 +196,7 @@ static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq,
parent_fwspec.fwnode = domain->parent->fwnode;
parent_fwspec.param_count = 1;
- parent_fwspec.param[0] = hwirq;
+ parent_fwspec.param[0] = hwirq + priv->ht_vec_base;
err = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec);
if (err)
@@ -401,14 +401,12 @@ static int __init acpi_cascade_irqdomain_init(void)
int __init pch_pic_acpi_init(struct irq_domain *parent,
struct acpi_madt_bio_pic *acpi_pchpic)
{
- int ret, vec_base;
+ int ret;
struct fwnode_handle *domain_handle;
if (find_pch_pic(acpi_pchpic->gsi_base) >= 0)
return 0;
- vec_base = acpi_pchpic->gsi_base - GSI_MIN_PCH_IRQ;
-
domain_handle = irq_domain_alloc_fwnode(&acpi_pchpic->address);
if (!domain_handle) {
pr_err("Unable to allocate domain handle\n");
@@ -416,7 +414,7 @@ int __init pch_pic_acpi_init(struct irq_domain *parent,
}
ret = pch_pic_init(acpi_pchpic->address, acpi_pchpic->size,
- vec_base, parent, domain_handle, acpi_pchpic->gsi_base);
+ 0, parent, domain_handle, acpi_pchpic->gsi_base);
if (ret < 0) {
irq_domain_free_fwnode(domain_handle);
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c
index 83455ca72439..25cf4f80e767 100644
--- a/drivers/irqchip/irq-mmp.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -244,132 +244,6 @@ static void __exception_irq_entry mmp2_handle_irq(struct pt_regs *regs)
generic_handle_domain_irq(icu_data[0].domain, hwirq);
}
-/* MMP (ARMv5) */
-void __init icu_init_irq(void)
-{
- int irq;
-
- max_icu_nr = 1;
- mmp_icu_base = ioremap(0xd4282000, 0x1000);
- icu_data[0].conf_enable = mmp_conf.conf_enable;
- icu_data[0].conf_disable = mmp_conf.conf_disable;
- icu_data[0].conf_mask = mmp_conf.conf_mask;
- icu_data[0].nr_irqs = 64;
- icu_data[0].virq_base = 0;
- icu_data[0].domain = irq_domain_add_legacy(NULL, 64, 0, 0,
- &irq_domain_simple_ops,
- &icu_data[0]);
- for (irq = 0; irq < 64; irq++) {
- icu_mask_irq(irq_get_irq_data(irq));
- irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq);
- }
- irq_set_default_host(icu_data[0].domain);
- set_handle_irq(mmp_handle_irq);
-}
-
-/* MMP2 (ARMv7) */
-void __init mmp2_init_icu(void)
-{
- int irq, end;
-
- max_icu_nr = 8;
- mmp_icu_base = ioremap(0xd4282000, 0x1000);
- icu_data[0].conf_enable = mmp2_conf.conf_enable;
- icu_data[0].conf_disable = mmp2_conf.conf_disable;
- icu_data[0].conf_mask = mmp2_conf.conf_mask;
- icu_data[0].nr_irqs = 64;
- icu_data[0].virq_base = 0;
- icu_data[0].domain = irq_domain_add_legacy(NULL, 64, 0, 0,
- &irq_domain_simple_ops,
- &icu_data[0]);
- icu_data[1].reg_status = mmp_icu_base + 0x150;
- icu_data[1].reg_mask = mmp_icu_base + 0x168;
- icu_data[1].clr_mfp_irq_base = icu_data[0].virq_base +
- icu_data[0].nr_irqs;
- icu_data[1].clr_mfp_hwirq = 1; /* offset to IRQ_MMP2_PMIC_BASE */
- icu_data[1].nr_irqs = 2;
- icu_data[1].cascade_irq = 4;
- icu_data[1].virq_base = icu_data[0].virq_base + icu_data[0].nr_irqs;
- icu_data[1].domain = irq_domain_add_legacy(NULL, icu_data[1].nr_irqs,
- icu_data[1].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[1]);
- icu_data[2].reg_status = mmp_icu_base + 0x154;
- icu_data[2].reg_mask = mmp_icu_base + 0x16c;
- icu_data[2].nr_irqs = 2;
- icu_data[2].cascade_irq = 5;
- icu_data[2].virq_base = icu_data[1].virq_base + icu_data[1].nr_irqs;
- icu_data[2].domain = irq_domain_add_legacy(NULL, icu_data[2].nr_irqs,
- icu_data[2].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[2]);
- icu_data[3].reg_status = mmp_icu_base + 0x180;
- icu_data[3].reg_mask = mmp_icu_base + 0x17c;
- icu_data[3].nr_irqs = 3;
- icu_data[3].cascade_irq = 9;
- icu_data[3].virq_base = icu_data[2].virq_base + icu_data[2].nr_irqs;
- icu_data[3].domain = irq_domain_add_legacy(NULL, icu_data[3].nr_irqs,
- icu_data[3].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[3]);
- icu_data[4].reg_status = mmp_icu_base + 0x158;
- icu_data[4].reg_mask = mmp_icu_base + 0x170;
- icu_data[4].nr_irqs = 5;
- icu_data[4].cascade_irq = 17;
- icu_data[4].virq_base = icu_data[3].virq_base + icu_data[3].nr_irqs;
- icu_data[4].domain = irq_domain_add_legacy(NULL, icu_data[4].nr_irqs,
- icu_data[4].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[4]);
- icu_data[5].reg_status = mmp_icu_base + 0x15c;
- icu_data[5].reg_mask = mmp_icu_base + 0x174;
- icu_data[5].nr_irqs = 15;
- icu_data[5].cascade_irq = 35;
- icu_data[5].virq_base = icu_data[4].virq_base + icu_data[4].nr_irqs;
- icu_data[5].domain = irq_domain_add_legacy(NULL, icu_data[5].nr_irqs,
- icu_data[5].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[5]);
- icu_data[6].reg_status = mmp_icu_base + 0x160;
- icu_data[6].reg_mask = mmp_icu_base + 0x178;
- icu_data[6].nr_irqs = 2;
- icu_data[6].cascade_irq = 51;
- icu_data[6].virq_base = icu_data[5].virq_base + icu_data[5].nr_irqs;
- icu_data[6].domain = irq_domain_add_legacy(NULL, icu_data[6].nr_irqs,
- icu_data[6].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[6]);
- icu_data[7].reg_status = mmp_icu_base + 0x188;
- icu_data[7].reg_mask = mmp_icu_base + 0x184;
- icu_data[7].nr_irqs = 2;
- icu_data[7].cascade_irq = 55;
- icu_data[7].virq_base = icu_data[6].virq_base + icu_data[6].nr_irqs;
- icu_data[7].domain = irq_domain_add_legacy(NULL, icu_data[7].nr_irqs,
- icu_data[7].virq_base, 0,
- &irq_domain_simple_ops,
- &icu_data[7]);
- end = icu_data[7].virq_base + icu_data[7].nr_irqs;
- for (irq = 0; irq < end; irq++) {
- icu_mask_irq(irq_get_irq_data(irq));
- if (irq == icu_data[1].cascade_irq ||
- irq == icu_data[2].cascade_irq ||
- irq == icu_data[3].cascade_irq ||
- irq == icu_data[4].cascade_irq ||
- irq == icu_data[5].cascade_irq ||
- irq == icu_data[6].cascade_irq ||
- irq == icu_data[7].cascade_irq) {
- irq_set_chip(irq, &icu_irq_chip);
- irq_set_chained_handler(irq, icu_mux_irq_demux);
- } else {
- irq_set_chip_and_handler(irq, &icu_irq_chip,
- handle_level_irq);
- }
- }
- irq_set_default_host(icu_data[0].domain);
- set_handle_irq(mmp2_handle_irq);
-}
-
-#ifdef CONFIG_OF
static int __init mmp_init_bases(struct device_node *node)
{
int ret, nr_irqs, irq, i = 0;
@@ -548,4 +422,3 @@ err:
return -EINVAL;
}
IRQCHIP_DECLARE(mmp2_mux_intc, "mrvl,mmp2-mux-intc", mmp2_mux_of_init);
-#endif
diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index 55cb6b5a686e..be9680645545 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -201,6 +201,7 @@ static int __init icoll_of_init(struct device_node *np,
stmp_reset_block(icoll_priv.ctrl);
icoll_add_domain(np, ICOLL_NUM_IRQS);
+ set_handle_irq(icoll_handle_irq);
return 0;
}
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 6a3f7498ea8e..b5fa76ce5046 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -173,6 +173,16 @@ static struct irq_chip stm32_exti_h_chip_direct;
#define EXTI_INVALID_IRQ U8_MAX
#define STM32MP1_DESC_IRQ_SIZE (ARRAY_SIZE(stm32mp1_exti_banks) * IRQS_PER_BANK)
+/*
+ * Use some intentionally tricky logic here to initialize the whole array to
+ * EXTI_INVALID_IRQ, but then override certain fields, requiring us to indicate
+ * that we "know" that there are overrides in this structure, and we'll need to
+ * disable that warning from W=1 builds.
+ */
+__diag_push();
+__diag_ignore_all("-Woverride-init",
+ "logic to initialize all and then override some is OK");
+
static const u8 stm32mp1_desc_irq[] = {
/* default value */
[0 ... (STM32MP1_DESC_IRQ_SIZE - 1)] = EXTI_INVALID_IRQ,
@@ -208,6 +218,7 @@ static const u8 stm32mp1_desc_irq[] = {
[31] = 53,
[32] = 82,
[33] = 83,
+ [46] = 151,
[47] = 93,
[48] = 138,
[50] = 139,
@@ -266,6 +277,8 @@ static const u8 stm32mp13_desc_irq[] = {
[70] = 98,
};
+__diag_pop();
+
static const struct stm32_exti_drv_data stm32mp1_drv_data = {
.exti_banks = stm32mp1_exti_banks,
.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index d5e774d83021..32b66703068a 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -13,6 +13,7 @@
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/device.h>
+#include <linux/ethtool.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -20,10 +21,13 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/netdevice.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/rtnetlink.h>
#include <linux/timer.h>
#include "../leds.h"
+#define NETDEV_LED_DEFAULT_INTERVAL 50
+
/*
* Configurable sysfs attributes:
*
@@ -37,7 +41,7 @@
*/
struct led_netdev_data {
- spinlock_t lock;
+ struct mutex lock;
struct delayed_work work;
struct notifier_block notifier;
@@ -50,16 +54,11 @@ struct led_netdev_data {
unsigned int last_activity;
unsigned long mode;
-#define NETDEV_LED_LINK 0
-#define NETDEV_LED_TX 1
-#define NETDEV_LED_RX 2
-#define NETDEV_LED_MODE_LINKUP 3
-};
+ int link_speed;
+ u8 duplex;
-enum netdev_led_attr {
- NETDEV_ATTR_LINK,
- NETDEV_ATTR_TX,
- NETDEV_ATTR_RX
+ bool carrier_link_up;
+ bool hw_control;
};
static void set_baseline_state(struct led_netdev_data *trigger_data)
@@ -67,16 +66,48 @@ static void set_baseline_state(struct led_netdev_data *trigger_data)
int current_brightness;
struct led_classdev *led_cdev = trigger_data->led_cdev;
+ /* Already validated, hw control is possible with the requested mode */
+ if (trigger_data->hw_control) {
+ led_cdev->hw_control_set(led_cdev, trigger_data->mode);
+
+ return;
+ }
+
current_brightness = led_cdev->brightness;
if (current_brightness)
led_cdev->blink_brightness = current_brightness;
if (!led_cdev->blink_brightness)
led_cdev->blink_brightness = led_cdev->max_brightness;
- if (!test_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode))
+ if (!trigger_data->carrier_link_up) {
led_set_brightness(led_cdev, LED_OFF);
- else {
- if (test_bit(NETDEV_LED_LINK, &trigger_data->mode))
+ } else {
+ bool blink_on = false;
+
+ if (test_bit(TRIGGER_NETDEV_LINK, &trigger_data->mode))
+ blink_on = true;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_10, &trigger_data->mode) &&
+ trigger_data->link_speed == SPEED_10)
+ blink_on = true;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_100, &trigger_data->mode) &&
+ trigger_data->link_speed == SPEED_100)
+ blink_on = true;
+
+ if (test_bit(TRIGGER_NETDEV_LINK_1000, &trigger_data->mode) &&
+ trigger_data->link_speed == SPEED_1000)
+ blink_on = true;
+
+ if (test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &trigger_data->mode) &&
+ trigger_data->duplex == DUPLEX_HALF)
+ blink_on = true;
+
+ if (test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &trigger_data->mode) &&
+ trigger_data->duplex == DUPLEX_FULL)
+ blink_on = true;
+
+ if (blink_on)
led_set_brightness(led_cdev,
led_cdev->blink_brightness);
else
@@ -85,44 +116,121 @@ static void set_baseline_state(struct led_netdev_data *trigger_data)
/* If we are looking for RX/TX start periodically
* checking stats
*/
- if (test_bit(NETDEV_LED_TX, &trigger_data->mode) ||
- test_bit(NETDEV_LED_RX, &trigger_data->mode))
+ if (test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode))
schedule_delayed_work(&trigger_data->work, 0);
}
}
+static bool supports_hw_control(struct led_classdev *led_cdev)
+{
+ if (!led_cdev->hw_control_get || !led_cdev->hw_control_set ||
+ !led_cdev->hw_control_is_supported)
+ return false;
+
+ return !strcmp(led_cdev->hw_control_trigger, led_cdev->trigger->name);
+}
+
+/*
+ * Validate the configured netdev is the same as the one associated with
+ * the LED driver in hw control.
+ */
+static bool validate_net_dev(struct led_classdev *led_cdev,
+ struct net_device *net_dev)
+{
+ struct device *dev = led_cdev->hw_control_get_device(led_cdev);
+ struct net_device *ndev;
+
+ if (!dev)
+ return false;
+
+ ndev = to_net_dev(dev);
+
+ return ndev == net_dev;
+}
+
+static bool can_hw_control(struct led_netdev_data *trigger_data)
+{
+ unsigned long default_interval = msecs_to_jiffies(NETDEV_LED_DEFAULT_INTERVAL);
+ unsigned int interval = atomic_read(&trigger_data->interval);
+ struct led_classdev *led_cdev = trigger_data->led_cdev;
+ int ret;
+
+ if (!supports_hw_control(led_cdev))
+ return false;
+
+ /*
+ * Interval must be set to the default
+ * value. Any different value is rejected if in hw
+ * control.
+ */
+ if (interval != default_interval)
+ return false;
+
+ /*
+ * net_dev must be set with hw control, otherwise no
+ * blinking can happen and there is nothing to
+ * offload. Additionally, for hw control to be
+ * valid, the configured netdev must be the same as
+ * the netdev associated with the LED.
+ */
+ if (!validate_net_dev(led_cdev, trigger_data->net_dev))
+ return false;
+
+ /* Check if the requested mode is supported */
+ ret = led_cdev->hw_control_is_supported(led_cdev, trigger_data->mode);
+ /* Fall back to software blinking if not supported */
+ if (ret == -EOPNOTSUPP)
+ return false;
+ if (ret) {
+ dev_warn(led_cdev->dev,
+ "Current mode check failed with error %d\n", ret);
+ return false;
+ }
+
+ return true;
+}
+
+static void get_device_state(struct led_netdev_data *trigger_data)
+{
+ struct ethtool_link_ksettings cmd;
+
+ trigger_data->carrier_link_up = netif_carrier_ok(trigger_data->net_dev);
+ if (!trigger_data->carrier_link_up)
+ return;
+
+ if (!__ethtool_get_link_ksettings(trigger_data->net_dev, &cmd)) {
+ trigger_data->link_speed = cmd.base.speed;
+ trigger_data->duplex = cmd.base.duplex;
+ }
+}
+
static ssize_t device_name_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
ssize_t len;
- spin_lock_bh(&trigger_data->lock);
+ mutex_lock(&trigger_data->lock);
len = sprintf(buf, "%s\n", trigger_data->device_name);
- spin_unlock_bh(&trigger_data->lock);
+ mutex_unlock(&trigger_data->lock);
return len;
}
-static ssize_t device_name_store(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t size)
+static int set_device_name(struct led_netdev_data *trigger_data,
+ const char *name, size_t size)
{
- struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
-
- if (size >= IFNAMSIZ)
- return -EINVAL;
-
cancel_delayed_work_sync(&trigger_data->work);
- spin_lock_bh(&trigger_data->lock);
+ mutex_lock(&trigger_data->lock);
if (trigger_data->net_dev) {
dev_put(trigger_data->net_dev);
trigger_data->net_dev = NULL;
}
- memcpy(trigger_data->device_name, buf, size);
+ memcpy(trigger_data->device_name, name, size);
trigger_data->device_name[size] = 0;
if (size > 0 && trigger_data->device_name[size - 1] == '\n')
trigger_data->device_name[size - 1] = 0;
@@ -131,36 +239,58 @@ static ssize_t device_name_store(struct device *dev,
trigger_data->net_dev =
dev_get_by_name(&init_net, trigger_data->device_name);
- clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
- if (trigger_data->net_dev != NULL)
- if (netif_carrier_ok(trigger_data->net_dev))
- set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+ trigger_data->carrier_link_up = false;
+ trigger_data->link_speed = SPEED_UNKNOWN;
+ trigger_data->duplex = DUPLEX_UNKNOWN;
+ if (trigger_data->net_dev != NULL) {
+ rtnl_lock();
+ get_device_state(trigger_data);
+ rtnl_unlock();
+ }
trigger_data->last_activity = 0;
set_baseline_state(trigger_data);
- spin_unlock_bh(&trigger_data->lock);
+ mutex_unlock(&trigger_data->lock);
+
+ return 0;
+}
+static ssize_t device_name_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t size)
+{
+ struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
+ int ret;
+
+ if (size >= IFNAMSIZ)
+ return -EINVAL;
+
+ ret = set_device_name(trigger_data, buf, size);
+
+ if (ret < 0)
+ return ret;
return size;
}
static DEVICE_ATTR_RW(device_name);
static ssize_t netdev_led_attr_show(struct device *dev, char *buf,
- enum netdev_led_attr attr)
+ enum led_trigger_netdev_modes attr)
{
struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
int bit;
switch (attr) {
- case NETDEV_ATTR_LINK:
- bit = NETDEV_LED_LINK;
- break;
- case NETDEV_ATTR_TX:
- bit = NETDEV_LED_TX;
- break;
- case NETDEV_ATTR_RX:
- bit = NETDEV_LED_RX;
+ case TRIGGER_NETDEV_LINK:
+ case TRIGGER_NETDEV_LINK_10:
+ case TRIGGER_NETDEV_LINK_100:
+ case TRIGGER_NETDEV_LINK_1000:
+ case TRIGGER_NETDEV_HALF_DUPLEX:
+ case TRIGGER_NETDEV_FULL_DUPLEX:
+ case TRIGGER_NETDEV_TX:
+ case TRIGGER_NETDEV_RX:
+ bit = attr;
break;
default:
return -EINVAL;
@@ -170,10 +300,10 @@ static ssize_t netdev_led_attr_show(struct device *dev, char *buf,
}
static ssize_t netdev_led_attr_store(struct device *dev, const char *buf,
- size_t size, enum netdev_led_attr attr)
+ size_t size, enum led_trigger_netdev_modes attr)
{
struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
- unsigned long state;
+ unsigned long state, mode = trigger_data->mode;
int ret;
int bit;
@@ -182,72 +312,62 @@ static ssize_t netdev_led_attr_store(struct device *dev, const char *buf,
return ret;
switch (attr) {
- case NETDEV_ATTR_LINK:
- bit = NETDEV_LED_LINK;
- break;
- case NETDEV_ATTR_TX:
- bit = NETDEV_LED_TX;
- break;
- case NETDEV_ATTR_RX:
- bit = NETDEV_LED_RX;
+ case TRIGGER_NETDEV_LINK:
+ case TRIGGER_NETDEV_LINK_10:
+ case TRIGGER_NETDEV_LINK_100:
+ case TRIGGER_NETDEV_LINK_1000:
+ case TRIGGER_NETDEV_HALF_DUPLEX:
+ case TRIGGER_NETDEV_FULL_DUPLEX:
+ case TRIGGER_NETDEV_TX:
+ case TRIGGER_NETDEV_RX:
+ bit = attr;
break;
default:
return -EINVAL;
}
- cancel_delayed_work_sync(&trigger_data->work);
-
if (state)
- set_bit(bit, &trigger_data->mode);
+ set_bit(bit, &mode);
else
- clear_bit(bit, &trigger_data->mode);
-
- set_baseline_state(trigger_data);
-
- return size;
-}
-
-static ssize_t link_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return netdev_led_attr_show(dev, buf, NETDEV_ATTR_LINK);
-}
+ clear_bit(bit, &mode);
-static ssize_t link_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_LINK);
-}
-
-static DEVICE_ATTR_RW(link);
-
-static ssize_t tx_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return netdev_led_attr_show(dev, buf, NETDEV_ATTR_TX);
-}
+ if (test_bit(TRIGGER_NETDEV_LINK, &mode) &&
+ (test_bit(TRIGGER_NETDEV_LINK_10, &mode) ||
+ test_bit(TRIGGER_NETDEV_LINK_100, &mode) ||
+ test_bit(TRIGGER_NETDEV_LINK_1000, &mode)))
+ return -EINVAL;
-static ssize_t tx_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_TX);
-}
+ cancel_delayed_work_sync(&trigger_data->work);
-static DEVICE_ATTR_RW(tx);
+ trigger_data->mode = mode;
+ trigger_data->hw_control = can_hw_control(trigger_data);
-static ssize_t rx_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return netdev_led_attr_show(dev, buf, NETDEV_ATTR_RX);
-}
+ set_baseline_state(trigger_data);
-static ssize_t rx_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return netdev_led_attr_store(dev, buf, size, NETDEV_ATTR_RX);
+ return size;
}
-static DEVICE_ATTR_RW(rx);
+#define DEFINE_NETDEV_TRIGGER(trigger_name, trigger) \
+ static ssize_t trigger_name##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+ { \
+ return netdev_led_attr_show(dev, buf, trigger); \
+ } \
+ static ssize_t trigger_name##_store(struct device *dev, \
+ struct device_attribute *attr, const char *buf, size_t size) \
+ { \
+ return netdev_led_attr_store(dev, buf, size, trigger); \
+ } \
+ static DEVICE_ATTR_RW(trigger_name)
+
+DEFINE_NETDEV_TRIGGER(link, TRIGGER_NETDEV_LINK);
+DEFINE_NETDEV_TRIGGER(link_10, TRIGGER_NETDEV_LINK_10);
+DEFINE_NETDEV_TRIGGER(link_100, TRIGGER_NETDEV_LINK_100);
+DEFINE_NETDEV_TRIGGER(link_1000, TRIGGER_NETDEV_LINK_1000);
+DEFINE_NETDEV_TRIGGER(half_duplex, TRIGGER_NETDEV_HALF_DUPLEX);
+DEFINE_NETDEV_TRIGGER(full_duplex, TRIGGER_NETDEV_FULL_DUPLEX);
+DEFINE_NETDEV_TRIGGER(tx, TRIGGER_NETDEV_TX);
+DEFINE_NETDEV_TRIGGER(rx, TRIGGER_NETDEV_RX);
static ssize_t interval_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -266,6 +386,9 @@ static ssize_t interval_store(struct device *dev,
unsigned long value;
int ret;
+ if (trigger_data->hw_control)
+ return -EINVAL;
+
ret = kstrtoul(buf, 0, &value);
if (ret)
return ret;
@@ -283,12 +406,28 @@ static ssize_t interval_store(struct device *dev,
static DEVICE_ATTR_RW(interval);
+static ssize_t hw_control_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", trigger_data->hw_control);
+}
+
+static DEVICE_ATTR_RO(hw_control);
+
static struct attribute *netdev_trig_attrs[] = {
&dev_attr_device_name.attr,
&dev_attr_link.attr,
+ &dev_attr_link_10.attr,
+ &dev_attr_link_100.attr,
+ &dev_attr_link_1000.attr,
+ &dev_attr_full_duplex.attr,
+ &dev_attr_half_duplex.attr,
&dev_attr_rx.attr,
&dev_attr_tx.attr,
&dev_attr_interval.attr,
+ &dev_attr_hw_control.attr,
NULL
};
ATTRIBUTE_GROUPS(netdev_trig);
@@ -313,11 +452,15 @@ static int netdev_trig_notify(struct notifier_block *nb,
cancel_delayed_work_sync(&trigger_data->work);
- spin_lock_bh(&trigger_data->lock);
+ mutex_lock(&trigger_data->lock);
- clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+ trigger_data->carrier_link_up = false;
+ trigger_data->link_speed = SPEED_UNKNOWN;
+ trigger_data->duplex = DUPLEX_UNKNOWN;
switch (evt) {
case NETDEV_CHANGENAME:
+ get_device_state(trigger_data);
+ fallthrough;
case NETDEV_REGISTER:
if (trigger_data->net_dev)
dev_put(trigger_data->net_dev);
@@ -330,14 +473,13 @@ static int netdev_trig_notify(struct notifier_block *nb,
break;
case NETDEV_UP:
case NETDEV_CHANGE:
- if (netif_carrier_ok(dev))
- set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode);
+ get_device_state(trigger_data);
break;
}
set_baseline_state(trigger_data);
- spin_unlock_bh(&trigger_data->lock);
+ mutex_unlock(&trigger_data->lock);
return NOTIFY_DONE;
}
@@ -360,21 +502,26 @@ static void netdev_trig_work(struct work_struct *work)
}
/* If we are not looking for RX/TX then return */
- if (!test_bit(NETDEV_LED_TX, &trigger_data->mode) &&
- !test_bit(NETDEV_LED_RX, &trigger_data->mode))
+ if (!test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) &&
+ !test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode))
return;
dev_stats = dev_get_stats(trigger_data->net_dev, &temp);
new_activity =
- (test_bit(NETDEV_LED_TX, &trigger_data->mode) ?
+ (test_bit(TRIGGER_NETDEV_TX, &trigger_data->mode) ?
dev_stats->tx_packets : 0) +
- (test_bit(NETDEV_LED_RX, &trigger_data->mode) ?
+ (test_bit(TRIGGER_NETDEV_RX, &trigger_data->mode) ?
dev_stats->rx_packets : 0);
if (trigger_data->last_activity != new_activity) {
led_stop_software_blink(trigger_data->led_cdev);
- invert = test_bit(NETDEV_LED_LINK, &trigger_data->mode);
+ invert = test_bit(TRIGGER_NETDEV_LINK, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_LINK_10, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_LINK_100, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_LINK_1000, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_HALF_DUPLEX, &trigger_data->mode) ||
+ test_bit(TRIGGER_NETDEV_FULL_DUPLEX, &trigger_data->mode);
interval = jiffies_to_msecs(
atomic_read(&trigger_data->interval));
/* base state is ON (link present) */
@@ -392,13 +539,15 @@ static void netdev_trig_work(struct work_struct *work)
static int netdev_trig_activate(struct led_classdev *led_cdev)
{
struct led_netdev_data *trigger_data;
+ unsigned long mode = 0;
+ struct device *dev;
int rc;
trigger_data = kzalloc(sizeof(struct led_netdev_data), GFP_KERNEL);
if (!trigger_data)
return -ENOMEM;
- spin_lock_init(&trigger_data->lock);
+ mutex_init(&trigger_data->lock);
trigger_data->notifier.notifier_call = netdev_trig_notify;
trigger_data->notifier.priority = 10;
@@ -410,9 +559,24 @@ static int netdev_trig_activate(struct led_classdev *led_cdev)
trigger_data->device_name[0] = 0;
trigger_data->mode = 0;
- atomic_set(&trigger_data->interval, msecs_to_jiffies(50));
+ atomic_set(&trigger_data->interval, msecs_to_jiffies(NETDEV_LED_DEFAULT_INTERVAL));
trigger_data->last_activity = 0;
+ /* Check if hw control is active by default on the LED.
+ * If so, initialize with the mode already enabled in hw control.
+ */
+ if (supports_hw_control(led_cdev) &&
+ !led_cdev->hw_control_get(led_cdev, &mode)) {
+ dev = led_cdev->hw_control_get_device(led_cdev);
+ if (dev) {
+ const char *name = dev_name(dev);
+
+ set_device_name(trigger_data, name, strlen(name));
+ trigger_data->hw_control = true;
+ trigger_data->mode = mode;
+ }
+ }
+
led_set_trigger_data(led_cdev, trigger_data);
rc = register_netdevice_notifier(&trigger_data->notifier);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index aebb7ef10e63..5a79bb3c272f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -275,7 +275,7 @@ struct bcache_device {
int (*cache_miss)(struct btree *b, struct search *s,
struct bio *bio, unsigned int sectors);
- int (*ioctl)(struct bcache_device *d, fmode_t mode,
+ int (*ioctl)(struct bcache_device *d, blk_mode_t mode,
unsigned int cmd, unsigned long arg);
};
@@ -1004,11 +1004,11 @@ extern struct workqueue_struct *bch_flush_wq;
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
-extern struct kobj_type bch_cached_dev_ktype;
-extern struct kobj_type bch_flash_dev_ktype;
-extern struct kobj_type bch_cache_set_ktype;
-extern struct kobj_type bch_cache_set_internal_ktype;
-extern struct kobj_type bch_cache_ktype;
+extern const struct kobj_type bch_cached_dev_ktype;
+extern const struct kobj_type bch_flash_dev_ktype;
+extern const struct kobj_type bch_cache_set_ktype;
+extern const struct kobj_type bch_cache_set_internal_ktype;
+extern const struct kobj_type bch_cache_ktype;
void bch_cached_dev_release(struct kobject *kobj);
void bch_flash_dev_release(struct kobject *kobj);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 147c493a989a..fd121a61f17c 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -559,6 +559,27 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
}
}
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int btree_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct btree *a = container_of(_a, struct btree, lock.dep_map);
+ const struct btree *b = container_of(_b, struct btree, lock.dep_map);
+
+ return -cmp_int(a->level, b->level) ?: bkey_cmp(&a->key, &b->key);
+}
+
+static void btree_lock_print_fn(const struct lockdep_map *map)
+{
+ const struct btree *b = container_of(map, struct btree, lock.dep_map);
+
+ printk(KERN_CONT " l=%u %llu:%llu", b->level,
+ KEY_INODE(&b->key), KEY_OFFSET(&b->key));
+}
+#endif
+
static struct btree *mca_bucket_alloc(struct cache_set *c,
struct bkey *k, gfp_t gfp)
{
@@ -572,7 +593,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
return NULL;
init_rwsem(&b->lock);
- lockdep_set_novalidate_class(&b->lock);
+ lock_set_cmp_fn(&b->lock, btree_lock_cmp_fn, btree_lock_print_fn);
mutex_init(&b->write_lock);
lockdep_set_novalidate_class(&b->write_lock);
INIT_LIST_HEAD(&b->list);
@@ -885,7 +906,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
* cannibalize_bucket() will take. This means every time we unlock the root of
* the btree, we need to release this lock if we have it held.
*/
-static void bch_cannibalize_unlock(struct cache_set *c)
+void bch_cannibalize_unlock(struct cache_set *c)
{
spin_lock(&c->btree_cannibalize_lock);
if (c->btree_cache_alloc_lock == current) {
@@ -1090,10 +1111,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
struct btree *parent)
{
BKEY_PADDED(key) k;
- struct btree *b = ERR_PTR(-EAGAIN);
+ struct btree *b;
mutex_lock(&c->bucket_lock);
retry:
+ /* return ERR_PTR(-EAGAIN) when it fails */
+ b = ERR_PTR(-EAGAIN);
if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait))
goto err;
@@ -1138,7 +1161,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
{
struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
- if (!IS_ERR_OR_NULL(n)) {
+ if (!IS_ERR(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
bkey_copy_key(&n->key, &b->key);
@@ -1340,7 +1363,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
memset(new_nodes, 0, sizeof(new_nodes));
closure_init_stack(&cl);
- while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
+ while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b))
keys += r[nodes++].keys;
blocks = btree_default_blocks(b->c) * 2 / 3;
@@ -1352,7 +1375,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
for (i = 0; i < nodes; i++) {
new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL);
- if (IS_ERR_OR_NULL(new_nodes[i]))
+ if (IS_ERR(new_nodes[i]))
goto out_nocoalesce;
}
@@ -1487,7 +1510,7 @@ out_nocoalesce:
bch_keylist_free(&keylist);
for (i = 0; i < nodes; i++)
- if (!IS_ERR_OR_NULL(new_nodes[i])) {
+ if (!IS_ERR(new_nodes[i])) {
btree_node_free(new_nodes[i]);
rw_unlock(true, new_nodes[i]);
}
@@ -1669,7 +1692,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
if (should_rewrite) {
n = btree_node_alloc_replacement(b, NULL);
- if (!IS_ERR_OR_NULL(n)) {
+ if (!IS_ERR(n)) {
bch_btree_node_write_sync(n);
bch_btree_set_root(n);
@@ -1968,6 +1991,15 @@ static int bch_btree_check_thread(void *arg)
c->gc_stats.nodes++;
bch_btree_op_init(&op, 0);
ret = bcache_btree(check_recurse, p, c->root, &op);
+ /*
+ * The op may have been added to the cache_set's
+ * btree_cache_wait in mca_cannibalize(); make sure it is
+ * removed from that list and btree_cache_alloc_lock is
+ * released before the op's memory is freed.
+ * Otherwise, btree_cache_wait will be damaged.
+ */
+ bch_cannibalize_unlock(c);
+ finish_wait(&c->btree_cache_wait, &(&op)->wait);
if (ret)
goto out;
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 1b5fdbc0d83e..45d64b54115a 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -247,8 +247,8 @@ static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
static inline void rw_lock(bool w, struct btree *b, int level)
{
- w ? down_write_nested(&b->lock, level + 1)
- : down_read_nested(&b->lock, level + 1);
+ w ? down_write(&b->lock)
+ : down_read(&b->lock);
if (w)
b->seq++;
}
@@ -282,6 +282,7 @@ void bch_initial_gc_finish(struct cache_set *c);
void bch_moving_gc(struct cache_set *c);
int bch_btree_check(struct cache_set *c);
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
+void bch_cannibalize_unlock(struct cache_set *c);
static inline void wake_up_gc(struct cache_set *c)
{
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 67a2e29e0b40..a9b1f3896249 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1228,7 +1228,7 @@ void cached_dev_submit_bio(struct bio *bio)
detached_dev_do_request(d, bio, orig_bdev, start_time);
}
-static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
+static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
@@ -1318,7 +1318,7 @@ void flash_dev_submit_bio(struct bio *bio)
continue_at(cl, search_free, NULL);
}
-static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
+static int flash_dev_ioctl(struct bcache_device *d, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
return -ENOTTY;
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index bd3afc856d53..21b445f8af15 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -18,7 +18,6 @@ struct cache_stats {
unsigned long cache_misses;
unsigned long cache_bypass_hits;
unsigned long cache_bypass_misses;
- unsigned long cache_readaheads;
unsigned long cache_miss_collisions;
unsigned long sectors_bypassed;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 7e9d19fd21dd..e2a803683105 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -732,9 +732,9 @@ out:
/* Bcache device */
-static int open_dev(struct block_device *b, fmode_t mode)
+static int open_dev(struct gendisk *disk, blk_mode_t mode)
{
- struct bcache_device *d = b->bd_disk->private_data;
+ struct bcache_device *d = disk->private_data;
if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
return -ENXIO;
@@ -743,14 +743,14 @@ static int open_dev(struct block_device *b, fmode_t mode)
return 0;
}
-static void release_dev(struct gendisk *b, fmode_t mode)
+static void release_dev(struct gendisk *b)
{
struct bcache_device *d = b->private_data;
closure_put(&d->cl);
}
-static int ioctl_dev(struct block_device *b, fmode_t mode,
+static int ioctl_dev(struct block_device *b, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct bcache_device *d = b->bd_disk->private_data;
@@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl)
put_page(virt_to_page(dc->sb_disk));
if (!IS_ERR_OR_NULL(dc->bdev))
- blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(dc->bdev, bcache_kobj);
wake_up(&unregister_wait);
@@ -1723,7 +1723,7 @@ static void cache_set_flush(struct closure *cl)
if (!IS_ERR_OR_NULL(c->gc_thread))
kthread_stop(c->gc_thread);
- if (!IS_ERR_OR_NULL(c->root))
+ if (!IS_ERR(c->root))
list_add(&c->root->list, &c->btree_cache);
/*
@@ -2087,7 +2087,7 @@ static int run_cache_set(struct cache_set *c)
err = "cannot allocate new btree root";
c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
- if (IS_ERR_OR_NULL(c->root))
+ if (IS_ERR(c->root))
goto err;
mutex_lock(&c->root->write_lock);
@@ -2218,7 +2218,7 @@ void bch_cache_release(struct kobject *kobj)
put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev))
- blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(ca->bdev, bcache_kobj);
kfree(ca);
module_put(THIS_MODULE);
@@ -2359,7 +2359,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
* call blkdev_put() to bdev in bch_cache_release(). So we
* explicitly call blkdev_put() here.
*/
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(bdev, bcache_kobj);
if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM";
else if (ret == -EPERM)
@@ -2461,7 +2461,7 @@ static void register_bdev_worker(struct work_struct *work)
if (!dc) {
fail = true;
put_page(virt_to_page(args->sb_disk));
- blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(args->bdev, bcache_kobj);
goto out;
}
@@ -2491,7 +2491,7 @@ static void register_cache_worker(struct work_struct *work)
if (!ca) {
fail = true;
put_page(virt_to_page(args->sb_disk));
- blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(args->bdev, bcache_kobj);
goto out;
}
@@ -2558,9 +2558,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
ret = -EINVAL;
err = "failed to open device";
- bdev = blkdev_get_by_path(strim(path),
- FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- sb);
+ bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE,
+ bcache_kobj, NULL);
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
dev_t dev;
@@ -2648,7 +2647,7 @@ async_done:
out_put_sb_page:
put_page(virt_to_page(sb_disk));
out_blkdev_put:
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(bdev, register_bcache);
out_free_sb:
kfree(sb);
out_free_path:
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index c6f677059214..0e2c1880f60b 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -1111,26 +1111,25 @@ SHOW(__bch_cache)
vfree(p);
- ret = scnprintf(buf, PAGE_SIZE,
- "Unused: %zu%%\n"
- "Clean: %zu%%\n"
- "Dirty: %zu%%\n"
- "Metadata: %zu%%\n"
- "Average: %llu\n"
- "Sectors per Q: %zu\n"
- "Quantiles: [",
- unused * 100 / (size_t) ca->sb.nbuckets,
- available * 100 / (size_t) ca->sb.nbuckets,
- dirty * 100 / (size_t) ca->sb.nbuckets,
- meta * 100 / (size_t) ca->sb.nbuckets, sum,
- n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
+ ret = sysfs_emit(buf,
+ "Unused: %zu%%\n"
+ "Clean: %zu%%\n"
+ "Dirty: %zu%%\n"
+ "Metadata: %zu%%\n"
+ "Average: %llu\n"
+ "Sectors per Q: %zu\n"
+ "Quantiles: [",
+ unused * 100 / (size_t) ca->sb.nbuckets,
+ available * 100 / (size_t) ca->sb.nbuckets,
+ dirty * 100 / (size_t) ca->sb.nbuckets,
+ meta * 100 / (size_t) ca->sb.nbuckets, sum,
+ n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
for (i = 0; i < ARRAY_SIZE(q); i++)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "%u ", q[i]);
+ ret += sysfs_emit_at(buf, ret, "%u ", q[i]);
ret--;
- ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");
+ ret += sysfs_emit_at(buf, ret, "]\n");
return ret;
}
diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h
index a2ff6447b699..65b8bd975ab1 100644
--- a/drivers/md/bcache/sysfs.h
+++ b/drivers/md/bcache/sysfs.h
@@ -3,7 +3,7 @@
#define _BCACHE_SYSFS_H_
#define KTYPE(type) \
-struct kobj_type type ## _ktype = { \
+const struct kobj_type type ## _ktype = { \
.release = type ## _release, \
.sysfs_ops = &((const struct sysfs_ops) { \
.show = type ## _show, \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d4a5fc0650bb..24c049067f61 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -890,6 +890,16 @@ static int bch_root_node_dirty_init(struct cache_set *c,
if (ret < 0)
pr_warn("sectors dirty init failed, ret=%d!\n", ret);
+ /*
+ * The op may have been added to the cache_set's
+ * btree_cache_wait in mca_cannibalize(); make sure it is
+ * removed from that list and btree_cache_alloc_lock is
+ * released before the op's memory is freed.
+ * Otherwise, btree_cache_wait will be damaged.
+ */
+ bch_cannibalize_unlock(c);
+ finish_wait(&c->btree_cache_wait, &(&op.op)->wait);
+
return ret;
}
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9e0c69958587..acffed750e3e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
- * - must take shrinker_mutex without holding cmd->root_lock
+ * - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 872896218550..911f73f7ebba 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2051,8 +2051,8 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
if (!at_least_one_arg(as, error))
return -EINVAL;
- r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
- &ca->metadata_dev);
+ r = dm_get_device(ca->ti, dm_shift_arg(as),
+ BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->metadata_dev);
if (r) {
*error = "Error opening metadata device";
return r;
@@ -2074,8 +2074,8 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
if (!at_least_one_arg(as, error))
return -EINVAL;
- r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
- &ca->cache_dev);
+ r = dm_get_device(ca->ti, dm_shift_arg(as),
+ BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->cache_dev);
if (r) {
*error = "Error opening cache device";
return r;
@@ -2093,8 +2093,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
if (!at_least_one_arg(as, error))
return -EINVAL;
- r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
- &ca->origin_dev);
+ r = dm_get_device(ca->ti, dm_shift_arg(as),
+ BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->origin_dev);
if (r) {
*error = "Error opening origin device";
return r;
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index f467cdb5a022..94b2fc33f64b 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -1683,8 +1683,8 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char *
int r;
sector_t metadata_dev_size;
- r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
- &clone->metadata_dev);
+ r = dm_get_device(clone->ti, dm_shift_arg(as),
+ BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->metadata_dev);
if (r) {
*error = "Error opening metadata device";
return r;
@@ -1703,8 +1703,8 @@ static int parse_dest_dev(struct clone *clone, struct dm_arg_set *as, char **err
int r;
sector_t dest_dev_size;
- r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
- &clone->dest_dev);
+ r = dm_get_device(clone->ti, dm_shift_arg(as),
+ BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->dest_dev);
if (r) {
*error = "Error opening destination device";
return r;
@@ -1725,7 +1725,7 @@ static int parse_source_dev(struct clone *clone, struct dm_arg_set *as, char **e
int r;
sector_t source_dev_size;
- r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ,
+ r = dm_get_device(clone->ti, dm_shift_arg(as), BLK_OPEN_READ,
&clone->source_dev);
if (r) {
*error = "Error opening source device";
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index aecab0c0720f..ce913ad91a52 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -207,11 +207,10 @@ struct dm_table {
unsigned integrity_added:1;
/*
- * Indicates the rw permissions for the new logical
- * device. This should be a combination of FMODE_READ
- * and FMODE_WRITE.
+ * Indicates the rw permissions for the new logical device. This
+ * should be a combination of BLK_OPEN_READ and BLK_OPEN_WRITE.
*/
- fmode_t mode;
+ blk_mode_t mode;
/* a list of devices used by this table */
struct list_head devices;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8b47b913ee83..15424bfea7ee 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1693,8 +1693,7 @@ retry:
len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
- bio_add_page(clone, page, len, 0);
-
+ __bio_add_page(clone, page, len, 0);
remaining_size -= len;
}
@@ -3256,7 +3255,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->per_bio_data_size = ti->per_io_data_size =
ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
- ARCH_KMALLOC_MINALIGN);
+ ARCH_DMA_MINALIGN);
ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc);
if (ret) {
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 0d70914217ee..6acfa5bf97a4 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1482,14 +1482,16 @@ static int era_ctr(struct dm_target *ti, unsigned int argc, char **argv)
era->ti = ti;
- r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev);
+ r = dm_get_device(ti, argv[0], BLK_OPEN_READ | BLK_OPEN_WRITE,
+ &era->metadata_dev);
if (r) {
ti->error = "Error opening metadata device";
era_destroy(era);
return -EINVAL;
}
- r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev);
+ r = dm_get_device(ti, argv[1], BLK_OPEN_READ | BLK_OPEN_WRITE,
+ &era->origin_dev);
if (r) {
ti->error = "Error opening data device";
era_destroy(era);
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
index d369457dbed0..2a71bcdba92d 100644
--- a/drivers/md/dm-init.c
+++ b/drivers/md/dm-init.c
@@ -293,8 +293,10 @@ static int __init dm_init_init(void)
for (i = 0; i < ARRAY_SIZE(waitfor); i++) {
if (waitfor[i]) {
+ dev_t dev;
+
DMINFO("waiting for device %s ...", waitfor[i]);
- while (!dm_get_dev_t(waitfor[i]))
+ while (early_lookup_bdev(waitfor[i], &dev))
fsleep(5000);
}
}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 31838b13ea54..63ec502fcb12 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4268,10 +4268,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
}
/*
- * If this workqueue were percpu, it would cause bio reordering
+ * If this workqueue weren't ordered, it would cause bio reordering
* and reduced performance.
*/
- ic->wait_wq = alloc_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+ ic->wait_wq = alloc_ordered_workqueue("dm-integrity-wait", WQ_MEM_RECLAIM);
if (!ic->wait_wq) {
ti->error = "Cannot allocate workqueue";
r = -ENOMEM;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 7d5c9c582ed2..6d301019e5e3 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -861,7 +861,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
table = dm_get_inactive_table(md, &srcu_idx);
if (table) {
- if (!(dm_table_get_mode(table) & FMODE_WRITE))
+ if (!(dm_table_get_mode(table) & BLK_OPEN_WRITE))
param->flags |= DM_READONLY_FLAG;
param->target_count = table->num_targets;
}
@@ -1189,7 +1189,7 @@ static int do_resume(struct dm_ioctl *param)
if (old_size && new_size && old_size != new_size)
need_resize_uevent = true;
- if (dm_table_get_mode(new_map) & FMODE_WRITE)
+ if (dm_table_get_mode(new_map) & BLK_OPEN_WRITE)
set_disk_ro(dm_disk(md), 0);
else
set_disk_ro(dm_disk(md), 1);
@@ -1378,12 +1378,12 @@ static int dev_arm_poll(struct file *filp, struct dm_ioctl *param, size_t param_
return 0;
}
-static inline fmode_t get_mode(struct dm_ioctl *param)
+static inline blk_mode_t get_mode(struct dm_ioctl *param)
{
- fmode_t mode = FMODE_READ | FMODE_WRITE;
+ blk_mode_t mode = BLK_OPEN_READ | BLK_OPEN_WRITE;
if (param->flags & DM_READONLY_FLAG)
- mode = FMODE_READ;
+ mode = BLK_OPEN_READ;
return mode;
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index c8821fcb8299..8846bf510a35 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3750,11 +3750,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
* canceling read-auto mode
*/
mddev->ro = 0;
- if (!mddev->suspended && mddev->sync_thread)
+ if (!mddev->suspended)
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- if (!mddev->suspended && mddev->thread)
+ if (!mddev->suspended)
md_wakeup_thread(mddev->thread);
return 0;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 9c49f53760d0..bf7a574499a3 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1241,9 +1241,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
- dev_t origin_dev, cow_dev;
unsigned int args_used, num_flush_bios = 1;
- fmode_t origin_mode = FMODE_READ;
+ blk_mode_t origin_mode = BLK_OPEN_READ;
if (argc < 4) {
ti->error = "requires 4 or more arguments";
@@ -1253,7 +1252,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (dm_target_is_snapshot_merge(ti)) {
num_flush_bios = 2;
- origin_mode = FMODE_WRITE;
+ origin_mode = BLK_OPEN_WRITE;
}
s = kzalloc(sizeof(*s), GFP_KERNEL);
@@ -1279,24 +1278,21 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->error = "Cannot get origin device";
goto bad_origin;
}
- origin_dev = s->origin->bdev->bd_dev;
cow_path = argv[0];
argv++;
argc--;
- cow_dev = dm_get_dev_t(cow_path);
- if (cow_dev && cow_dev == origin_dev) {
- ti->error = "COW device cannot be the same as origin device";
- r = -EINVAL;
- goto bad_cow;
- }
-
r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
if (r) {
ti->error = "Cannot get COW device";
goto bad_cow;
}
+ if (s->cow->bdev && s->cow->bdev == s->origin->bdev) {
+ ti->error = "COW device cannot be the same as origin device";
+ r = -EINVAL;
+ goto bad_store;
+ }
r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
if (r) {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1398f1d6e83e..7d208b2b1a19 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -126,7 +126,7 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
return 0;
}
-int dm_table_create(struct dm_table **result, fmode_t mode,
+int dm_table_create(struct dm_table **result, blk_mode_t mode,
unsigned int num_targets, struct mapped_device *md)
{
struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
@@ -304,7 +304,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
* device and not to touch the existing bdev field in case
* it is accessed concurrently.
*/
-static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
+static int upgrade_mode(struct dm_dev_internal *dd, blk_mode_t new_mode,
struct mapped_device *md)
{
int r;
@@ -324,23 +324,13 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
}
/*
- * Convert the path to a device
- */
-dev_t dm_get_dev_t(const char *path)
-{
- dev_t dev;
-
- if (lookup_bdev(path, &dev))
- dev = name_to_dev_t(path);
- return dev;
-}
-EXPORT_SYMBOL_GPL(dm_get_dev_t);
-
-/*
* Add a device to the list, or just increment the usage count if
* it's already present.
+ *
+ * Note: the __ref annotation is because this function can call the __init
+ * marked early_lookup_bdev when called during early boot code from dm-init.c.
*/
-int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
+int __ref dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode,
struct dm_dev **result)
{
int r;
@@ -358,9 +348,13 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
if (MAJOR(dev) != major || MINOR(dev) != minor)
return -EOVERFLOW;
} else {
- dev = dm_get_dev_t(path);
- if (!dev)
- return -ENODEV;
+ r = lookup_bdev(path, &dev);
+#ifndef MODULE
+ if (r && system_state < SYSTEM_RUNNING)
+ r = early_lookup_bdev(path, &dev);
+#endif
+ if (r)
+ return r;
}
if (dev == disk_devt(t->md->disk))
return -EINVAL;
@@ -668,7 +662,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
t->singleton = true;
}
- if (dm_target_always_writeable(ti->type) && !(t->mode & FMODE_WRITE)) {
+ if (dm_target_always_writeable(ti->type) &&
+ !(t->mode & BLK_OPEN_WRITE)) {
ti->error = "target type may not be included in a read-only table";
goto bad;
}
@@ -2039,7 +2034,7 @@ struct list_head *dm_table_get_devices(struct dm_table *t)
return &t->devices;
}
-fmode_t dm_table_get_mode(struct dm_table *t)
+blk_mode_t dm_table_get_mode(struct dm_table *t)
{
return t->mode;
}
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index b9461faa9f0d..9dd0409848ab 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1891,7 +1891,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
- * - must take shrinker_mutex without holding pmd->root_lock
+ * - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 39410bf186cf..f1d0dcb9db22 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3300,7 +3300,7 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
unsigned long block_size;
dm_block_t low_water_blocks;
struct dm_dev *metadata_dev;
- fmode_t metadata_mode;
+ blk_mode_t metadata_mode;
/*
* FIXME Remove validation from scope of lock.
@@ -3333,7 +3333,8 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto out_unlock;
- metadata_mode = FMODE_READ | ((pf.mode == PM_READ_ONLY) ? 0 : FMODE_WRITE);
+ metadata_mode = BLK_OPEN_READ |
+ ((pf.mode == PM_READ_ONLY) ? 0 : BLK_OPEN_WRITE);
r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev);
if (r) {
ti->error = "Error opening metadata block device";
@@ -3341,7 +3342,7 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
warn_if_metadata_device_too_big(metadata_dev->bdev);
- r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev);
+ r = dm_get_device(ti, argv[1], BLK_OPEN_READ | BLK_OPEN_WRITE, &data_dev);
if (r) {
ti->error = "Error getting data device";
goto out_metadata;
@@ -4222,7 +4223,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_origin_dev;
}
- r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
+ r = dm_get_device(ti, argv[2], BLK_OPEN_READ, &origin_dev);
if (r) {
ti->error = "Error opening origin device";
goto bad_origin_dev;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index a9ee2faa75a2..3ef9f018da60 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -607,7 +607,7 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
(*argc)--;
if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
- r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
+ r = dm_get_device(ti, arg_value, BLK_OPEN_READ, &v->fec->dev);
if (r) {
ti->error = "FEC device lookup failed";
return r;
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e35c16e06d06..26adcfea0302 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -1196,7 +1196,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
- if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
+ if ((dm_table_get_mode(ti->table) & ~BLK_OPEN_READ)) {
ti->error = "Device must be readonly";
r = -EINVAL;
goto bad;
@@ -1225,13 +1225,13 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
v->version = num;
- r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev);
+ r = dm_get_device(ti, argv[1], BLK_OPEN_READ, &v->data_dev);
if (r) {
ti->error = "Data device lookup failed";
goto bad;
}
- r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev);
+ r = dm_get_device(ti, argv[2], BLK_OPEN_READ, &v->hash_dev);
if (r) {
ti->error = "Hash device lookup failed";
goto bad;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 8f0896a6990b..9d3cca8e3dc9 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -577,7 +577,7 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
return mblk;
@@ -728,7 +728,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio->bi_private = mblk;
bio->bi_end_io = dmz_mblock_bio_end_io;
- bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
submit_bio(bio);
return 0;
@@ -752,7 +752,7 @@ static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op,
bio = bio_alloc(dev->bdev, 1, op | REQ_SYNC | REQ_META | REQ_PRIO,
GFP_NOIO);
bio->bi_iter.bi_sector = dmz_blk2sect(block);
- bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
+ __bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
ret = submit_bio_wait(bio);
bio_put(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fffb0cbe2ac8..fe2d4750d9c7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -207,7 +207,7 @@ static int __init local_init(void)
if (r)
return r;
- deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
+ deferred_remove_workqueue = alloc_ordered_workqueue("kdmremove", 0);
if (!deferred_remove_workqueue) {
r = -ENOMEM;
goto out_uevent_exit;
@@ -310,13 +310,13 @@ int dm_deleting_md(struct mapped_device *md)
return test_bit(DMF_DELETING, &md->flags);
}
-static int dm_blk_open(struct block_device *bdev, fmode_t mode)
+static int dm_blk_open(struct gendisk *disk, blk_mode_t mode)
{
struct mapped_device *md;
spin_lock(&_minor_lock);
- md = bdev->bd_disk->private_data;
+ md = disk->private_data;
if (!md)
goto out;
@@ -334,7 +334,7 @@ out:
return md ? 0 : -ENXIO;
}
-static void dm_blk_close(struct gendisk *disk, fmode_t mode)
+static void dm_blk_close(struct gendisk *disk)
{
struct mapped_device *md;
@@ -448,7 +448,7 @@ static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
dm_put_live_table(md, srcu_idx);
}
-static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
+static int dm_blk_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mapped_device *md = bdev->bd_disk->private_data;
@@ -734,7 +734,7 @@ static char *_dm_claim_ptr = "I belong to device-mapper";
* Open a table device so we can use it as a map destination.
*/
static struct table_device *open_table_device(struct mapped_device *md,
- dev_t dev, fmode_t mode)
+ dev_t dev, blk_mode_t mode)
{
struct table_device *td;
struct block_device *bdev;
@@ -746,7 +746,7 @@ static struct table_device *open_table_device(struct mapped_device *md,
return ERR_PTR(-ENOMEM);
refcount_set(&td->count, 1);
- bdev = blkdev_get_by_dev(dev, mode | FMODE_EXCL, _dm_claim_ptr);
+ bdev = blkdev_get_by_dev(dev, mode, _dm_claim_ptr, NULL);
if (IS_ERR(bdev)) {
r = PTR_ERR(bdev);
goto out_free_td;
@@ -771,7 +771,7 @@ static struct table_device *open_table_device(struct mapped_device *md,
return td;
out_blkdev_put:
- blkdev_put(bdev, mode | FMODE_EXCL);
+ blkdev_put(bdev, _dm_claim_ptr);
out_free_td:
kfree(td);
return ERR_PTR(r);
@@ -784,14 +784,14 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
{
if (md->disk->slave_dir)
bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
- blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
+ blkdev_put(td->dm_dev.bdev, _dm_claim_ptr);
put_dax(td->dm_dev.dax_dev);
list_del(&td->list);
kfree(td);
}
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
- fmode_t mode)
+ blk_mode_t mode)
{
struct table_device *td;
@@ -802,7 +802,7 @@ static struct table_device *find_table_device(struct list_head *l, dev_t dev,
return NULL;
}
-int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+int dm_get_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode,
struct dm_dev **result)
{
struct table_device *td;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a856e0aee73b..63d9010d8e61 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -203,7 +203,7 @@ int dm_open_count(struct mapped_device *md);
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred);
int dm_cancel_deferred_remove(struct mapped_device *md);
int dm_request_based(struct mapped_device *md);
-int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
+int dm_get_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode,
struct dm_dev **result);
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index 91836e6de326..6eaa0eab40f9 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -147,7 +147,8 @@ static void __init md_setup_drive(struct md_setup_args *args)
if (p)
*p++ = 0;
- dev = name_to_dev_t(devname);
+ if (early_lookup_bdev(devname, &dev))
+ dev = 0;
if (strncmp(devname, "/dev/", 5) == 0)
devname += 5;
snprintf(comp_name, 63, "/dev/%s", devname);
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bc8d7565171d..1ff712889a3b 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -54,14 +54,7 @@ __acquires(bitmap->lock)
{
unsigned char *mappage;
- if (page >= bitmap->pages) {
- /* This can happen if bitmap_start_sync goes beyond
- * End-of-device while looking for a whole page.
- * It is harmless.
- */
- return -EINVAL;
- }
-
+ WARN_ON_ONCE(page >= bitmap->pages);
if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
return 0;
@@ -1023,7 +1016,6 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
return set;
}
-
/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
@@ -1033,8 +1025,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
int dirty, need_write;
int writing = 0;
- if (!bitmap || !bitmap->storage.filemap ||
- test_bit(BITMAP_STALE, &bitmap->flags))
+ if (!md_bitmap_enabled(bitmap))
return;
/* look at each page to see if there are any set bits that need to be
@@ -1063,6 +1054,35 @@ void md_bitmap_unplug(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_unplug);
+struct bitmap_unplug_work {
+ struct work_struct work;
+ struct bitmap *bitmap;
+ struct completion *done;
+};
+
+static void md_bitmap_unplug_fn(struct work_struct *work)
+{
+ struct bitmap_unplug_work *unplug_work =
+ container_of(work, struct bitmap_unplug_work, work);
+
+ md_bitmap_unplug(unplug_work->bitmap);
+ complete(unplug_work->done);
+}
+
+void md_bitmap_unplug_async(struct bitmap *bitmap)
+{
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct bitmap_unplug_work unplug_work;
+
+ INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
+ unplug_work.bitmap = bitmap;
+ unplug_work.done = &done;
+
+ queue_work(md_bitmap_wq, &unplug_work.work);
+ wait_for_completion(&done);
+}
+EXPORT_SYMBOL(md_bitmap_unplug_async);
+
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
@@ -1241,11 +1261,28 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
sector_t offset, sector_t *blocks,
int create);
+static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
+ bool force)
+{
+ struct md_thread *thread;
+
+ rcu_read_lock();
+ thread = rcu_dereference(mddev->thread);
+
+ if (!thread)
+ goto out;
+
+ if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
+ thread->timeout = timeout;
+
+out:
+ rcu_read_unlock();
+}
+
/*
* bitmap daemon -- periodically wakes up to clean bits and flush pages
* out to disk
*/
-
void md_bitmap_daemon_work(struct mddev *mddev)
{
struct bitmap *bitmap;
@@ -1269,7 +1306,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
bitmap->daemon_lastrun = jiffies;
if (bitmap->allclean) {
- mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+ mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
goto done;
}
bitmap->allclean = 1;
@@ -1366,8 +1403,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
done:
if (bitmap->allclean == 0)
- mddev->thread->timeout =
- mddev->bitmap_info.daemon_sleep;
+ mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
mutex_unlock(&mddev->bitmap_info.mutex);
}
@@ -1387,6 +1423,14 @@ __acquires(bitmap->lock)
sector_t csize;
int err;
+ if (page >= bitmap->pages) {
+ /*
+ * This can happen if bitmap_start_sync goes beyond
+ * End-of-device while looking for a whole page, or if the
+ * user writes a huge number to sysfs bitmap_set_bits.
+ */
+ return NULL;
+ }
err = md_bitmap_checkpage(bitmap, page, create, 0);
if (bitmap->bp[page].hijacked ||
@@ -1820,8 +1864,7 @@ void md_bitmap_destroy(struct mddev *mddev)
mddev->bitmap = NULL; /* disconnect from the md device */
spin_unlock(&mddev->lock);
mutex_unlock(&mddev->bitmap_info.mutex);
- if (mddev->thread)
- mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+ mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
md_bitmap_free(bitmap);
}
@@ -1964,7 +2007,7 @@ int md_bitmap_load(struct mddev *mddev)
/* Kick recovery in case any bits were set */
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
- mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
+ mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
md_wakeup_thread(mddev->thread);
md_bitmap_update_sb(bitmap);
@@ -2469,17 +2512,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
timeout = MAX_SCHEDULE_TIMEOUT-1;
if (timeout < 1)
timeout = 1;
+
mddev->bitmap_info.daemon_sleep = timeout;
- if (mddev->thread) {
- /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
- * the bitmap is all clean and we don't need to
- * adjust the timeout right now
- */
- if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
- mddev->thread->timeout = timeout;
- md_wakeup_thread(mddev->thread);
- }
- }
+ mddev_set_timeout(mddev, timeout, false);
+ md_wakeup_thread(mddev->thread);
+
return len;
}
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index cfd7395de8fd..8a3788c9bfef 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -264,6 +264,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
sector_t new_lo, sector_t new_hi);
void md_bitmap_unplug(struct bitmap *bitmap);
+void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
@@ -273,6 +274,13 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);
+
+static inline bool md_bitmap_enabled(struct bitmap *bitmap)
+{
+ return bitmap && bitmap->storage.filemap &&
+ !test_bit(BITMAP_STALE, &bitmap->flags);
+}
+
#endif
#endif
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 10e0c5381d01..3d9fd74233df 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -75,14 +75,14 @@ struct md_cluster_info {
sector_t suspend_hi;
int suspend_from; /* the slot which broadcast suspend_lo/hi */
- struct md_thread *recovery_thread;
+ struct md_thread __rcu *recovery_thread;
unsigned long recovery_map;
/* communication loc resources */
struct dlm_lock_resource *ack_lockres;
struct dlm_lock_resource *message_lockres;
struct dlm_lock_resource *token_lockres;
struct dlm_lock_resource *no_new_dev_lockres;
- struct md_thread *recv_thread;
+ struct md_thread __rcu *recv_thread;
struct completion newdisk_completion;
wait_queue_head_t wait;
unsigned long state;
@@ -362,8 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)
set_bit(slot, &cinfo->recovery_map);
if (!cinfo->recovery_thread) {
- cinfo->recovery_thread = md_register_thread(recover_bitmaps,
- mddev, "recover");
+ rcu_assign_pointer(cinfo->recovery_thread,
+ md_register_thread(recover_bitmaps, mddev, "recover"));
if (!cinfo->recovery_thread) {
pr_warn("md-cluster: Could not create recovery thread\n");
return;
@@ -526,11 +526,15 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
int got_lock = 0;
+ struct md_thread *thread;
struct md_cluster_info *cinfo = mddev->cluster_info;
mddev->good_device_nr = le32_to_cpu(msg->raid_slot);
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
- wait_event(mddev->thread->wqueue,
+
+ /* daemon thread must exist */
+ thread = rcu_dereference_protected(mddev->thread, true);
+ wait_event(thread->wqueue,
(got_lock = mddev_trylock(mddev)) ||
test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state));
md_reload_sb(mddev, mddev->good_device_nr);
@@ -889,7 +893,8 @@ static int join(struct mddev *mddev, int nodes)
}
/* Initiate the communication resources */
ret = -ENOMEM;
- cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
+ rcu_assign_pointer(cinfo->recv_thread,
+ md_register_thread(recv_daemon, mddev, "cluster_recv"));
if (!cinfo->recv_thread) {
pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
goto err;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 66edf5e72bd6..92c45be203d7 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -400,8 +400,8 @@ static int multipath_run (struct mddev *mddev)
if (ret)
goto out_free_conf;
- mddev->thread = md_register_thread(multipathd, mddev,
- "multipath");
+ rcu_assign_pointer(mddev->thread,
+ md_register_thread(multipathd, mddev, "multipath"));
if (!mddev->thread)
goto out_free_conf;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8e344b4b3444..cf3733c90c47 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c