-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.html3
-rw-r--r--Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html4
-rw-r--r--Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html5
-rw-r--r--Documentation/RCU/NMI-RCU.txt13
-rw-r--r--Documentation/RCU/UP.txt6
-rw-r--r--Documentation/RCU/checklist.txt91
-rw-r--r--Documentation/RCU/rcu.txt8
-rw-r--r--Documentation/RCU/rcu_dereference.txt103
-rw-r--r--Documentation/RCU/rcubarrier.txt27
-rw-r--r--Documentation/RCU/whatisRCU.txt10
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt36
-rw-r--r--Documentation/atomic_t.txt17
-rw-r--r--Documentation/core-api/cachetlb.rst10
-rw-r--r--Documentation/devicetree/bindings/net/davinci_emac.txt2
-rw-r--r--Documentation/devicetree/bindings/net/ethernet.txt5
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt4
-rw-r--r--Documentation/driver-api/usb/power-management.rst14
-rw-r--r--Documentation/kprobes.txt6
-rw-r--r--Documentation/media/uapi/rc/rc-tables.rst4
-rw-r--r--Documentation/networking/decnet.txt2
-rw-r--r--Documentation/networking/ip-sysctl.txt3
-rw-r--r--Documentation/networking/netdev-FAQ.rst2
-rw-r--r--Documentation/networking/rxrpc.txt16
-rw-r--r--Documentation/sysctl/vm.txt16
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt49
-rw-r--r--Documentation/virtual/kvm/api.txt11
-rw-r--r--MAINTAINERS24
-rw-r--r--Makefile4
-rw-r--r--arch/Kconfig8
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/tlb.h6
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/arc/boot/dts/hsdk.dts13
-rw-r--r--arch/arc/include/asm/tlb.h32
-rw-r--r--arch/arc/lib/memset-archs.S4
-rw-r--r--arch/arc/mm/cache.c31
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/Kconfig.debug6
-rw-r--r--arch/arm/boot/compressed/head.S16
-rw-r--r--arch/arm/include/asm/tlb.h255
-rw-r--r--arch/arm/kernel/head-nommu.S2
-rw-r--r--arch/arm/kernel/signal.c3
-rw-r--r--arch/arm/tools/syscall.tbl4
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/include/asm/futex.h2
-rw-r--r--arch/arm64/include/asm/tlb.h1
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h8
-rw-r--r--arch/arm64/kernel/ftrace.c9
-rw-r--r--arch/arm64/mm/init.c2
-rw-r--r--arch/c6x/Kconfig1
-rw-r--r--arch/c6x/include/asm/tlb.h2
-rw-r--r--arch/h8300/include/asm/tlb.h2
-rw-r--r--arch/hexagon/include/asm/tlb.h12
-rw-r--r--arch/ia64/include/asm/machvec.h13
-rw-r--r--arch/ia64/include/asm/machvec_sn2.h2
-rw-r--r--arch/ia64/include/asm/tlb.h259
-rw-r--r--arch/ia64/include/asm/tlbflush.h25
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/ia64/mm/tlb.c23
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c7
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/include/asm/tlb.h14
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/include/asm/tlb.h9
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/mips/ath79/setup.c6
-rw-r--r--arch/mips/include/asm/tlb.h17
-rw-r--r--arch/mips/kernel/scall64-o32.S2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl4
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl4
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl4
-rw-r--r--arch/mips/net/ebpf_jit.c5
-rw-r--r--arch/nds32/include/asm/tlb.h16
-rw-r--r--arch/nds32/include/asm/tlbflush.h1
-rw-r--r--arch/nios2/Kconfig1
-rw-r--r--arch/nios2/include/asm/tlb.h14
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/include/asm/tlb.h8
-rw-r--r--arch/parisc/include/asm/tlb.h18
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/configs/skiroot_defconfig1
-rw-r--r--arch/powerpc/include/asm/tlb.h18
-rw-r--r--arch/powerpc/kernel/security.c6
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv.c4
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c97
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c18
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype2
-rw-r--r--arch/riscv/configs/rv32_defconfig84
-rw-r--r--arch/riscv/include/asm/tlb.h1
-rw-r--r--arch/riscv/mm/init.c8
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/boot/mem_detect.c2
-rw-r--r--arch/s390/include/asm/tlb.h130
-rw-r--r--arch/s390/kernel/fpu.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c3
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/s390/kernel/vtime.c8
-rw-r--r--arch/s390/mm/pgalloc.c63
-rw-r--r--arch/sh/include/asm/pgalloc.h9
-rw-r--r--arch/sh/include/asm/tlb.h132
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/include/asm/tlb_32.h18
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl4
-rw-r--r--arch/um/include/asm/tlb.h158
-rw-r--r--arch/unicore32/Kconfig1
-rw-r--r--arch/unicore32/include/asm/tlb.h7
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/crypto/poly1305-avx2-x86_64.S14
-rw-r--r--arch/x86/crypto/poly1305-sse2-x86_64.S22
-rw-r--r--arch/x86/entry/entry_32.S2
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c4
-rw-r--r--arch/x86/events/amd/core.c146
-rw-r--r--arch/x86/events/intel/core.c12
-rw-r--r--arch/x86/events/intel/cstate.c10
-rw-r--r--arch/x86/events/intel/pt.c3
-rw-r--r--arch/x86/events/perf_event.h38
-rw-r--r--arch/x86/ia32/ia32_signal.c29
-rw-r--r--arch/x86/include/asm/alternative-asm.h11
-rw-r--r--arch/x86/include/asm/alternative.h10
-rw-r--r--arch/x86/include/asm/asm.h24
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/nospec-branch.h28
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/smap.h37
-rw-r--r--arch/x86/include/asm/switch_to.h1
-rw-r--r--arch/x86/include/asm/tlb.h1
-rw-r--r--arch/x86/include/asm/uaccess.h12
-rw-r--r--arch/x86/include/asm/uaccess_64.h3
-rw-r--r--arch/x86/include/asm/xen/hypercall.h24
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/cpu/bugs.c17
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c3
-rw-r--r--arch/x86/kernel/kprobes/core.c48
-rw-r--r--arch/x86/kernel/process.c8
-rw-r--r--arch/x86/kernel/process_32.c7
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/reboot.c21
-rw-r--r--arch/x86/kernel/signal.c34
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/kvm/emulate.c191
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/lapic.c77
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/svm.c57
-rw-r--r--arch/x86/kvm/trace.h4
-rw-r--r--arch/x86/kvm/vmx/nested.c51
-rw-r--r--arch/x86/kvm/vmx/vmenter.S12
-rw-r--r--arch/x86/kvm/vmx/vmx.c42
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c100
-rw-r--r--arch/x86/kvm/x86.h4
-rw-r--r--arch/x86/lib/Makefile12
-rw-r--r--arch/x86/lib/copy_user_64.S48
-rw-r--r--arch/x86/lib/memcpy_64.S3
-rw-r--r--arch/x86/lib/usercopy_64.c20
-rw-r--r--arch/x86/mm/dump_pagetables.c3
-rw-r--r--arch/x86/mm/init.c6
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/kaslr.c2
-rw-r--r--arch/x86/mm/pti.c4
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/xtensa/include/asm/tlb.h26
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl4
-rw-r--r--block/bfq-iosched.c8
-rw-r--r--block/blk-mq.c2
-rw-r--r--crypto/lrw.c6
-rw-r--r--crypto/testmgr.h44
-rw-r--r--crypto/xts.c6
-rw-r--r--drivers/acpi/acpica/evgpe.c6
-rw-r--r--drivers/acpi/nfit/core.c12
-rw-r--r--drivers/acpi/nfit/intel.c10
-rw-r--r--drivers/atm/firestream.c2
-rw-r--r--drivers/base/memory.c2
-rw-r--r--drivers/block/zram/zram_drv.c5
-rw-r--r--drivers/char/ipmi/ipmi_dmi.c1
-rw-r--r--drivers/char/ipmi/ipmi_msghandler.c19
-rw-r--r--drivers/char/ipmi/ipmi_si_hardcode.c2
-rw-r--r--drivers/clk/clkdev.c5
-rw-r--r--drivers/clk/sunxi-ng/ccu_nkmp.c24
-rw-r--r--drivers/clocksource/Kconfig1
-rw-r--r--drivers/clocksource/arm_arch_timer.c5
-rw-r--r--drivers/clocksource/timer-oxnas-rps.c2
-rw-r--r--drivers/clocksource/timer-ti-dm.c28
-rw-r--r--drivers/dma/bcm2835-dma.c2
-rw-r--r--drivers/dma/mediatek/mtk-cqdma.c2
-rw-r--r--drivers/dma/sh/rcar-dmac.c30
-rw-r--r--drivers/extcon/Kconfig2
-rw-r--r--drivers/gpio/gpio-eic-sprd.c1
-rw-r--r--drivers/gpio/gpiolib.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.h6
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c16
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c6
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c6
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c6
-rw-r--r--drivers/gpu/drm/i915/intel_fbdev.c12
-rw-r--r--drivers/gpu/drm/imx/ipuv3-crtc.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.c4
-rw-r--r--drivers/gpu/drm/qxl/qxl_prime.c12
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c3
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_drv.c9
-rw-r--r--drivers/gpu/drm/tegra/hdmi.c12
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c14
-rw-r--r--drivers/gpu/drm/ttm/ttm_memory.c5
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c13
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c2
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.c4
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_drv.h4
-rw-r--r--drivers/gpu/drm/virtio/virtgpu_prime.c12
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c33
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c8
-rw-r--r--drivers/gpu/ipu-v3/ipu-dp.c12
-rw-r--r--drivers/hid/hid-input.c17
-rw-r--r--drivers/i2c/busses/i2c-designware-master.c3
-rw-r--r--drivers/i2c/busses/i2c-imx.c4
-rw-r--r--drivers/i2c/busses/i2c-synquacer.c2
-rw-r--r--drivers/i2c/i2c-core-base.c6
-rw-r--r--drivers/i3c/master.c5
-rw-r--r--drivers/i3c/master/dw-i3c-master.c2
-rw-r--r--drivers/iio/accel/kxcjk-1013.c2
-rw-r--r--drivers/iio/adc/ad_sigma_delta.c1
-rw-r--r--drivers/iio/adc/at91_adc.c28
-rw-r--r--drivers/iio/adc/xilinx-xadc-core.c3
-rw-r--r--drivers/iio/chemical/Kconfig14
-rw-r--r--drivers/iio/chemical/bme680.h6
-rw-r--r--drivers/iio/chemical/bme680_core.c54
-rw-r--r--drivers/iio/chemical/bme680_i2c.c21
-rw-r--r--drivers/iio/chemical/bme680_spi.c115
-rw-r--r--drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c7
-rw-r--r--drivers/iio/dac/mcp4725.c1
-rw-r--r--drivers/iio/gyro/bmg160_core.c6
-rw-r--r--drivers/iio/gyro/mpu3050-core.c8
-rw-r--r--drivers/iio/industrialio-buffer.c5
-rw-r--r--drivers/iio/industrialio-core.c4
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_main.c55
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c12
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c11
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c17
-rw-r--r--drivers/input/keyboard/Kconfig2
-rw-r--r--drivers/input/keyboard/snvs_pwrkey.c6
-rw-r--r--drivers/input/mouse/elan_i2c_core.c25
-rw-r--r--drivers/input/rmi4/rmi_driver.c6
-rw-r--r--drivers/input/rmi4/rmi_f11.c2
-rw-r--r--drivers/irqchip/irq-ath79-misc.c11
-rw-r--r--drivers/isdn/mISDN/socket.c4
-rw-r--r--drivers/misc/fastrpc.c7
-rw-r--r--drivers/misc/habanalabs/goya/goya.c9
-rw-r--r--drivers/mtd/nand/raw/marvell_nand.c12
-rw-r--r--drivers/net/bonding/bond_main.c6
-rw-r--r--drivers/net/dsa/bcm_sf2_cfp.c6
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl1.c4
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl1.h2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl2.c2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl2.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c53
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c22
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c30
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c94
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c2
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c24
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-config.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/cls.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h7
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c85
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c83
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.h4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ptp.c7
-rw-r--r--drivers/net/ethernet/socionext/netsec.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/norm_desc.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c8
-rw-r--r--drivers/net/ieee802154/mcr20a.c6
-rw-r--r--drivers/net/phy/marvell.c6
-rw-r--r--drivers/net/phy/spi_ks8995.c9
-rw-r--r--drivers/net/slip/slhc.c2
-rw-r--r--drivers/net/team/team.c33
-rw-r--r--drivers/net/usb/qmi_wwan.c10
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/ce.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/core.c1
-rw-r--r--drivers/net/wireless/ath/ath10k/core.h3
-rw-r--r--drivers/net/wireless/ath/ath10k/coredump.c6
-rw-r--r--drivers/net/wireless/ath/ath10k/htt_rx.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c8
-rw-r--r--drivers/net/wireless/ath/ath10k/pci.c24
-rw-r--r--drivers/net/wireless/ath/ath10k/pci.h2
-rw-r--r--drivers/net/wireless/ath/ath9k/xmit.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/22000.c32
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/5000.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c34
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/init.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-csr.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.h12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c71
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c28
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c43
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c13
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/internal.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c30
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c2
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c19
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sdio.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/init.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/mac.c53
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/main.c8
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x02_mac.c14
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00.h1
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00mac.c10
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00queue.c15
-rw-r--r--drivers/nfc/st95hf/core.c7
-rw-r--r--drivers/nvdimm/btt_devs.c18
-rw-r--r--drivers/nvdimm/namespace_devs.c5
-rw-r--r--drivers/nvdimm/pmem.c8
-rw-r--r--drivers/nvdimm/security.c118
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--drivers/of/of_net.c1
-rw-r--r--drivers/pci/pci.c19
-rw-r--r--drivers/pci/pcie/Kconfig8
-rw-r--r--drivers/pci/pcie/Makefile2
-rw-r--r--drivers/pci/pcie/portdrv.h4
-rw-r--r--drivers/pci/pcie/portdrv_core.c3
-rw-r--r--drivers/power/supply/cpcap-battery.c3
-rw-r--r--drivers/power/supply/goldfish_battery.c2
-rw-r--r--drivers/power/supply/power_supply_sysfs.c6
-rw-r--r--drivers/s390/block/dasd_eckd.c6
-rw-r--r--drivers/s390/char/con3270.c2
-rw-r--r--drivers/s390/char/fs3270.c3
-rw-r--r--drivers/s390/char/raw3270.c3
-rw-r--r--drivers/s390/char/raw3270.h4
-rw-r--r--drivers/s390/char/tty3270.c3
-rw-r--r--drivers/s390/crypto/ap_queue.c2
-rw-r--r--drivers/s390/crypto/pkey_api.c3
-rw-r--r--drivers/s390/net/ctcm_main.c1
-rw-r--r--drivers/scsi/aic7xxx/aic7770_osm.c1
-rw-r--r--drivers/scsi/aic7xxx/aic7xxx.h1
-rw-r--r--drivers/scsi/aic7xxx/aic7xxx_osm.c10
-rw-r--r--drivers/scsi/aic7xxx/aic7xxx_osm_pci.c1
-rw-r--r--drivers/scsi/libfc/fc_rport.c1
-rw-r--r--drivers/scsi/scsi_lib.c6
-rw-r--r--drivers/staging/comedi/drivers/ni_usb6501.c10
-rw-r--r--drivers/staging/comedi/drivers/vmk80xx.c8
-rw-r--r--drivers/staging/erofs/data.c2
-rw-r--r--drivers/staging/iio/adc/ad7192.c8
-rw-r--r--drivers/staging/iio/meter/ade7854.c2
-rw-r--r--drivers/staging/most/core.c2
-rw-r--r--drivers/tty/rocket.c2
-rw-r--r--drivers/tty/serial/sc16is7xx.c4
-rw-r--r--drivers/tty/serial/sh-sci.c6
-rw-r--r--drivers/tty/vt/vt.c3
-rw-r--r--drivers/usb/core/driver.c13
-rw-r--r--drivers/usb/core/message.c4
-rw-r--r--drivers/usb/gadget/udc/dummy_hcd.c19
-rw-r--r--drivers/usb/misc/yurex.c1
-rw-r--r--drivers/usb/storage/realtek_cr.c13
-rw-r--r--drivers/usb/usbip/stub_rx.c12
-rw-r--r--drivers/usb/usbip/usbip_common.h7
-rw-r--r--drivers/vhost/vhost.c6
-rw-r--r--drivers/w1/masters/ds2490.c6
-rw-r--r--fs/afs/callback.c3
-rw-r--r--fs/afs/cmservice.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/rxrpc.c30
-rw-r--r--fs/afs/server.c1
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/block_dev.c3
-rw-r--r--fs/btrfs/file-item.c15
-rw-r--r--fs/btrfs/inode.c2
-rw-r--r--fs/btrfs/ordered-data.c3
-rw-r--r--fs/ceph/dir.c6
-rw-r--r--fs/ceph/inode.c16
-rw-r--r--fs/ceph/mds_client.c70
-rw-r--r--fs/ceph/snap.c7
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/file.c45
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/misc.c48
-rw-r--r--fs/cifs/smb2misc.c6
-rw-r--r--fs/cifs/smb2ops.c2
-rw-r--r--fs/cifs/smb2pdu.c12
-rw-r--r--fs/dax.c15
-rw-r--r--fs/fuse/dev.c12
-rw-r--r--fs/inode.c9
-rw-r--r--fs/io_uring.c330
-rw-r--r--fs/nfsd/nfs3proc.c17
-rw-r--r--fs/nfsd/nfs3xdr.c11
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfs4state.c12
-rw-r--r--fs/nfsd/state.h1
-rw-r--r--fs/notify/fanotify/fanotify.c14
-rw-r--r--fs/notify/mark.c12
-rw-r--r--fs/pipe.c4
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/task_mmu.c18
-rw-r--r--fs/splice.c16
-rw-r--r--fs/super.c5
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/userfaultfd.c9
-rw-r--r--include/asm-generic/tlb.h288
-rw-r--r--include/drm/ttm/ttm_bo_driver.h1
-rw-r--r--include/dt-bindings/clock/sifive-fu540-prci.h18
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/bpf.h2
-rw-r--r--include/linux/bvec.h5
-rw-r--r--include/linux/clk.h16
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/cpu.h24
-rw-r--r--include/linux/efi.h7
-rw-r--r--include/linux/elevator.h1
-rw-r--r--include/linux/etherdevice.h12
-rw-r--r--include/linux/kprobes.h1
-rw-r--r--include/linux/kvm_host.h10
-rw-r--r--include/linux/mm.h15
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/pipe_fs_i.h11
-rw-r--r--include/linux/rcupdate.h6
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/sched/mm.h21
-rw-r--r--include/linux/shmem_fs.h1
-rw-r--r--include/linux/srcu.h36
-rw-r--r--include/linux/uaccess.h2
-rw-r--r--include/linux/uio.h2
-rw-r--r--include/linux/usb.h2
-rw-r--r--include/net/af_rxrpc.h4
-rw-r--r--include/net/cfg80211.h5
-rw-r--r--include/net/mac80211.h63
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h6
-rw-r--r--include/net/netrom.h2
-rw-r--r--include/net/sctp/command.h1
-rw-r--r--include/net/sock.h6
-rw-r--r--include/net/tls.h4
-rw-r--r--include/net/xfrm.h20
-rw-r--r--include/uapi/linux/input-event-codes.h6
-rw-r--r--include/uapi/rdma/mlx5-abi.h1
-rw-r--r--init/main.c4
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bpf/verifier.c76
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/events/core.c37
-rw-r--r--kernel/events/ring_buffer.c36
-rw-r--r--kernel/iomem.c4
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/locking/lockdep.c9
-rw-r--r--kernel/locking/locktorture.c2
-rw-r--r--kernel/rcu/rcu.h1
-rw-r--r--kernel/rcu/rcuperf.c5
-rw-r--r--kernel/rcu/rcutorture.c21
-rw-r--r--kernel/rcu/srcutiny.c9
-rw-r--r--kernel/rcu/srcutree.c32
-rw-r--r--kernel/rcu/tiny.c2
-rw-r--r--kernel/rcu/tree.c508
-rw-r--r--kernel/rcu/tree.h14
-rw-r--r--kernel/rcu/tree_exp.h36
-rw-r--r--kernel/rcu/tree_plugin.h257
-rw-r--r--kernel/rcu/tree_stall.h709
-rw-r--r--kernel/rcu/update.c59
-rw-r--r--kernel/resource.c11
-rw-r--r--kernel/rseq.c9
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/cpufreq_schedutil.c1
-rw-r--r--kernel/sched/deadline.c3
-rw-r--r--kernel/sched/fair.c29
-rw-r--r--kernel/seccomp.c17
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/time/sched_clock.c4
-rw-r--r--kernel/time/tick-common.c2
-rw-r--r--kernel/time/timekeeping.h7
-rw-r--r--kernel/torture.c2
-rw-r--r--kernel/trace/ftrace.c6
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c39
-rw-r--r--kernel/trace/trace_branch.c4
-rw-r--r--kernel/watchdog_hld.c3
-rw-r--r--lib/Kconfig.debug7
-rw-r--r--lib/Makefile12
-rw-r--r--lib/strncpy_from_user.c5
-rw-r--r--lib/strnlen_user.c4
-rw-r--r--lib/test_vmalloc.c6
-rw-r--r--lib/ubsan.c69
-rw-r--r--lib/ubsan.h5
-rw-r--r--mm/gup.c48
-rw-r--r--mm/huge_memory.c4
-rw-r--r--mm/hugetlb.c15
-rw-r--r--mm/kasan/Makefile3
-rw-r--r--mm/kasan/common.c10
-rw-r--r--mm/kasan/report.c3
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memory.c6
-rw-r--r--mm/memory_hotplug.c1
-rw-r--r--mm/mmap.c7
-rw-r--r--mm/mmu_gather.c129
-rw-r--r--mm/page_alloc.c57
-rw-r--r--mm/percpu.c8
-rw-r--r--mm/shmem.c58
-rw-r--r--mm/slab.c1
-rw-r--r--mm/swapfile.c32
-rw-r--r--mm/vmscan.c29
-rw-r--r--mm/vmstat.c5
-rw-r--r--net/appletalk/ddp.c1
-rw-r--r--net/atm/lec.c6
-rw-r--r--net/bluetooth/sco.c4
-rw-r--r--net/bridge/br_input.c23
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/failover.c6
-rw-r--r--net/core/filter.c2
-rw-r--r--net/core/net-sysfs.c14
-rw-r--r--net/core/ptp_classifier.c7
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c10
-rw-r--r--net/core/sock.c4
-rw-r--r--net/ipv4/esp4.c20
-rw-r--r--net/ipv4/esp4_offload.c8
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/ip_output.c1
-rw-r--r--net/ipv4/ip_vti.c9
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/route.c30
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp_dctcp.c45
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_ipv4.c13
-rw-r--r--net/ipv4/udp_offload.c16
-rw-r--r--net/ipv4/xfrm4_policy.c24
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/esp6_offload.c8
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_flowlabel.c22
-rw-r--r--net/ipv6/route.c74
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/ipv6/xfrm6_tunnel.c6
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/l2tp/l2tp_core.c10
-rw-r--r--net/llc/af_llc.c3
-rw-r--r--net/mac80211/debugfs_netdev.c2
-rw-r--r--net/mac80211/driver-ops.h3
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/iface.c3
-rw-r--r--net/mac80211/key.c9
-rw-r--r--net/mac80211/mesh_pathtbl.c2
-rw-r--r--net/mac80211/rx.c10
-rw-r--r--net/mac80211/trace_msg.h7
-rw-r--r--net/mac80211/tx.c53
-rw-r--r--net/ncsi/ncsi-rsp.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c43
-rw-r--r--net/netfilter/nf_conntrack_netlink.c34
-rw-r--r--net/netfilter/nf_conntrack_proto.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c93
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c52
-rw-r--r--net/netfilter/nf_nat_core.c11
-rw-r--r--net/netfilter/nf_tables_api.c2
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c2
-rw-r--r--net/netfilter/xt_time.c23
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/netrom/af_netrom.c76
-rw-r--r--net/netrom/nr_loopback.c2
-rw-r--r--net/netrom/nr_route.c2
-rw-r--r--net/netrom/sysctl_net_netrom.c5
-rw-r--r--net/packet/af_packet.c37
-rw-r--r--net/rds/af_rds.c3
-rw-r--r--net/rds/bind.c2
-rw-r--r--net/rds/ib_fmr.c11
-rw-r--r--net/rds/ib_rdma.c3
-rw-r--r--net/rds/ib_recv.c8
-rw-r--r--net/rose/rose_loopback.c27
-rw-r--r--net/rxrpc/af_rxrpc.c17
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_object.c32
-rw-r--r--net/rxrpc/conn_event.c11
-rw-r--r--net/rxrpc/input.c30
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/peer_event.c5
-rw-r--r--net/rxrpc/sendmsg.c21
-rw-r--r--net/sctp/sm_sideeffect.c29
-rw-r--r--net/sctp/sm_statefuns.c35
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/smc/af_smc.c58
-rw-r--r--net/smc/smc_close.c25
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_ism.c5
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/strparser/strparser.c12
-rw-r--r--net/sunrpc/cache.c3
-rw-r--r--net/sunrpc/clnt.c7
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/name_table.c3
-rw-r--r--net/tipc/sysctl.c8
-rw-r--r--net/tls/tls_device.c55
-rw-r--r--net/tls/tls_device_fallback.c16
-rw-r--r--net/tls/tls_main.c29
-rw-r--r--net/tls/tls_sw.c18
-rw-r--r--net/wireless/nl80211.c18
-rw-r--r--net/wireless/reg.c44
-rw-r--r--net/wireless/scan.c3
-rw-r--r--net/wireless/util.c6
-rw-r--r--net/xfrm/xfrm_interface.c17
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state.c2
-rw-r--r--net/xfrm/xfrm_user.c16
-rw-r--r--scripts/Makefile.build3
-rw-r--r--scripts/Makefile.ubsan1
-rw-r--r--scripts/atomic/gen-atomics.sh2
-rw-r--r--scripts/selinux/genheaders/genheaders.c1
-rw-r--r--scripts/selinux/mdp/mdp.c1
-rw-r--r--security/apparmor/apparmorfs.c13
-rw-r--r--security/device_cgroup.c2
-rw-r--r--security/inode.c13
-rw-r--r--security/selinux/include/classmap.h1
-rw-r--r--sound/core/info.c12
-rw-r--r--sound/core/init.c18
-rw-r--r--sound/pci/hda/hda_codec.c1
-rw-r--r--sound/pci/hda/patch_realtek.c19
-rw-r--r--sound/usb/line6/driver.c60
-rw-r--r--sound/usb/line6/podhd.c21
-rw-r--r--sound/usb/line6/toneport.c24
-rw-r--r--tools/arch/arc/include/uapi/asm/unistd.h51
-rw-r--r--tools/arch/hexagon/include/uapi/asm/unistd.h40
-rw-r--r--tools/arch/riscv/include/uapi/asm/unistd.h42
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/include/uapi/sound/asound.h1
-rw-r--r--tools/lib/bpf/.gitignore1
-rw-r--r--tools/lib/traceevent/event-parse.c2
-rw-r--r--tools/lib/traceevent/parse-utils.c2
-rw-r--r--tools/memory-model/Documentation/explanation.txt289
-rw-r--r--tools/memory-model/README33
-rw-r--r--tools/memory-model/linux-kernel.bell35
-rw-r--r--tools/memory-model/linux-kernel.cat39
-rw-r--r--tools/memory-model/linux-kernel.def6
-rw-r--r--tools/memory-model/lock.cat3
-rw-r--r--tools/objtool/arch.h8
-rw-r--r--tools/objtool/arch/x86/decode.c21
-rw-r--r--tools/objtool/builtin-check.c4
-rw-r--r--tools/objtool/builtin.h2
-rw-r--r--tools/objtool/check.c400
-rw-r--r--tools/objtool/check.h4
-rw-r--r--tools/objtool/elf.c15
-rw-r--r--tools/objtool/elf.h3
-rw-r--r--tools/objtool/special.c18
-rw-r--r--tools/objtool/special.h1
-rw-r--r--tools/objtool/warn.h8
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/bench/numa.c4
-rw-r--r--tools/perf/builtin-stat.c1
-rw-r--r--tools/perf/builtin-top.c1
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py2
-rw-r--r--tools/perf/util/annotate.c8
-rw-r--r--tools/perf/util/cloexec.c1
-rw-r--r--tools/perf/util/cs-etm.c14
-rw-r--r--tools/perf/util/env.c8
-rw-r--r--tools/perf/util/evlist.c14
-rw-r--r--tools/perf/util/evsel.c12
-rw-r--r--tools/perf/util/header.c22
-rw-r--r--tools/perf/util/map.c20
-rw-r--r--tools/perf/util/map.h4
-rw-r--r--tools/perf/util/session.c8
-rw-r--r--tools/testing/nvdimm/test/nfit.c17
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c22
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh20
-rw-r--r--tools/testing/selftests/kvm/Makefile9
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c9
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h27
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c5
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c20
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c157
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c15
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh10
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh94
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rwxr-xr-xtools/testing/selftests/net/run_netsocktests2
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_icmp_related.sh283
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh36
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c4
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c20
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configNR_CPUS.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh19
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh17
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mkinitrd.sh15
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh17
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh17
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c34
-rw-r--r--virt/kvm/arm/arch_timer.c17
-rw-r--r--virt/kvm/arm/arm.c11
-rw-r--r--virt/kvm/arm/mmu.c6
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c3
-rw-r--r--virt/kvm/arm/vgic/vgic.c21
-rw-r--r--virt/kvm/arm/vgic/vgic.h1
-rw-r--r--virt/kvm/irqchip.c5
-rw-r--r--virt/kvm/kvm_main.c13
768 files changed, 8436 insertions, 5714 deletions
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
index 18f179807563..c30c1957c7e6 100644
--- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -155,8 +155,7 @@ keeping lock contention under control at all tree levels regardless
of the level of loading on the system.
</p><p>RCU updaters wait for normal grace periods by registering
-RCU callbacks, either directly via <tt>call_rcu()</tt> and
-friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>),
+RCU callbacks, either directly via <tt>call_rcu()</tt>
or indirectly via <tt>synchronize_rcu()</tt> and friends.
RCU callbacks are represented by <tt>rcu_head</tt> structures,
which are queued on <tt>rcu_data</tt> structures while they are
diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
index 19e7a5fb6b73..57300db4b5ff 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html
@@ -56,6 +56,7 @@ sections.
RCU-preempt Expedited Grace Periods</a></h2>
<p>
+<tt>CONFIG_PREEMPT=y</tt> kernels implement RCU-preempt.
The overall flow of the handling of a given CPU by an RCU-preempt
expedited grace period is shown in the following diagram:
@@ -139,6 +140,7 @@ or offline, among other things.
RCU-sched Expedited Grace Periods</a></h2>
<p>
+<tt>CONFIG_PREEMPT=n</tt> kernels implement RCU-sched.
The overall flow of the handling of a given CPU by an RCU-sched
expedited grace period is shown in the following diagram:
@@ -146,7 +148,7 @@ expedited grace period is shown in the following diagram:
<p>
As with RCU-preempt, RCU-sched's
-<tt>synchronize_sched_expedited()</tt> ignores offline and
+<tt>synchronize_rcu_expedited()</tt> ignores offline and
idle CPUs, again because they are in remotely detectable
quiescent states.
However, because the
diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
index 8d21af02b1f0..c64f8d26609f 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
@@ -34,12 +34,11 @@ Similarly, any code that happens before the beginning of a given RCU grace
period is guaranteed to see the effects of all accesses following the end
of that grace period that are within RCU read-side critical sections.
-<p>This guarantee is particularly pervasive for <tt>synchronize_sched()</tt>,
-for which RCU-sched read-side critical sections include any region
+<p>Note well that RCU-sched read-side critical sections include any region
of code for which preemption is disabled.
Given that each individual machine instruction can be thought of as
an extremely small region of preemption-disabled code, one can think of
-<tt>synchronize_sched()</tt> as <tt>smp_mb()</tt> on steroids.
+<tt>synchronize_rcu()</tt> as <tt>smp_mb()</tt> on steroids.
<p>RCU updaters use this guarantee by splitting their updates into
two phases, one of which is executed before the grace period and
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index 687777f83b23..881353fd5bff 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -81,18 +81,19 @@ currently executing on some other CPU. We therefore cannot free
up any data structures used by the old NMI handler until execution
of it completes on all other CPUs.
-One way to accomplish this is via synchronize_sched(), perhaps as
+One way to accomplish this is via synchronize_rcu(), perhaps as
follows:
unset_nmi_callback();
- synchronize_sched();
+ synchronize_rcu();
kfree(my_nmi_data);
-This works because synchronize_sched() blocks until all CPUs complete
-any preemption-disabled segments of code that they were executing.
-Since NMI handlers disable preemption, synchronize_sched() is guaranteed
+This works because (as of v4.20) synchronize_rcu() blocks until all
+CPUs complete any preemption-disabled segments of code that they were
+executing.
+Since NMI handlers disable preemption, synchronize_rcu() is guaranteed
not to return until all ongoing NMI handlers exit. It is therefore safe
-to free up the handler's data as soon as synchronize_sched() returns.
+to free up the handler's data as soon as synchronize_rcu() returns.
Important note: for this to work, the architecture in question must
invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
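
Putting those pieces together, one possible teardown path might look
roughly like the following sketch (my_nmi_data and unset_nmi_callback()
are the placeholder names used in the text above, not definitive APIs):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	static void *my_nmi_data;	/* read by the NMI handler */

	static void my_nmi_teardown(void)
	{
		/* Keep future NMI handler invocations away from my_nmi_data. */
		unset_nmi_callback();

		/*
		 * NMI handlers disable preemption, so once synchronize_rcu()
		 * returns, every handler that might have been using the old
		 * data has completed.
		 */
		synchronize_rcu();

		kfree(my_nmi_data);
		my_nmi_data = NULL;
	}
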
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
index 90ec5341ee98..53bde717017b 100644
--- a/Documentation/RCU/UP.txt
+++ b/Documentation/RCU/UP.txt
@@ -86,10 +86,8 @@ even on a UP system. So do not do it! Even on a UP system, the RCU
infrastructure -must- respect grace periods, and -must- invoke callbacks
from a known environment in which no locks are held.
-It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
-immediately on an UP system. It is also safe for synchronize_rcu()
-to return immediately on UP systems, except when running preemptable
-RCU.
+Note that it -is- safe for synchronize_rcu() to return immediately on
+UP systems, including !PREEMPT SMP builds running on UP systems.
Quick Quiz #3: Why can't synchronize_rcu() return immediately on
UP systems running preemptable RCU?
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 6f469864d9f5..e98ff261a438 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -182,16 +182,13 @@ over a rather long period of time, but improvements are always welcome!
when publicizing a pointer to a structure that can
be traversed by an RCU read-side critical section.
-5. If call_rcu(), or a related primitive such as call_rcu_bh(),
- call_rcu_sched(), or call_srcu() is used, the callback function
- will be called from softirq context. In particular, it cannot
- block.
+5. If call_rcu() or call_srcu() is used, the callback function will
+ be called from softirq context. In particular, it cannot block.
-6. Since synchronize_rcu() can block, it cannot be called from
- any sort of irq context. The same rule applies for
- synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(),
- synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(),
- synchronize_sched_expedite(), and synchronize_srcu_expedited().
+6. Since synchronize_rcu() can block, it cannot be called
+ from any sort of irq context. The same rule applies
+ for synchronize_srcu(), synchronize_rcu_expedited(), and
+ synchronize_srcu_expedited().
The expedited forms of these primitives have the same semantics
as the non-expedited forms, but expediting is both expensive and
@@ -212,20 +209,20 @@ over a rather long period of time, but improvements are always welcome!
of the system, especially to real-time workloads running on
the rest of the system.
-7. If the updater uses call_rcu() or synchronize_rcu(), then the
- corresponding readers must use rcu_read_lock() and
- rcu_read_unlock(). If the updater uses call_rcu_bh() or
- synchronize_rcu_bh(), then the corresponding readers must
- use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the
- updater uses call_rcu_sched() or synchronize_sched(), then
- the corresponding readers must disable preemption, possibly
- by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
- If the updater uses synchronize_srcu() or call_srcu(), then
- the corresponding readers must use srcu_read_lock() and
+7. As of v4.20, a given kernel implements only one RCU flavor,
+ which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y.
+ If the updater uses call_rcu() or synchronize_rcu(),
+	then the corresponding readers may use rcu_read_lock() and
+ rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ or any pair of primitives that disables and re-enables preemption,
+ for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
+ If the updater uses synchronize_srcu() or call_srcu(),
+ then the corresponding readers must use srcu_read_lock() and
srcu_read_unlock(), and with the same srcu_struct. The rules for
the expedited primitives are the same as for their non-expedited
counterparts. Mixing things up will result in confusion and
- broken kernels.
+ broken kernels, and has even resulted in an exploitable security
+ issue.
One exception to this rule: rcu_read_lock() and rcu_read_unlock()
may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -288,8 +285,7 @@ over a rather long period of time, but improvements are always welcome!
d. Periodically invoke synchronize_rcu(), permitting a limited
number of updates per grace period.
- The same cautions apply to call_rcu_bh(), call_rcu_sched(),
- call_srcu(), and kfree_rcu().
+ The same cautions apply to call_srcu() and kfree_rcu().
Note that although these primitives do take action to avoid memory
exhaustion when any given CPU has too many callbacks, a determined
@@ -322,7 +318,7 @@ over a rather long period of time, but improvements are always welcome!
11. Any lock acquired by an RCU callback must be acquired elsewhere
with softirq disabled, e.g., via spin_lock_irqsave(),
- spin_lock_bh(), etc. Failing to disable irq on a given
+ spin_lock_bh(), etc. Failing to disable softirq on a given
acquisition of that lock will result in deadlock as soon as
the RCU softirq handler happens to run your RCU callback while
interrupting that acquisition's critical section.
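
For example, a sketch of the pattern this item describes, using a
hypothetical my_lock shared between process context and an RCU callback:

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	struct my_elem {
		struct list_head list;
		struct rcu_head rcu;
	};

	static DEFINE_SPINLOCK(my_lock);
	static LIST_HEAD(my_freelist);

	/* Runs in softirq context, so a plain spin_lock() is sufficient. */
	static void my_rcu_callback(struct rcu_head *rhp)
	{
		struct my_elem *e = container_of(rhp, struct my_elem, rcu);

		spin_lock(&my_lock);
		list_add(&e->list, &my_freelist);
		spin_unlock(&my_lock);
	}

	/* Process context must disable softirq while holding my_lock. */
	static void my_freelist_drain(void)
	{
		spin_lock_bh(&my_lock);
		/* ... empty my_freelist ... */
		spin_unlock_bh(&my_lock);
	}
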
@@ -335,13 +331,16 @@ over a rather long period of time, but improvements are always welcome!
must use whatever locking or other synchronization is required
to safely access and/or modify that data structure.
- RCU callbacks are -usually- executed on the same CPU that executed
- the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
- but are by -no- means guaranteed to be. For example, if a given
- CPU goes offline while having an RCU callback pending, then that
- RCU callback will execute on some surviving CPU. (If this was
- not the case, a self-spawning RCU callback would prevent the
- victim CPU from ever going offline.)
+ Do not assume that RCU callbacks will be executed on the same
+ CPU that executed the corresponding call_rcu() or call_srcu().
+ For example, if a given CPU goes offline while having an RCU
+ callback pending, then that RCU callback will execute on some
+ surviving CPU. (If this was not the case, a self-spawning RCU
+ callback would prevent the victim CPU from ever going offline.)
+ Furthermore, CPUs designated by rcu_nocbs= might well -always-
+	have their RCU callbacks executed on some other CPUs; in fact,
+ for some real-time workloads, this is the whole point of using
+ the rcu_nocbs= kernel boot parameter.
13. Unlike other forms of RCU, it -is- permissible to block in an
SRCU read-side critical section (demarked by srcu_read_lock()
@@ -381,11 +380,11 @@ over a rather long period of time, but improvements are always welcome!
SRCU's expedited primitive (synchronize_srcu_expedited())
never sends IPIs to other CPUs, so it is easier on
- real-time workloads than is synchronize_rcu_expedited(),
- synchronize_rcu_bh_expedited() or synchronize_sched_expedited().
+ real-time workloads than is synchronize_rcu_expedited().
- Note that rcu_dereference() and rcu_assign_pointer() relate to
- SRCU just as they do to other forms of RCU.
+ Note that rcu_assign_pointer() relates to SRCU just as it does to
+ other forms of RCU, but instead of rcu_dereference() you should
+ use srcu_dereference() in order to avoid lockdep splats.
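
As a sketch of what this looks like in practice (my_srcu, struct
my_data, and my_ptr are hypothetical names):

	#include <linux/srcu.h>

	struct my_data { int a; };

	DEFINE_SRCU(my_srcu);
	static struct my_data __rcu *my_ptr;

	static int my_reader(void)
	{
		int idx, val = -1;
		struct my_data *p;

		idx = srcu_read_lock(&my_srcu);
		p = srcu_dereference(my_ptr, &my_srcu);	/* not rcu_dereference() */
		if (p)
			val = p->a;	/* blocking would also be legal here */
		srcu_read_unlock(&my_srcu, idx);
		return val;
	}
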
14. The whole point of call_rcu(), synchronize_rcu(), and friends
is to wait until all pre-existing readers have finished before
@@ -405,6 +404,9 @@ over a rather long period of time, but improvements are always welcome!
read-side critical sections. It is the responsibility of the
RCU update-side primitives to deal with this.
+ For SRCU readers, you can use smp_mb__after_srcu_read_unlock()
+ immediately after an srcu_read_unlock() to get a full barrier.
+
16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
__rcu sparse checks to validate your RCU code. These can help
find problems as follows:
@@ -428,22 +430,19 @@ over a rather long period of time, but improvements are always welcome!
These debugging aids can help you find problems that are
otherwise extremely difficult to spot.
-17. If you register a callback using call_rcu(), call_rcu_bh(),
- call_rcu_sched(), or call_srcu(), and pass in a function defined
- within a loadable module, then it in necessary to wait for
- all pending callbacks to be invoked after the last invocation
- and before unloading that module. Note that it is absolutely
- -not- sufficient to wait for a grace period! The current (say)
- synchronize_rcu() implementation waits only for all previous
- callbacks registered on the CPU that synchronize_rcu() is running
- on, but it is -not- guaranteed to wait for callbacks registered
- on other CPUs.
+17. If you register a callback using call_rcu() or call_srcu(), and
+	pass in a function defined within a loadable module, then it is
+ necessary to wait for all pending callbacks to be invoked after
+ the last invocation and before unloading that module. Note that
+ it is absolutely -not- sufficient to wait for a grace period!
+ The current (say) synchronize_rcu() implementation is -not-
+	guaranteed to wait for callbacks registered on other CPUs, or
+	even on the current CPU if that CPU recently went offline and
+	came back online.
You instead need to use one of the barrier functions:
o call_rcu() -> rcu_barrier()
- o call_rcu_bh() -> rcu_barrier()
- o call_rcu_sched() -> rcu_barrier()
o call_srcu() -> srcu_barrier()
However, these barrier functions are absolutely -not- guaranteed
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 721b3e426515..c818cf65c5a9 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -52,10 +52,10 @@ o If I am running on a uniprocessor kernel, which can only do one
o How can I see where RCU is currently used in the Linux kernel?
Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
- "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh",
- "srcu_read_lock", "srcu_read_unlock", "synchronize_rcu",
- "synchronize_net", "synchronize_srcu", and the other RCU
- primitives. Or grab one of the cscope databases from:
+ "rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
+ "srcu_read_unlock", "synchronize_rcu", "synchronize_net",
+ "synchronize_srcu", and the other RCU primitives. Or grab one
+ of the cscope databases from:
http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index ab96227bad42..bf699e8cfc75 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -351,3 +351,106 @@ garbage values.
In short, rcu_dereference() is -not- optional when you are going to
dereference the resulting pointer.
+
+
+WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE?
+
+First, please avoid using rcu_dereference_raw() and also please avoid
+using rcu_dereference_check() and rcu_dereference_protected() with a
+second argument with a constant value of 1 (or true, for that matter).
+With that caution out of the way, here is some guidance on which
+member of the rcu_dereference() family to use in various situations:
+
+1. If the access needs to be within an RCU read-side critical
+ section, use rcu_dereference(). With the new consolidated
+ RCU flavors, an RCU read-side critical section is entered
+ using rcu_read_lock(), anything that disables bottom halves,
+ anything that disables interrupts, or anything that disables
+ preemption.
+
+2. If the access might be within an RCU read-side critical section
+ on the one hand, or protected by (say) my_lock on the other,
+ use rcu_dereference_check(), for example:
+
+ p1 = rcu_dereference_check(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock));
+
+
+3. If the access might be within an RCU read-side critical section
+ on the one hand, or protected by either my_lock or your_lock on
+ the other, again use rcu_dereference_check(), for example:
+
+ p1 = rcu_dereference_check(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock) ||
+ lockdep_is_held(&your_lock));
+
+4. If the access is on the update side, so that it is always protected
+ by my_lock, use rcu_dereference_protected():
+
+ p1 = rcu_dereference_protected(p->rcu_protected_pointer,
+ lockdep_is_held(&my_lock));
+
+ This can be extended to handle multiple locks as in #3 above,
+ and both can be extended to check other conditions as well.
+
+5. If the protection is supplied by the caller, and is thus unknown
+ to this code, that is the rare case when rcu_dereference_raw()
+ is appropriate. In addition, rcu_dereference_raw() might be
+ appropriate when the lockdep expression would be excessively
+ complex, except that a better approach in that case might be to
+ take a long hard look at your synchronization design. Still,
+ there are data-locking cases where any one of a very large number
+ of locks or reference counters suffices to protect the pointer,
+ so rcu_dereference_raw() does have its place.
+
+ However, its place is probably quite a bit smaller than one
+ might expect given the number of uses in the current kernel.
+ Ditto for its synonym, rcu_dereference_check( ... , 1), and
+ its close relative, rcu_dereference_protected(... , 1).
+
+
+SPARSE CHECKING OF RCU-PROTECTED POINTERS
+
+The sparse static-analysis tool checks for direct access to RCU-protected
+pointers, which can result in "interesting" bugs due to compiler
+optimizations involving invented loads and perhaps also load tearing.
+For example, suppose someone mistakenly does something like this:
+
+ p = q->rcu_protected_pointer;
+ do_something_with(p->a);
+ do_something_else_with(p->b);
+
+If register pressure is high, the compiler might optimize "p" out
+of existence, transforming the code to something like this:
+
+ do_something_with(q->rcu_protected_pointer->a);
+ do_something_else_with(q->rcu_protected_pointer->b);
+
+This could fatally disappoint your code if q->rcu_protected_pointer
+changed in the meantime. Nor is this a theoretical problem: Exactly
+this sort of bug cost Paul E. McKenney (and several of his innocent
+colleagues) a three-day weekend back in the early 1990s.
+
+Load tearing could of course result in dereferencing a mashup of a pair
+of pointers, which also might fatally disappoint your code.
+
+These problems could have been avoided simply by making the code instead
+read as follows:
+
+ p = rcu_dereference(q->rcu_protected_pointer);
+ do_something_with(p->a);
+ do_something_else_with(p->b);
+
+Unfortunately, these sorts of bugs can be extremely hard to spot during
+review. This is where the sparse tool comes into play, along with the
+"__rcu" marker. If you mark a pointer declaration, whether in a structure
+or as a formal parameter, with "__rcu", sparse will complain if
+this pointer is accessed directly. Sparse will also complain
+if a pointer not marked with "__rcu" is accessed using rcu_dereference()
+and friends. For example, ->rcu_protected_pointer might be declared as
+follows:
+
+ struct foo __rcu *rcu_protected_pointer;
+
+Use of "__rcu" is opt-in. If you choose not to use it, then you should
+ignore the sparse warnings.
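
Pulling the pieces of this document together, a minimal sparse-clean
reader/updater sketch might look as follows. The names do_something_with(),
do_something_else_with(), and my_lock are the illustrative names used
above, and the protected pointer is shown at file scope rather than as a
structure member for brevity:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct foo {
		int a;
		int b;
	};

	static struct foo __rcu *rcu_protected_pointer;	/* sparse-checked */
	static DEFINE_SPINLOCK(my_lock);

	static void reader(void)
	{
		struct foo *p;

		rcu_read_lock();
		p = rcu_dereference(rcu_protected_pointer);
		if (p) {
			do_something_with(p->a);
			do_something_else_with(p->b);
		}
		rcu_read_unlock();
	}

	static void updater(struct foo *newp)
	{
		struct foo *oldp;

		spin_lock(&my_lock);
		oldp = rcu_dereference_protected(rcu_protected_pointer,
						 lockdep_is_held(&my_lock));
		rcu_assign_pointer(rcu_protected_pointer, newp);
		spin_unlock(&my_lock);

		if (oldp) {
			synchronize_rcu();	/* wait for pre-existing readers */
			kfree(oldp);
		}
	}
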
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
index 5d7759071a3e..a2782df69732 100644
--- a/Documentation/RCU/rcubarrier.txt
+++ b/Documentation/RCU/rcubarrier.txt
@@ -83,16 +83,15 @@ Pseudo-code using rcu_barrier() is as follows:
2. Execute rcu_barrier().
3. Allow the module to be unloaded.
-There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier()
-functions for the other flavors of RCU, and you of course must match
-the flavor of rcu_barrier() with that of call_rcu(). If your module
-uses multiple flavors of call_rcu(), then it must also use multiple
+There is also an srcu_barrier() function for SRCU, and you of course
+must match the flavor of rcu_barrier() with that of call_rcu(). If your
+module uses multiple flavors of call_rcu(), then it must also use multiple
flavors of rcu_barrier() when unloading that module. For example, if
-it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on
+it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
srcu_struct_2(), then the following three lines of code will be required
when unloading:
- 1 rcu_barrier_bh();
+ 1 rcu_barrier();
2 srcu_barrier(&srcu_struct_1);
3 srcu_barrier(&srcu_struct_2);
@@ -185,12 +184,12 @@ module invokes call_rcu() from timers, you will need to first cancel all
the timers, and only then invoke rcu_barrier() to wait for any remaining
RCU callbacks to complete.
-Of course, if you module uses call_rcu_bh(), you will need to invoke
-rcu_barrier_bh() before unloading. Similarly, if your module uses
-call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
-unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
-call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
-rcu_barrier_bh(), and rcu_barrier_sched().
+Of course, if your module uses call_rcu(), you will need to invoke
+rcu_barrier() before unloading. Similarly, if your module uses
+call_srcu(), you will need to invoke srcu_barrier() before unloading,
+and on the same srcu_struct structure. If your module uses call_rcu()
+-and- call_srcu(), then you will need to invoke rcu_barrier() -and-
+srcu_barrier().
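
A sketch of a module exit handler following this advice, assuming the
module uses both call_rcu() and call_srcu() on a module-private
srcu_struct (all names here are hypothetical):

	#include <linux/module.h>
	#include <linux/rcupdate.h>
	#include <linux/srcu.h>

	static struct srcu_struct my_srcu;	/* init_srcu_struct() at module init */

	static void __exit my_module_exit(void)
	{
		/*
		 * First stop anything that might post new callbacks,
		 * for example by cancelling timers and flushing workqueues.
		 */
		my_stop_posting_callbacks();

		rcu_barrier();			/* pending call_rcu() callbacks */
		srcu_barrier(&my_srcu);		/* pending call_srcu() callbacks */
		cleanup_srcu_struct(&my_srcu);
	}
	module_exit(my_module_exit);
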
Implementing rcu_barrier()
@@ -223,8 +222,8 @@ shown below. Note that the final "1" in on_each_cpu()'s argument list
ensures that all the calls to rcu_barrier_func() will have completed
before on_each_cpu() returns. Line 9 then waits for the completion.
-This code was rewritten in 2008 to support rcu_barrier_bh() and
-rcu_barrier_sched() in addition to the original rcu_barrier().
+This code was rewritten in 2008 and several times thereafter, but this
+still gives the general idea.
The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
to post an RCU callback, as follows:
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 1ace20815bb1..981651a8b65d 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -310,7 +310,7 @@ reader, updater, and reclaimer.
rcu_assign_pointer()
- +--------+
+ +--------+
+---------------------->| reader |---------+
| +--------+ |
| | |
@@ -318,12 +318,12 @@ reader, updater, and reclaimer.
| | | rcu_read_lock()
| | | rcu_read_unlock()
| rcu_dereference() | |
- +---------+ | |
- | updater |<---------------------+ |
- +---------+ V
+ +---------+ | |
+ | updater |<----------------+ |
+ +---------+ V
| +-----------+
+----------------------------------->| reclaimer |
- +-----------+
+ +-----------+
Defer:
synchronize_rcu() & call_rcu()
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b8ee90bb644..b7e23e9d1770 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2544,6 +2544,38 @@
in the "bleeding edge" mini2440 support kernel at
http://repo.or.cz/w/linux-2.6/mini2440.git
+ mitigations=
+ [X86,PPC,S390] Control optional mitigations for CPU
+ vulnerabilities. This is a set of curated,
+ arch-independent options, each of which is an
+ aggregation of existing arch-specific options.
+
+ off
+ Disable all optional CPU mitigations. This
+ improves system performance, but it may also
+ expose users to several CPU vulnerabilities.
+ Equivalent to: nopti [X86,PPC]
+ nospectre_v1 [PPC]
+ nobp=0 [S390]
+ nospectre_v2 [X86,PPC,S390]
+ spectre_v2_user=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
+ l1tf=off [X86]
+
+ auto (default)
+ Mitigate all CPU vulnerabilities, but leave SMT
+ enabled, even if it's vulnerable. This is for
+ users who don't want to be surprised by SMT
+ getting disabled across kernel upgrades, or who
+ have other ways of avoiding SMT-based attacks.
+ Equivalent to: (default behavior)
+
+ auto,nosmt
+ Mitigate all CPU vulnerabilities, disabling SMT
+ if needed. This is for users who always want to
+ be fully mitigated, even if it means losing SMT.
+ Equivalent to: l1tf=flush,nosmt [X86]
+
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
parameter allows control of the logging verbosity for
@@ -3623,7 +3655,9 @@
see CONFIG_RAS_CEC help text.
rcu_nocbs= [KNL]
- The argument is a cpu list, as described above.
+ The argument is a cpu list, as described above,
+ except that the string "all" can be used to
+ specify every CPU on the system.
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.
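		For example (illustrative command lines, not from the
		patch), the first form below offloads callbacks from
		CPUs 1-7 and the second from every CPU in the system:

			rcu_nocbs=1-7
			rcu_nocbs=all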
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
index 913396ac5824..dca3fb0554db 100644
--- a/Documentation/atomic_t.txt
+++ b/Documentation/atomic_t.txt
@@ -56,6 +56,23 @@ Barriers:
smp_mb__{before,after}_atomic()
+TYPES (signed vs unsigned)
+--------------------------
+
+While atomic_t, atomic_long_t and atomic64_t use int, long and s64
+respectively (for hysterical raisins), the kernel uses -fno-strict-overflow
+(which implies -fwrapv) and defines signed overflow to behave like
+2s-complement.
+
+Therefore, an explicitly unsigned variant of the atomic ops is strictly
+unnecessary; we can simply cast, and there is no UB.
+
+There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for
+signed types.
+
+With this we also conform to the C/C++ _Atomic behaviour and things like
+P1236R1.
+
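+A minimal sketch of the casting this refers to (the counter and helper below
+are invented for illustration): an atomic_t used as a wrapping unsigned
+32-bit sequence counter.
+
+	#include <linux/atomic.h>
+	#include <linux/types.h>
+
+	static atomic_t seq = ATOMIC_INIT(0);
+
+	static u32 next_seq(void)
+	{
+		/*
+		 * atomic_inc_return() may wrap past INT_MAX; with
+		 * -fno-strict-overflow that wrap is defined 2s-complement
+		 * behaviour, so the cast to u32 just reinterprets the bits.
+		 */
+		return (u32)atomic_inc_return(&seq);
+	}
+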
SEMANTICS
---------
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst
index 6eb9d3f090cd..93cb65d52720 100644
--- a/Documentation/core-api/cachetlb.rst
+++ b/Documentation/core-api/cachetlb.rst
@@ -101,16 +101,6 @@ changes occur:
translations for software managed TLB configurations.
The sparc64 port currently does this.
-6) ``void tlb_migrate_finish(struct mm_struct *mm)``
-
- This interface is called at the end of an explicit
- process migration. This interface provides a hook
- to allow a platform to update TLB or context-specific
- information for the address space.
-
- The ia64 sn2 platform is one example of a platform
- that uses this interface.
-
Next, we have the cache flushing interfaces. In general, when Linux
is changing an existing virtual-->physical mapping to a new value,
the sequence will be in one of the following forms::
diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt
index 24c5cdaba8d2..ca83dcc84fb8 100644
--- a/Documentation/devicetree/bindings/net/davinci_emac.txt
+++ b/Documentation/devicetree/bindings/net/davinci_emac.txt
@@ -20,6 +20,8 @@ Required properties:
Optional properties:
- phy-handle: See ethernet.txt file in the same directory.
If absent, davinci_emac driver defaults to 100/FULL.
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
- ti,davinci-rmii-en: 1 byte, 1 means use RMII
- ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index cfc376bc977a..a68621580584 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -10,15 +10,14 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
the boot program; should be used in cases where the MAC address assigned to
the device by the boot program is different from the "local-mac-address"
property;
-- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
-- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
- max-speed: number, specifies maximum speed in Mbit/s supported by the device;
- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
the maximum frame size (there's contradiction in the Devicetree
Specification).
- phy-mode: string, operation mode of the PHY interface. This is now a de-facto
standard property; supported values are:
- * "internal"
+ * "internal" (Internal means there is not a standard bus between the MAC and
+ the PHY, something proprietary is being used to embed the PHY in the MAC.)
* "mii"
* "gmii"
* "sgmii"
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index 174f292d8a3e..8b80515729d7 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -26,6 +26,10 @@ Required properties:
Optional elements: 'tsu_clk'
- clocks: Phandles to input clocks.
+Optional properties:
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
+
Optional properties for PHY child node:
- reset-gpios : Should specify the gpio for phy reset
- magic-packet : If present, indicates that the hardware supports waking
diff --git a/Documentation/driver-api/usb/power-management.rst b/Documentation/driver-api/usb/power-management.rst
index 79beb807996b..4a74cf6f2797 100644
--- a/Documentation/driver-api/usb/power-management.rst
+++ b/Documentation/driver-api/usb/power-management.rst
@@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0
then the interface is considered to be idle, and the kernel may
autosuspend the device.
-Drivers need not be concerned about balancing changes to the usage
-counter; the USB core will undo any remaining "get"s when a driver
-is unbound from its interface. As a corollary, drivers must not call
-any of the ``usb_autopm_*`` functions after their ``disconnect``
-routine has returned.
+Drivers must be careful to balance their overall changes to the usage
+counter. Unbalanced "get"s will remain in effect when a driver is
+unbound from its interface, preventing the device from going into
+runtime suspend should the interface be bound to a driver again. On
+the other hand, drivers are allowed to achieve this balance by calling
+the ``usb_autopm_*`` functions even after their ``disconnect`` routine
+has returned -- say from within a work-queue routine -- provided they
+retain an active reference to the interface (via ``usb_get_intf`` and
+``usb_put_intf``).
Drivers using the async routines are responsible for their own
synchronization and mutual exclusion.
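A hedged sketch of the pattern described above (the driver structure and
function names are invented; put_work is assumed to be set up with
INIT_WORK() in probe()). The final usage-counter decrement runs from a work
item that may execute after ``disconnect()``, so the driver pins the
interface with ``usb_get_intf()``/``usb_put_intf()``::

    #include <linux/usb.h>
    #include <linux/workqueue.h>

    struct my_dev {
            struct usb_interface *intf;
            struct work_struct put_work;    /* INIT_WORK(&put_work, my_put_work) in probe() */
    };

    static void my_put_work(struct work_struct *w)
    {
            struct my_dev *dev = container_of(w, struct my_dev, put_work);

            usb_autopm_put_interface(dev->intf);    /* balance the earlier get */
            usb_put_intf(dev->intf);                /* drop our interface reference */
    }

    static int my_start_io(struct my_dev *dev)
    {
            int ret = usb_autopm_get_interface(dev->intf);

            if (ret)
                    return ret;
            usb_get_intf(dev->intf);        /* keep intf valid for the work item */
            schedule_work(&dev->put_work);  /* the put may run after disconnect() */
            return 0;
    }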
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 10f4499e677c..ee60e519438a 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -243,10 +243,10 @@ Optimization
^^^^^^^^^^^^
The Kprobe-optimizer doesn't insert the jump instruction immediately;
-rather, it calls synchronize_sched() for safety first, because it's
+rather, it calls synchronize_rcu() for safety first, because it's
possible for a CPU to be interrupted in the middle of executing the
-optimized region [3]_. As you know, synchronize_sched() can ensure
-that all interruptions that were active when synchronize_sched()
+optimized region [3]_. As you know, synchronize_rcu() can ensure
+that all interruptions that were active when synchronize_rcu()
was called are done, but only if CONFIG_PREEMPT=n. So, this version
of kprobe optimization supports only kernels with CONFIG_PREEMPT=n [4]_.
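The guarantee the optimizer relies on is the usual synchronize_rcu() one:
every context (including interrupt handlers, on CONFIG_PREEMPT=n kernels)
that was running when it is called has finished by the time it returns. A
generic sketch of that pattern, with invented names and not the actual
kprobes code:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct cfg { int val; };
    static struct cfg __rcu *active_cfg;    /* read from interrupt context */

    static void retire_cfg(void)
    {
            struct cfg *old = rcu_dereference_protected(active_cfg, 1);

            rcu_assign_pointer(active_cfg, NULL);
            synchronize_rcu();      /* every handler that could still see
                                     * 'old' has completed by now */
            kfree(old);
    }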
diff --git a/Documentation/media/uapi/rc/rc-tables.rst b/Documentation/media/uapi/rc/rc-tables.rst
index f460031d8531..177ac44fa0fa 100644
--- a/Documentation/media/uapi/rc/rc-tables.rst
+++ b/Documentation/media/uapi/rc/rc-tables.rst
@@ -623,7 +623,7 @@ the remote via /dev/input/event devices.
- .. row 78
- - ``KEY_SCREEN``
+ - ``KEY_ASPECT_RATIO``
- Select screen aspect ratio
@@ -631,7 +631,7 @@ the remote via /dev/input/event devices.
- .. row 79
- - ``KEY_ZOOM``
+ - ``KEY_FULL_SCREEN``
- Put device into zoom/full screen mode
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt
index e12a4900cf72..d192f8b9948b 100644
--- a/Documentation/networking/decnet.txt
+++ b/Documentation/networking/decnet.txt
@@ -22,8 +22,6 @@ you'll need the following options as well...
CONFIG_DECNET_ROUTER (to be able to add/delete routes)
CONFIG_NETFILTER (will be required for the DECnet routing daemon)
- CONFIG_DECNET_ROUTE_FWMARK is optional
-
Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
that you need it; in general you won't, and it can cause ifconfig to
malfunction.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index acdfb5d2bcaa..c4ac35234f05 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER
minimum RTT when it is moved to a longer path (e.g., due to traffic
engineering). A longer window makes the filter more resistant to RTT
inflations such as transient congestion. The unit is seconds.
+ Possible values: 0 - 86400 (1 day)
Default: 300
tcp_moderate_rcvbuf - BOOLEAN
@@ -1336,6 +1337,7 @@ tag - INTEGER
Default value is 0.
xfrm4_gc_thresh - INTEGER
+ (Obsolete since linux-4.14)
The threshold at which we will start garbage collecting for IPv4
destination cache entries. At twice this value the system will
refuse new allocations.
@@ -1919,6 +1921,7 @@ echo_ignore_all - BOOLEAN
Default: 0
xfrm6_gc_thresh - INTEGER
+ (Obsolete since linux-4.14)
The threshold at which we will start garbage collecting for IPv6
destination cache entries. At twice this value the system will
refuse new allocations.
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 8c7a713cf657..642fa963be3c 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -132,7 +132,7 @@ version that should be applied. If there is any doubt, the maintainer
will reply and ask what should be done.
Q: I made changes to only a few patches in a patch series should I resend only those changed?
---------------------------------------------------------------------------------------------
+---------------------------------------------------------------------------------------------
A: No, please resend the entire patch series and make sure you do number your
patches such that it is clear this is the latest and greatest set of patches
that can be applied.
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 2df5894353d6..cd7303d7fa25 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -1009,16 +1009,18 @@ The kernel interface functions are as follows:
(*) Check call still alive.
- u32 rxrpc_kernel_check_life(struct socket *sock,
- struct rxrpc_call *call);
+ bool rxrpc_kernel_check_life(struct socket *sock,
+ struct rxrpc_call *call,
+ u32 *_life);
void rxrpc_kernel_probe_life(struct socket *sock,
struct rxrpc_call *call);
- The first function returns a number that is updated when ACKs are received
- from the peer (notably including PING RESPONSE ACKs which we can elicit by
- sending PING ACKs to see if the call still exists on the server). The
- caller should compare the numbers of two calls to see if the call is still
- alive after waiting for a suitable interval.
+ The first function passes back in *_life a number that is updated when
+ ACKs are received from the peer (notably including PING RESPONSE ACKs
+ which we can elicit by sending PING ACKs to see if the call still exists
+ on the server). The caller should compare the numbers of two calls to see
+ if the call is still alive after waiting for a suitable interval. It also
+ returns true as long as the call hasn't yet reached the completed state.
This allows the caller to work out if the server is still contactable and
if the call is still alive on the server while waiting for the server to
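A caller-side sketch of how the new signature might be used (the wrapper
below and its names are invented; only rxrpc_kernel_check_life() and
rxrpc_kernel_probe_life() come from the text above):

	#include <net/af_rxrpc.h>

	static bool call_seems_alive(struct socket *sock, struct rxrpc_call *call,
				     u32 *last_life)
	{
		u32 life;

		if (!rxrpc_kernel_check_life(sock, call, &life))
			return false;		/* call reached the completed state */
		if (life != *last_life) {
			*last_life = life;	/* an ACK arrived since the last poll */
			return true;
		}
		rxrpc_kernel_probe_life(sock, call);	/* ask for a PING RESPONSE ACK */
		return true;			/* poll again after a suitable interval */
	}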
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 6af24cdb25cc..3f13d8599337 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to
increase the success rate of future high-order allocations such as SLUB
allocations, THP and hugetlbfs pages.
-To make it sensible with respect to the watermark_scale_factor parameter,
-the unit is in fractions of 10,000. The default value of 15,000 means
-that up to 150% of the high watermark will be reclaimed in the event of
-a pageblock being mixed due to fragmentation. The level of reclaim is
-determined by the number of fragmentation events that occurred in the
-recent past. If this value is smaller than a pageblock then a pageblocks
-worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
-of 0 will disable the feature.
+To make it sensible with respect to the watermark_scale_factor
+parameter, the unit is in fractions of 10,000. The default value of
+15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
+watermark will be reclaimed in the event of a pageblock being mixed due
+to fragmentation. The level of reclaim is determined by the number of
+fragmentation events that occurred in the recent past. If this value is
+smaller than a pageblock then a pageblocks worth of pages will be reclaimed
+(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature.
=============================================================
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 7f01fb1c1084..db0b9d8619f1 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -493,10 +493,8 @@ There are some minimal guarantees that may be expected of a CPU:
     This type of operation acts as a one-way permeable barrier.  All memory
     operations issued after the ACQUIRE operation are guaranteed to be seen by
     the rest of the system's components as happening after the ACQUIRE.
-     LOCK operations and the smp_load_acquire() and smp_cond_acquire()
-     operations are also ACQUIRE operations.  smp_cond_acquire() fulfils the
-     ACQUIRE semantic requirements using a control dependency and smp_rmb().
+     LOCK operations and the smp_load_acquire() and smp_cond_load_acquire()
+     operations are also ACQUIRE operations.
     Memory operations issued before the ACQUIRE operation may appear to happen
     after the ACQUIRE operation completes.
@@ -2146,33 +2144,40 @@ set_current_state() may be wrapped by:
	event_indicated = 1;
	wake_up_process(event_daemon);
-A write memory barrier is implied by the wake_up() family of functions, if
-they wake something up.  The barrier is executed before the task state is
-cleared, and so sits between the STORE to indicate the event and the STORE to
-set the task state to TASK_RUNNING.
+If wake_up() does wake something up, this function performs a general memory
+barrier.  If it wakes nothing up, then a memory barrier may or may not be
+executed; you must not rely on one being executed.  The barrier occurs before
+the task state is accessed, more precisely, it sits between the STORE to
+indicate the event and the STORE to set the state to TASK_RUNNING:
-	CPU 1				CPU 2
+	CPU 1 (Sleeper)			CPU 2 (Waker)
	===============================	===============================
	set_current_state();		STORE event_indicated
	  smp_store_mb();		wake_up();
-	    STORE current->state	  <write barrier>
-	    <general barrier>		  STORE current->state
-	LOAD event_indicated
+	    STORE current->state	  ...
+	    <general barrier>		  <general barrier>
+	LOAD event_indicated		  if ((LOAD task->state) & TASK_NORMAL)
+					    STORE task->state
-To repeat, this write memory barrier is executed only when the code really
-does wake something up.  To see this, consider the following sequence of
-events, where X and Y are both initially zero:
+where "task" is the thread being woken up and it equals CPU 1's "current".
+
+To repeat, a general memory barrier is guaranteed to be executed by wake_up()
+if something is actually awakened, but otherwise there is no such guarantee.
+To see this, consider the following sequence of events, where X and Y are both
+initially zero:
	CPU 1				CPU 2
	===============================	===============================
-	X = 1;				STORE event_indicated
+	X = 1;				Y = 1;
	smp_mb();			wake_up();
-	Y = 1;				wait_event(wq, Y == 1);
-	wake_up();			  load from Y sees 1, no memory barrier
-					load from X might see 0
+	LOAD Y				LOAD X
+
+If a wakeup does occur, one (at least) of the two loads must see 1.  If, on
+the other hand, a wakeup does not occur, both loads might see 0.
-In contrast to the case above, if the wakeup really is performed, CPU 2's
-load from X would be guaranteed to see 1.
+wake_up_process() always executes a general memory barrier.  This barrier,
+too, occurs before the task state is accessed.  In particular, if the
+wake_up() in the previous example were replaced by a call to
+wake_up_process() then one of the two loads would be guaranteed to see 1.
The available waker functions include:
@@ -2192,6 +2197,8 @@ A write memory barrier is implied by the wake_up() family of functions, if they
	wake_up_poll();
	wake_up_process();
+In terms of memory ordering, these functions all provide the same guarantees
+of a wake_up() (or stronger).
[!] Note that the memory barriers implied by the sleeper and the waker do _not_
order the stores done before the wake-up with respect to the loads of those
stored values that the sleeper performs after calling set_current_state()
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 67068c47c591..64b38dfcc243 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -321,7 +321,7 @@ cpu's hardware control block.
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
Capability: basic
-Architectures: x86
+Architectures: all
Type: vm ioctl
Parameters: struct kvm_dirty_log (in/out)
Returns: 0 on success, -1 on error
@@ -3810,7 +3810,7 @@ to I/O ports.
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
-Architectures: x86
+Architectures: x86, arm, arm64, mips
Type: vm ioctl
Parameters: struct kvm_dirty_log (in)
Returns: 0 on success, -1 on error
@@ -3830,8 +3830,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
field. Bit 0 of the bitmap corresponds to page "first_page" in the
memory slot, and num_pages is the size in bits of the input bitmap.
-Both first_page and num_pages must be a multiple of 64. For each bit
-that is set in the input bitmap, the corresponding page is marked "clean"
+first_page must be a multiple of 64; num_pages must also be a multiple of
+64 unless first_page + num_pages is the size of the memory slot. For each
+bit that is set in the input bitmap, the corresponding page is marked "clean"
in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
(for example via write-protection, or by clearing the dirty bit in
a page table entry).
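A userspace sketch of the alignment rule above (the helper is invented; it
assumes KVM_CAP_MANUAL_DIRTY_LOG_PROTECT has already been enabled on the VM):
clear the first 64 pages of a slot after re-processing their dirty bits.

	#include <linux/kvm.h>
	#include <stdint.h>
	#include <sys/ioctl.h>

	int clear_first_64_pages(int vm_fd, uint32_t slot, void *bitmap)
	{
		struct kvm_clear_dirty_log log = {
			.slot = slot,
			.first_page = 0,	/* must be a multiple of 64 */
			.num_pages = 64,	/* multiple of 64 unless it reaches the slot end */
			.dirty_bitmap = bitmap,	/* bit 0 corresponds to page first_page */
		};

		return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &log);
	}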
@@ -4799,7 +4800,7 @@ and injected exceptions.
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
-Architectures: all
+Architectures: x86, arm, arm64, mips
Parameters: args[0] whether feature should be enabled or not
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
diff --git a/MAINTAINERS b/MAINTAINERS
index 3671fdea5010..52cd9341e03c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3121,6 +3121,7 @@ F: drivers/cpufreq/bmips-cpufreq.c
BROADCOM BMIPS MIPS ARCHITECTURE
M: Kevin Cernekee <cernekee@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
+L: bcm-kernel-feedback-list@broadcom.com
L: linux-mips@vger.kernel.org
T: git git://github.com/broadcom/stblinux.git
S: Maintained
@@ -6461,7 +6462,7 @@ S: Maintained
F: drivers/media/radio/radio-gemtek*
GENERIC GPIO I2C DRIVER
-M: Haavard Skinnemoen <hskinnemoen@gmail.com>
+M: Wolfram Sang <wsa+renesas@sang-engineering.com>
S: Supported
F: drivers/i2c/busses/i2c-gpio.c
F: include/linux/platform_data/i2c-gpio.h
@@ -7333,7 +7334,6 @@ F: Documentation/devicetree/bindings/i3c/
F: Documentation/driver-api/i3c
F: drivers/i3c/
F: include/linux/i3c/
-F: include/dt-bindings/i3c/
I3C DRIVER FOR SYNOPSYS DESIGNWARE
M: Vitor Soares <vitor.soares@synopsys.com>
@@ -8708,6 +8708,7 @@ F: scripts/leaking_addresses.pl
LED SUBSYSTEM
M: Jacek Anaszewski <jacek.anaszewski@gmail.com>
M: Pavel Machek <pavel@ucw.cz>
+R: Dan Murphy <dmurphy@ti.com>
L: linux-leds@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
S: Maintained
@@ -8993,7 +8994,7 @@ R: Daniel Lustig <dlustig@nvidia.com>
L: linux-kernel@vger.kernel.org
L: linux-arch@vger.kernel.org
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: tools/memory-model/
F: Documentation/atomic_bitops.txt
F: Documentation/atomic_t.txt
@@ -10145,7 +10146,7 @@ F: drivers/spi/spi-at91-usart.c
F: Documentation/devicetree/bindings/mfd/atmel-usart.txt
MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER
-M: Woojung Huh <Woojung.Huh@microchip.com>
+M: Woojung Huh <woojung.huh@microchip.com>
M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
L: netdev@vger.kernel.org
S: Maintained
@@ -12175,6 +12176,7 @@ F: arch/*/kernel/*/*/perf_event*.c
F: arch/*/include/asm/perf_event.h
F: arch/*/kernel/perf_callchain.c
F: arch/*/events/*
+F: arch/*/events/*/*
F: tools/perf/
PERSONALITY HANDLING
@@ -13041,9 +13043,9 @@ M: Josh Triplett <josh@joshtriplett.org>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
-L: linux-kernel@vger.kernel.org
+L: rcu@vger.kernel.org
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: tools/testing/selftests/rcutorture
RDC R-321X SoC
@@ -13089,10 +13091,10 @@ R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
R: Joel Fernandes <joel@joelfernandes.org>
-L: linux-kernel@vger.kernel.org
+L: rcu@vger.kernel.org
W: http://www.rdrop.com/users/paulmck/RCU/
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: Documentation/RCU/
X: Documentation/RCU/torture.txt
F: include/linux/rcu*
@@ -14244,10 +14246,10 @@ M: "Paul E. McKenney" <paulmck@linux.ibm.com>
M: Josh Triplett <josh@joshtriplett.org>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-L: linux-kernel@vger.kernel.org
+L: rcu@vger.kernel.org
W: http://www.rdrop.com/users/paulmck/RCU/
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: include/linux/srcu*.h
F: kernel/rcu/srcu*.c
@@ -15694,7 +15696,7 @@ M: "Paul E. McKenney" <paulmck@linux.ibm.com>
M: Josh Triplett <josh@joshtriplett.org>
L: linux-kernel@vger.kernel.org
S: Supported
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
F: Documentation/RCU/torture.txt
F: kernel/torture.c
F: kernel/rcu/rcutorture.c
diff --git a/Makefile b/Makefile
index 15c8251d4d5e..26c92f892d24 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 1
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION =
NAME = Shy Crocodile
# *DOCUMENTATION*
@@ -678,6 +678,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
@@ -719,7 +720,6 @@ ifdef CONFIG_CC_IS_CLANG
KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
# Quiet clang warning: comparison of unsigned expression < 0 is always false
KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
diff --git a/arch/Kconfig b/arch/Kconfig
index 33687dddd86a..a826843470ed 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -383,7 +383,13 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_INVALIDATE
+config HAVE_RCU_TABLE_NO_INVALIDATE
+ bool
+
+config HAVE_MMU_GATHER_PAGE_SIZE
+ bool
+
+config HAVE_MMU_GATHER_NO_GATHER
bool
config ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 584a6e114853..c7c976eb6407 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -36,6 +36,7 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+ select MMU_GATHER_NO_RANGE
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h
index 8f5042b61875..4f79e331af5e 100644
--- a/arch/alpha/include/asm/tlb.h
+++ b/arch/alpha/include/asm/tlb.h
@@ -2,12 +2,6 @@
#ifndef _ALPHA_TLB_H
#define _ALPHA_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 63ed39cbd3bd..165f268beafc 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
532 common getppid sys_getppid
# all other architectures have common numbers for new syscall, alpha
# is the exception.
+534 common pidfd_send_signal sys_pidfd_send_signal
+535 common io_uring_setup sys_io_uring_setup
+536 common io_uring_enter sys_io_uring_enter
+537 common io_uring_register sys_io_uring_register
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 69bc1c9e8e50..7425bb0f2d1b 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -18,8 +18,8 @@
model = "snps,hsdk";
compatible = "snps,hsdk";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
chosen {
bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
@@ -105,7 +105,7 @@
#size-cells = <1>;
interrupt-parent = <&idu_intc>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
cgu_rst: reset-controller@8a0 {
compatible = "snps,hsdk-reset";
@@ -269,9 +269,10 @@
};
memory@80000000 {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
device_type = "memory";
- reg = <0x80000000 0x40000000>; /* 1 GiB */
+ reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */
+ /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */
};
};
diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h
index a9db5f62aaf3..90cac97643a4 100644
--- a/arch/arc/include/asm/tlb.h
+++ b/arch/arc/include/asm/tlb.h
@@ -9,38 +9,6 @@
#ifndef _ASM_ARC_TLB_H
#define _ASM_ARC_TLB_H
-#define tlb_flush(tlb) \
-do { \
- if (tlb->fullmm) \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
-/*
- * This pair is called at time of munmap/exit to flush cache and TLB entries
- * for mappings being torn down.
- * 1) cache-flush part -implemented via tlb_start_vma( ) for VIPT aliasing D$
- * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range
- *
- * Note, read http://lkml.org/lkml/2004/1/15/6
- */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-#define tlb_start_vma(tlb, vma)
-#else
-#define tlb_start_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while(0)
-#endif
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, ptep, address)
-
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index f230bb7092fd..b3373f5c88e0 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -30,10 +30,10 @@
#else
-.macro PREALLOC_INSTR
+.macro PREALLOC_INSTR reg, off
.endm
-.macro PREFETCHW_INSTR
+.macro PREFETCHW_INSTR reg, off
.endm
#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 4135abec3fb0..63e6e6504699 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu)
}
READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
- if (cbcr.c)
+ if (cbcr.c) {
ioc_exists = 1;
- else
+
+ /*
+ * As for today we don't support both IOC and ZONE_HIGHMEM enabled
+ * simultaneously. This happens because as of today IOC aperture covers
+ * only ZONE_NORMAL (low mem) and any dma transactions outside this
+ * region won't be HW coherent.
+ * If we want to use both IOC and ZONE_HIGHMEM we can use
+ * bounce_buffer to handle dma transactions to HIGHMEM.
+ * Also it is possible to modify dma_direct cache ops or increase IOC
+ * aperture size if we are planning to use HIGHMEM without PAE.
+ */
+ if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled())
+ ioc_enable = 0;
+ } else {
ioc_enable = 0;
+ }
/* HS 2.0 didn't have AUX_VOL */
if (cpuinfo_arc700[cpu].core.family > 0x51) {
@@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void)
if (!ioc_enable)
return;
- /*
- * As for today we don't support both IOC and ZONE_HIGHMEM enabled
- * simultaneously. This happens because as of today IOC aperture covers
- * only ZONE_NORMAL (low mem) and any dma transactions outside this
- * region won't be HW coherent.
- * If we want to use both IOC and ZONE_HIGHMEM we can use
- * bounce_buffer to handle dma transactions to HIGHMEM.
- * Also it is possible to modify dma_direct cache ops or increase IOC
- * aperture size if we are planning to use HIGHMEM without PAE.
- */
- if (IS_ENABLED(CONFIG_HIGHMEM))
- panic("IOC and HIGHMEM can't be used simultaneously");
-
/* Flush + invalidate + disable L1 dcache */
__dc_disable();
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 850b4805e2d1..9aed25a6019b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -73,7 +73,7 @@ config ARM
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
- select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL
+ select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 6d6e0330930b..e388af4594a6 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -47,8 +47,8 @@ config DEBUG_WX
choice
prompt "Choose kernel unwinder"
- default UNWINDER_ARM if AEABI && !FUNCTION_GRAPH_TRACER
- default UNWINDER_FRAME_POINTER if !AEABI || FUNCTION_GRAPH_TRACER
+ default UNWINDER_ARM if AEABI
+ default UNWINDER_FRAME_POINTER if !AEABI
help
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
@@ -65,7 +65,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_ARM
bool "ARM EABI stack unwinder"
- depends on AEABI
+ depends on AEABI && !FUNCTION_GRAPH_TRACER
select ARM_UNWIND
help
This option enables stack unwinding support in the kernel
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 6c7ccb428c07..7135820f76d4 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1438,7 +1438,21 @@ ENTRY(efi_stub_entry)
@ Preserve return value of efi_entry() in r4
mov r4, r0
- bl cache_clean_flush
+
+ @ our cache maintenance code relies on CP15 barrier instructions
+ @ but since we arrived here with the MMU and caches configured
+ @ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
+ @ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
+ @ the enable path will be executed on v7+ only.
+ mrc p15, 0, r1, c1, c0, 0 @ read SCTLR
+ tst r1, #(1 << 5) @ CP15BEN bit set?
+ bne 0f
+ orr r1, r1, #(1 << 5) @ CP15 barrier instructions
+ mcr p15, 0, r1, c1, c0, 0 @ write SCTLR
+ ARM( .inst 0xf57ff06f @ v7+ isb )
+ THUMB( isb )
+
+0: bl cache_clean_flush
bl cache_off
@ Set parameters for booting zImage according to boot protocol
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index f854148c8d7c..bc6d04a09899 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -33,271 +33,42 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#define MMU_GATHER_BUNDLE 8
-
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
static inline void __tlb_remove_table(void *_table)
{
free_page_and_swap_cache((struct page *)_table);
}
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry)
-#else
-#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry)
-#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
-
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- struct mmu_table_batch *batch;
- unsigned int need_flush;
-#endif
- unsigned int fullmm;
- struct vm_area_struct *vma;
- unsigned long start, end;
- unsigned long range_start;
- unsigned long range_end;
- unsigned int nr;
- unsigned int max;
- struct page **pages;
- struct page *local[MMU_GATHER_BUNDLE];
-};
-
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-/*
- * This is unnecessarily complex. There's three ways the TLB shootdown
- * code is used:
- * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
- * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL.
- * 2. Unmapping all vmas. See exit_mmap().
- * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL. Additionally, page tables will be freed.
- * 3. Unmapping argument pages. See shift_arg_pages().
- * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- * tlb->vma will be NULL.
- */
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
- if (tlb->fullmm || !tlb->vma)
- flush_tlb_mm(tlb->mm);
- else if (tlb->range_end > 0) {
- flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
- if (!tlb->fullmm) {
- if (addr < tlb->range_start)
- tlb->range_start = addr;
- if (addr + PAGE_SIZE > tlb->range_end)
- tlb->range_end = addr + PAGE_SIZE;
- }
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(struct page *);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- tlb_flush(tlb);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- free_pages_and_swap_cache(tlb->pages, tlb->nr);
- tlb->nr = 0;
- if (tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->vma = NULL;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- __tlb_alloc_page(tlb);
+#include <asm-generic/tlb.h>
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
+#ifndef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry)
#endif
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->range_start = start;
- tlb->range_end = end;
- }
-
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm) {
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
- tlb->vma = vma;
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- tlb_flush(tlb);
-}
-
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long addr)
+__pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
{
pgtable_page_dtor(pte);
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
-#else
+#ifndef CONFIG_ARM_LPAE
/*
* With the classic ARM MMU, a pte page has two corresponding pmd
* entries, each covering 1MB.
*/
- addr &= PMD_MASK;
- tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE);
- tlb_add_flush(tlb, addr + SZ_1M);
+ addr = (addr & PMD_MASK) + SZ_1M;
+ __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
#endif
- tlb_remove_entry(tlb, pte);
-}
-
-static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
- unsigned long addr)
-{
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
- tlb_remove_entry(tlb, virt_to_page(pmdp));
-#endif
+ tlb_remove_table(tlb, pte);
}
static inline void
-tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
+__pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
-}
-
-static inline void tlb_flush_remove_tables(struct mm_struct *mm)
-{
-}
+#ifdef CONFIG_ARM_LPAE
+ struct page *page = virt_to_page(pmdp);
-static inline void tlb_flush_remove_tables_local(void *arg)
-{
+ tlb_remove_table(tlb, page);
+#endif
}
#endif /* CONFIG_MMU */
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index c08d2d890f7b..b38bbd011b35 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -133,9 +133,9 @@ __secondary_data:
*/
.text
__after_proc_init:
-#ifdef CONFIG_ARM_MPU
M_CLASS(movw r12, #:lower16:BASEADDR_V7M_SCB)
M_CLASS(movt r12, #:upper16:BASEADDR_V7M_SCB)
+#ifdef CONFIG_ARM_MPU
M_CLASS(ldr r3, [r12, 0x50])
AR_CLASS(mrc p15, 0, r3, c0, c1, 4) @ Read ID_MMFR0
and r3, r3, #(MMFR0_PMSA) @ PMSA field
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 76bb8de6bf6b..be5edfdde558 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -549,8 +549,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
int ret;
/*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
+ * Perform fixup for the pre-signal frame.
*/
rseq_signal_deliver(ksig, regs);
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 9016f4081bb9..0393917eaa57 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7e34b9eba5de..78d9fafac983 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -149,7 +149,6 @@ config ARM64
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RCU_TABLE_FREE
- select HAVE_RCU_TABLE_INVALIDATE
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index e1d95f08f8e1..c7e1a7837706 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -50,7 +50,7 @@ do { \
static inline int
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
{
- int oldval, ret, tmp;
+ int oldval = 0, ret, tmp;
u32 __user *uaddr = __uaccess_mask_ptr(_uaddr);
pagefault_disable();
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 106fdc951b6e..37603b5616a5 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -27,6 +27,7 @@ static inline void __tlb_remove_table(void *_table)
free_page_and_swap_cache((struct page *)_table);
}
+#define tlb_flush tlb_flush
static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index d1dd93436e1e..f2a83ff6b73c 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 424
+#define __NR_compat_syscalls 428
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 5590f2623690..23f1a44acada 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
__SYSCALL(__NR_futex_time64, sys_futex)
#define __NR_sched_rr_get_interval_time64 423
__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 07b298120182..65a51331088e 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -103,10 +103,15 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* to be revisited if support for multiple ftrace entry points
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
+ *
+ * Note that PLTs are place relative, and plt_entries_equal()
+ * checks whether they point to the same target. Here, we need
+ * to check if the actual opcodes are in fact identical,
+ * regardless of the offset in memory, so use memcmp() instead.
*/
trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
- if (!plt_entries_equal(mod->arch.ftrace_trampoline,
- &trampoline)) {
+ if (memcmp(mod->arch.ftrace_trampoline, &trampoline,
+ sizeof(trampoline))) {
if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 6bc135042f5e..7cae155e81a5 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -363,7 +363,7 @@ void __init arm64_memblock_init(void)
* Otherwise, this is a no-op
*/
u64 base = phys_initrd_start & PAGE_MASK;
- u64 size = PAGE_ALIGN(phys_initrd_size);
+ u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
/*
* We can only add back the initrd memory if we don't end up
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index e5cd3c5f8399..3bb75e674161 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -20,6 +20,7 @@ config C6X
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
select ARCH_NO_COHERENT_DMA_MMAP
+ select MMU_GATHER_NO_RANGE if MMU
config MMU
def_bool n
diff --git a/arch/c6x/include/asm/tlb.h b/arch/c6x/include/asm/tlb.h
index 34525dea1356..240ba0febb57 100644
--- a/arch/c6x/include/asm/tlb.h
+++ b/arch/c6x/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef _ASM_C6X_TLB_H
#define _ASM_C6X_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _ASM_C6X_TLB_H */
diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h
index 98f344279904..d8201ca31206 100644
--- a/arch/h8300/include/asm/tlb.h
+++ b/arch/h8300/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef __H8300_TLB_H__
#define __H8300_TLB_H__
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/hexagon/include/asm/tlb.h b/arch/hexagon/include/asm/tlb.h
index 2f00772cc08a..f71c4ba83614 100644
--- a/arch/hexagon/include/asm/tlb.h
+++ b/arch/hexagon/include/asm/tlb.h
@@ -22,18 +22,6 @@
#include <linux/pagemap.h>
#include <asm/tlbflush.h>
-/*
- * We don't need any special per-pte or per-vma handling...
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 5133739966bc..beae261fbcb4 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -30,7 +30,6 @@ typedef void ia64_mv_irq_init_t (void);
typedef void ia64_mv_send_ipi_t (int, int, int, int);
typedef void ia64_mv_timer_interrupt_t (int, void *);
typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
-typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
typedef u8 ia64_mv_irq_to_vector (int);
typedef unsigned int ia64_mv_local_vector_to_irq (u8);
typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
@@ -80,11 +79,6 @@ machvec_noop (void)
}
static inline void
-machvec_noop_mm (struct mm_struct *mm)
-{
-}
-
-static inline void
machvec_noop_task (struct task_struct *task)
{
}
@@ -96,7 +90,6 @@ machvec_noop_bus (struct pci_bus *bus)
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *);
-extern void machvec_tlb_migrate_finish (struct mm_struct *);
# if defined (CONFIG_IA64_HP_SIM)
# include <asm/machvec_hpsim.h>
@@ -124,7 +117,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_send_ipi ia64_mv.send_ipi
# define platform_timer_interrupt ia64_mv.timer_interrupt
# define platform_global_tlb_purge ia64_mv.global_tlb_purge
-# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish
# define platform_dma_init ia64_mv.dma_init
# define platform_dma_get_ops ia64_mv.dma_get_ops
# define platform_irq_to_vector ia64_mv.irq_to_vector
@@ -167,7 +159,6 @@ struct ia64_machine_vector {
ia64_mv_send_ipi_t *send_ipi;
ia64_mv_timer_interrupt_t *timer_interrupt;
ia64_mv_global_tlb_purge_t *global_tlb_purge;
- ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
ia64_mv_dma_init *dma_init;
ia64_mv_dma_get_ops *dma_get_ops;
ia64_mv_irq_to_vector *irq_to_vector;
@@ -206,7 +197,6 @@ struct ia64_machine_vector {
platform_send_ipi, \
platform_timer_interrupt, \
platform_global_tlb_purge, \
- platform_tlb_migrate_finish, \
platform_dma_init, \
platform_dma_get_ops, \
platform_irq_to_vector, \
@@ -270,9 +260,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *);
#ifndef platform_global_tlb_purge
# define platform_global_tlb_purge ia64_global_tlb_purge /* default to architected version */
#endif
-#ifndef platform_tlb_migrate_finish
-# define platform_tlb_migrate_finish machvec_noop_mm
-#endif
#ifndef platform_kernel_launch_event
# define platform_kernel_launch_event machvec_noop
#endif
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
index b5153d300289..a243e4fb4877 100644
--- a/arch/ia64/include/asm/machvec_sn2.h
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -34,7 +34,6 @@ extern ia64_mv_irq_init_t sn_irq_init;
extern ia64_mv_send_ipi_t sn2_send_IPI;
extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
-extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish;
extern ia64_mv_irq_to_vector sn_irq_to_vector;
extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem;
@@ -77,7 +76,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus;
#define platform_send_ipi sn2_send_IPI
#define platform_timer_interrupt sn_timer_interrupt
#define platform_global_tlb_purge sn2_global_tlb_purge
-#define platform_tlb_migrate_finish sn_tlb_migrate_finish
#define platform_pci_fixup sn_pci_fixup
#define platform_inb __sn_inb
#define platform_inw __sn_inw
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 516355a774bf..86ec034ba499 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -47,263 +47,6 @@
#include <asm/tlbflush.h>
#include <asm/machvec.h>
-/*
- * If we can't allocate a page to make a big batch of page pointers
- * to work on, then just handle a few from the on-stack structure.
- */
-#define IA64_GATHER_BUNDLE 8
-
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int nr;
- unsigned int max;
- unsigned char fullmm; /* non-zero means full mm flush */
- unsigned char need_flush; /* really unmapped some PTEs? */
- unsigned long start, end;
- unsigned long start_addr;
- unsigned long end_addr;
- struct page **pages;
- struct page *local[IA64_GATHER_BUNDLE];
-};
-
-struct ia64_tr_entry {
- u64 ifa;
- u64 itir;
- u64 pte;
- u64 rr;
-}; /*Record for tr entry!*/
-
-extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
-extern void ia64_ptr_entry(u64 target_mask, int slot);
-
-extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
-
-/*
- region register macros
-*/
-#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
-#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
-#define RR_VE_MASK 0x0000000000000001L
-#define RR_VE_SHIFT 0
-#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
-#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
-#define RR_PS_MASK 0x00000000000000fcL
-#define RR_PS_SHIFT 2
-#define RR_RID_MASK 0x00000000ffffff00L
-#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
-
-static inline void
-ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- tlb->need_flush = 0;
-
- if (tlb->fullmm) {
- /*
- * Tearing down the entire address space. This happens both as a result
- * of exit() and execve(). The latter case necessitates the call to
- * flush_tlb_mm() here.
- */
- flush_tlb_mm(tlb->mm);
- } else if (unlikely (end - start >= 1024*1024*1024*1024UL
- || REGION_NUMBER(start) != REGION_NUMBER(end - 1)))
- {
- /*
- * If we flush more than a tera-byte or across regions, we're probably
- * better off just flushing the entire TLB(s). This should be very rare
- * and is not worth optimizing for.
- */
- flush_tlb_all();
- } else {
- /*
- * flush_tlb_range() takes a vma instead of a mm pointer because
- * some architectures want the vm_flags for ITLB/DTLB flush.
- */
- struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
-
- /* flush the address range from the tlb: */
- flush_tlb_range(&vma, start, end);
- /* now flush the virt. page-table area mapping the address range: */
- flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
- }
-
-}
-
-static inline void
-ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- unsigned long i;
- unsigned int nr;
-
- /* lastly, release the freed pages */
- nr = tlb->nr;
-
- tlb->nr = 0;
- tlb->start_addr = ~0UL;
- for (i = 0; i < nr; ++i)
- free_page_and_swap_cache(tlb->pages[i]);
-}
-
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
-static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- if (!tlb->need_flush)
- return;
- ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(void *);
- }
-}
-
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->start_addr = ~0UL;
-}
-
-/*
- * Called at the end of the shootdown operation to free up any resources that were
- * collected.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force)
- tlb->need_flush = 1;
- /*
- * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
- * tlb->end_addr.
- */
- ia64_tlb_flush_mmu(tlb, start, end);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Logically, this routine frees PAGE. On MP machines, the actual freeing of the page
- * must be delayed until after the TLB has been flushed (see comments at the beginning of
- * this file).
- */
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
-
- if (!tlb->nr && tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/*
- * Remove TLB entry for PTE mapped at virtual address ADDRESS. This is called for any
- * PTE, not just those pointing to (normal) physical memory.
- */
-static inline void
-__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start_addr == ~0UL)
- tlb->start_addr = address;
- tlb->end_addr = address + PAGE_SIZE;
-}
-
-#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm)
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
-#define tlb_remove_tlb_entry(tlb, ptep, addr) \
-do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, addr); \
-} while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pte_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pmd_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pmd_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pud_free_tlb(tlb, pudp, address) \
-do { \
- tlb->need_flush = 1; \
- __pud_free_tlb(tlb, pudp, address); \
-} while (0)
+#include <asm-generic/tlb.h>
#endif /* _ASM_IA64_TLB_H */
diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h
index 25e280810f6c..ceac10c4d6e2 100644
--- a/arch/ia64/include/asm/tlbflush.h
+++ b/arch/ia64/include/asm/tlbflush.h
@@ -14,6 +14,31 @@
#include <asm/mmu_context.h>
#include <asm/page.h>
+struct ia64_tr_entry {
+ u64 ifa;
+ u64 itir;
+ u64 pte;
+ u64 rr;
+}; /*Record for tr entry!*/
+
+extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
+extern void ia64_ptr_entry(u64 target_mask, int slot);
+extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ region register macros
+*/
+#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
+#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
+#define RR_VE_MASK 0x0000000000000001L
+#define RR_VE_SHIFT 0
+#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
+#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
+#define RR_PS_MASK 0x00000000000000fcL
+#define RR_PS_SHIFT 2
+#define RR_RID_MASK 0x00000000ffffff00L
+#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
+
/*
* Now for some TLB flushing routines. This is the kind of stuff that
* can be very expensive, so try to avoid them whenever possible.
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index ab9cda5f6136..56e3d0b685e1 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
332 common pkey_free sys_pkey_free
333 common rseq sys_rseq
# 334 through 423 are reserved to sync up with other architectures
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 5fc89aabdce1..5158bd28de05 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -305,8 +305,8 @@ local_flush_tlb_all (void)
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
-void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+static void
+__flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
@@ -343,6 +343,25 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
preempt_enable();
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (unlikely(end - start >= 1024*1024*1024*1024UL
+ || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) {
+ /*
+ * If we flush more than a tera-byte or across regions, we're
+ * probably better off just flushing the entire TLB(s). This
+ * should be very rare and is not worth optimizing for.
+ */
+ flush_tlb_all();
+ } else {
+ /* flush the address range from the tlb */
+ __flush_tlb_range(vma, start, end);
+ /* flush the virt. page-table area mapping the addr range */
+ __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end));
+ }
+}
EXPORT_SYMBOL(flush_tlb_range);
void ia64_tlb_init(void)
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b73b0ebf8214..b510f4f17fd4 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -120,13 +120,6 @@ void sn_migrate(struct task_struct *task)
cpu_relax();
}
-void sn_tlb_migrate_finish(struct mm_struct *mm)
-{
- /* flush_tlb_mm is inefficient if more than 1 users of mm */
- if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
- flush_tlb_mm(mm);
-}
-
static void
sn2_ipi_flush_all_tlb(struct mm_struct *mm)
{
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b54206408f91..4e37efbc9296 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -28,6 +28,7 @@ config M68K
select OLD_SIGSUSPEND3
select OLD_SIGACTION
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
diff --git a/arch/m68k/include/asm/tlb.h b/arch/m68k/include/asm/tlb.h
index b4b9efb6f963..3c81f6adfc8b 100644
--- a/arch/m68k/include/asm/tlb.h
+++ b/arch/m68k/include/asm/tlb.h
@@ -2,20 +2,6 @@
#ifndef _M68K_TLB_H
#define _M68K_TLB_H
-/*
- * m68k doesn't need any special per-pte or
- * per-vma handling..
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it
- * fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _M68K_TLB_H */
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 125c14178979..df4ec3ec71d1 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index a51b965b3b82..321e398ab6b5 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -41,6 +41,7 @@ config MICROBLAZE
select TRACING_SUPPORT
select VIRT_TO_BUS
select CPU_NO_EFFICIENT_FFS
+ select MMU_GATHER_NO_RANGE if MMU
# Endianness selection
choice
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index 99b6ded54849..628a78ee0a72 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -11,16 +11,7 @@
#ifndef _ASM_MICROBLAZE_TLB_H
#define _ASM_MICROBLAZE_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <linux/pagemap.h>
-
-#ifdef CONFIG_MMU
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#endif
-
#include <asm-generic/tlb.h>
#endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 8ee3a8c18498..4964947732af 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -429,3 +429,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 4a70c5de8c92..25a57895a3a3 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -210,12 +210,6 @@ const char *get_system_type(void)
return ath79_sys_type;
}
-int get_c0_perfcount_int(void)
-{
- return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
unsigned int get_c0_compare_int(void)
{
return CP0_LEGACY_COMPARE_IRQ;
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index b6823b9e94da..90f3ad76d9e0 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -5,23 +5,6 @@
#include <asm/cpu-features.h>
#include <asm/mipsregs.h>
-/*
- * MIPS doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#define _UNIQUE_ENTRYHI(base, idx) \
(((base) + ((idx) << (PAGE_SHIFT + 1))) | \
(cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f158c5894a9a..feb2653490df 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -125,7 +125,7 @@ trace_a_syscall:
subu t1, v0, __NR_O32_Linux
move a1, v0
bnez t1, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+ ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
.set pop
1: jal syscall_trace_enter
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 15f4117900ee..9392dfe33f97 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -362,3 +362,7 @@
421 n32 rt_sigtimedwait_time64 compat_sys_rt_sigtimedwait_time64
422 n32 futex_time64 sys_futex
423 n32 sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 n32 pidfd_send_signal sys_pidfd_send_signal
+425 n32 io_uring_setup sys_io_uring_setup
+426 n32 io_uring_enter sys_io_uring_enter
+427 n32 io_uring_register sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index c85502e67b44..cd0c8aa21fba 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -338,3 +338,7 @@
327 n64 rseq sys_rseq
328 n64 io_pgetevents sys_io_pgetevents
# 329 through 423 are reserved to sync up with other architectures
+424 n64 pidfd_send_signal sys_pidfd_send_signal
+425 n64 io_uring_setup sys_io_uring_setup
+426 n64 io_uring_enter sys_io_uring_enter
+427 n64 io_uring_register sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 2e063d0f837e..e849e8ffe4a2 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -411,3 +411,7 @@
421 o32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 o32 futex_time64 sys_futex sys_futex
423 o32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 o32 pidfd_send_signal sys_pidfd_send_signal
+425 o32 io_uring_setup sys_io_uring_setup
+426 o32 io_uring_enter sys_io_uring_enter
+427 o32 io_uring_register sys_io_uring_register
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 0effd3cba9a7..98bf0c222b5f 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -186,8 +186,9 @@ enum which_ebpf_reg {
* separate frame pointer, so BPF_REG_10 relative accesses are
* adjusted to be $sp relative.
*/
-int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn,
- enum which_ebpf_reg w)
+static int ebpf_to_mips_reg(struct jit_ctx *ctx,
+ const struct bpf_insn *insn,
+ enum which_ebpf_reg w)
{
int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ?
insn->src_reg : insn->dst_reg;
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index b35ae5eae3ab..d5ae571c8d30 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -4,22 +4,6 @@
#ifndef __ASMNDS32_TLB_H
#define __ASMNDS32_TLB_H
-#define tlb_start_vma(tlb,vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb,vma) \
- do { \
- if(!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
index 9b411f401903..38ee769b18d8 100644
--- a/arch/nds32/include/asm/tlbflush.h
+++ b/arch/nds32/include/asm/tlbflush.h
@@ -42,6 +42,5 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void update_mmu_cache(struct vm_area_struct *vma,
unsigned long address, pte_t * pte);
-void tlb_migrate_finish(struct mm_struct *mm);
#endif
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 4ef15a61b7bc..3633f8144367 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -24,6 +24,7 @@ config NIOS2
select USB_ARCH_HAS_HCD if USB_SUPPORT
select CPU_NO_EFFICIENT_FFS
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config GENERIC_CSUM
def_bool y
diff --git a/arch/nios2/include/asm/tlb.h b/arch/nios2/include/asm/tlb.h
index d3bc648e08b5..f9f2e27e32dd 100644
--- a/arch/nios2/include/asm/tlb.h
+++ b/arch/nios2/include/asm/tlb.h
@@ -11,22 +11,12 @@
#ifndef _ASM_NIOS2_TLB_H
#define _ASM_NIOS2_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
extern void set_mmu_pid(unsigned long pid);
/*
- * NiosII doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for the area to be unmapped.
+ * NiosII does have flush_tlb_range(), but it lacks a limit and fallback to
+ * full mm invalidation. So use flush_tlb_mm() for everything.
*/
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index a5e361fbb75a..c6cf8a49a0ab 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -36,6 +36,7 @@ config OPENRISC
select OMPIC if SMP
select ARCH_WANT_FRAME_POINTERS
select GENERIC_IRQ_MULTI_HANDLER
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
diff --git a/arch/openrisc/include/asm/tlb.h b/arch/openrisc/include/asm/tlb.h
index fa4376a4515d..92d8a4209884 100644
--- a/arch/openrisc/include/asm/tlb.h
+++ b/arch/openrisc/include/asm/tlb.h
@@ -20,14 +20,10 @@
#define __ASM_OPENRISC_TLB_H__
/*
- * or32 doesn't need any special per-pte or
- * per-vma handling..
+ * OpenRISC doesn't have an efficient flush_tlb_range() so use flush_tlb_mm()
+ * for everything.
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 0c881e74d8a6..8c0446b04c9e 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -2,24 +2,6 @@
#ifndef _PARISC_TLB_H
#define _PARISC_TLB_H
-#define tlb_flush(tlb) \
-do { if ((tlb)->fullmm) \
- flush_tlb_mm((tlb)->mm);\
-} while (0)
-
-#define tlb_start_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
#include <asm-generic/tlb.h>
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b26766c6647d..fe8ca623add8 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -420,3 +420,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2d0be82c3061..8e1e2abf17eb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -218,6 +218,8 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+ select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 5ba131c30f6b..1bcd468ab422 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -266,6 +266,7 @@ CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_NLS=y
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e24c67d5ba75..34fba1ce27f7 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,8 +27,8 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
@@ -46,22 +46,6 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
#endif
}
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
- if (!tlb->page_size)
- tlb->page_size = page_size;
- else if (tlb->page_size != page_size) {
- if (!tlb->fullmm)
- tlb_flush_mmu(tlb);
- /*
- * update the page size after flush for the new
- * mmu_gather.
- */
- tlb->page_size = page_size;
- }
-}
-
#ifdef CONFIG_SMP
static inline int mm_is_core_local(struct mm_struct *mm)
{
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b33bafb8fcea..70568ccbd9fd 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -57,7 +57,7 @@ void setup_barrier_nospec(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
- if (!no_nospec)
+ if (!no_nospec && !cpu_mitigations_off())
enable_barrier_nospec(enable);
}
@@ -116,7 +116,7 @@ static int __init handle_nospectre_v2(char *p)
early_param("nospectre_v2", handle_nospectre_v2);
void setup_spectre_v2(void)
{
- if (no_spectrev2)
+ if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
else
btb_flush_enabled = true;
@@ -300,7 +300,7 @@ void setup_stf_barrier(void)
stf_enabled_flush_types = type;
- if (!no_stf_barrier)
+ if (!no_stf_barrier && !cpu_mitigations_off())
stf_barrier_enable(enable);
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ba404dd9ce1d..4f49e1a3594c 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -932,7 +932,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush)
+ if (!no_rfi_flush && !cpu_mitigations_off())
rfi_flush_enable(enable);
}
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index b18abb0c3dae..00f5a63c8d9a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -505,3 +505,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f02b04973710..f100e331e69b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
- return ret;
+ goto unlock_exit;
dir = iommu_tce_direction(tce);
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto unlock_exit;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06964350b97a..b2b29d4f9842 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3423,7 +3423,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
- mtspr(SPRN_PSSCR, host_psscr);
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR, host_dawr);
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index e7a9c4f6bfca..8330f135294f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -95,28 +95,15 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
- struct mm_iommu_table_group_mem_t *mem;
- long i, ret, locked_entries = 0;
+ struct mm_iommu_table_group_mem_t *mem, *mem2;
+ long i, ret, locked_entries = 0, pinned = 0;
unsigned int pageshift;
-
- mutex_lock(&mem_list_mutex);
-
- list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
- next) {
- /* Overlap? */
- if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
- (ua < (mem->ua +
- (mem->entries << PAGE_SHIFT)))) {
- ret = -EINVAL;
- goto unlock_exit;
- }
-
- }
+ unsigned long entry, chunk;
if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
ret = mm_iommu_adjust_locked_vm(mm, entries, true);
if (ret)
- goto unlock_exit;
+ return ret;
locked_entries = entries;
}
@@ -148,17 +135,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
}
down_read(&mm->mmap_sem);
- ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
+ chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
+ sizeof(struct vm_area_struct *);
+ chunk = min(chunk, entries);
+ for (entry = 0; entry < entries; entry += chunk) {
+ unsigned long n = min(entries - entry, chunk);
+
+ ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
+ FOLL_WRITE, mem->hpages + entry, NULL);
+ if (ret == n) {
+ pinned += n;
+ continue;
+ }
+ if (ret > 0)
+ pinned += ret;
+ break;
+ }
up_read(&mm->mmap_sem);
- if (ret != entries) {
- /* free the reference taken */
- for (i = 0; i < ret; i++)
- put_page(mem->hpages[i]);
-
- vfree(mem->hpas);
- kfree(mem);
- ret = -EFAULT;
- goto unlock_exit;
+ if (pinned != entries) {
+ if (!ret)
+ ret = -EFAULT;
+ goto free_exit;
}
pageshift = PAGE_SHIFT;
@@ -183,21 +180,43 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
}
good_exit:
- ret = 0;
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
mem->entries = entries;
- *pmem = mem;
- list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+ mutex_lock(&mem_list_mutex);
-unlock_exit:
- if (locked_entries && ret)
- mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+ list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
+ /* Overlap? */
+ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+ (ua < (mem2->ua +
+ (mem2->entries << PAGE_SHIFT)))) {
+ ret = -EINVAL;
+ mutex_unlock(&mem_list_mutex);
+ goto free_exit;
+ }
+ }
+
+ list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
mutex_unlock(&mem_list_mutex);
+ *pmem = mem;
+
+ return 0;
+
+free_exit:
+ /* free the reference taken */
+ for (i = 0; i < pinned; i++)
+ put_page(mem->hpages[i]);
+
+ vfree(mem->hpas);
+ kfree(mem);
+
+unlock_exit:
+ mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+
return ret;
}
@@ -266,7 +285,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
- unsigned long entries, dev_hpa;
+ unsigned long unlock_entries = 0;
mutex_lock(&mem_list_mutex);
@@ -287,17 +306,17 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
goto unlock_exit;
}
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ unlock_entries = mem->entries;
+
/* @mapped became 0 so now mappings are disabled, release the region */
- entries = mem->entries;
- dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
- if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
- mm_iommu_adjust_locked_vm(mm, entries, false);
-
unlock_exit:
mutex_unlock(&mem_list_mutex);
+ mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
+
return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index f29d2f118b44..5d9c3ff728c9 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -98,10 +98,20 @@ static int find_free_bat(void)
return -1;
}
+/*
+ * This function calculates the size of the larger block usable to map the
+ * beginning of an area based on the start address and size of that area:
+ * - max block size is 8M on 601 and 256M on other 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ * is identified by the lowest bit set to 1 in the base address (for instance
+ * if base is 0x16000000, max size is 0x02000000).
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1.
+ */
static unsigned int block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- unsigned int base_shift = (fls(base) - 1) & 31;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
return min3(max_size, 1U << base_shift, 1U << block_shift);
@@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
- int done;
+ unsigned long done;
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
if (__map_without_bats) {
@@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(base, top);
done = __mmu_mapin_ram(base, border);
- if (done != border - base)
+ if (done != border)
return done;
- return done + __mmu_mapin_ram(border, top);
+ return __mmu_mapin_ram(border, top);
}
void mmu_mark_initmem_nx(void)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 842b2c7e156a..50cd09b4e05d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -324,7 +324,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config PPC_RADIX_MMU
bool "Radix MMU Support"
- depends on PPC_BOOK3S_64
+ depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
default y
help
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
new file mode 100644
index 000000000000..1a911ed8e772
--- /dev/null
+++ b/arch/riscv/configs/rv32_defconfig
@@ -0,0 +1,84 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_ARCH_RV32I=y
+CONFIG_SMP=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
+CONFIG_HVC_RISCV_SBI=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_RCU_TRACE is not set
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 439dc7072e05..1ad8d093c58b 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -18,6 +18,7 @@ struct mmu_gather;
static void tlb_flush(struct mmu_gather *tlb);
+#define tlb_flush tlb_flush
#include <asm-generic/tlb.h>
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 5fd8c922e1c2..bc7b77e34d09 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -121,6 +121,14 @@ void __init setup_bootmem(void)
*/
memblock_reserve(reg->base, vmlinux_end - reg->base);
mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET);
+
+ /*
+ * Remove memblock from the end of usable area to the
+ * end of region
+ */
+ if (reg->base + mem_size < end)
+ memblock_remove(reg->base + mem_size,
+ end - reg->base - mem_size);
}
}
BUG_ON(mem_size == 0);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b6e3d0653002..cf06e313e103 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -164,11 +164,13 @@ config S390
select HAVE_PERF_USER_STACK_DUMP
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
+ select HAVE_MMU_GATHER_NO_GATHER
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PCI
select HAVE_PERF_EVENTS
+ select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c
index 4cb771ba13fa..5d316fe40480 100644
--- a/arch/s390/boot/mem_detect.c
+++ b/arch/s390/boot/mem_detect.c
@@ -25,7 +25,7 @@ static void *mem_detect_alloc_extended(void)
{
unsigned long offset = ALIGN(mem_safe_offset(), sizeof(u64));
- if (IS_ENABLED(BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
INITRD_START < offset + ENTRIES_EXTENDED_MAX)
offset = ALIGN(INITRD_START + INITRD_SIZE, sizeof(u64));
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b31c779cf581..aa406c05a350 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,98 +22,39 @@
* Pages used for the page tables is a different story. FIXME: more
*/
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-
-struct mmu_gather {
- struct mm_struct *mm;
- struct mmu_table_batch *batch;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
- tlb->batch = NULL;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- __tlb_flush_mm_lazy(tlb->mm);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- tlb_table_flush(tlb);
-}
-
+void __tlb_remove_table(void *_table);
+static inline void tlb_flush(struct mmu_gather *tlb);
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+ struct page *page, int page_size);
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- }
+#define tlb_flush tlb_flush
+#define pte_free_tlb pte_free_tlb
+#define pmd_free_tlb pmd_free_tlb
+#define p4d_free_tlb p4d_free_tlb
+#define pud_free_tlb pud_free_tlb
- tlb_flush_mmu(tlb);
-}
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm-generic/tlb.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
*/
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
-}
-
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size)
{
- return __tlb_remove_page(tlb, page);
+ free_page_and_swap_cache(page);
+ return false;
}
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
+static inline void tlb_flush(struct mmu_gather *tlb)
{
- return tlb_remove_page(tlb, page);
+ __tlb_flush_mm_lazy(tlb->mm);
}
/*
@@ -121,8 +62,17 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
* page table from the tlb.
*/
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long address)
+ unsigned long address)
{
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_ptes = 1;
+ /*
+ * page_table_free_rcu takes care of the allocation bit masks
+ * of the 2K table fragments in the 4K page table page,
+ * then calls tlb_remove_table.
+ */
page_table_free_rcu(tlb, (unsigned long *) pte, address);
}
@@ -139,6 +89,10 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
if (mm_pmd_folded(tlb->mm))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pmd);
}
@@ -154,6 +108,10 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
{
if (mm_p4d_folded(tlb->mm))
return;
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_p4ds = 1;
tlb_remove_table(tlb, p4d);
}
@@ -169,21 +127,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
{
if (mm_pud_folded(tlb->mm))
return;
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pud);
}
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
-#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
-#define tlb_migrate_finish(mm) do { } while (0)
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
#endif /* _S390_TLB_H */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 594464f2129d..0da378e2eb25 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -23,7 +23,7 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
if (flags & KERNEL_FPC)
/* Save floating point control */
- asm volatile("stfpc %0" : "=m" (state->fpc));
+ asm volatile("stfpc %0" : "=Q" (state->fpc));
if (!MACHINE_HAS_VX) {
if (flags & KERNEL_VXR_V0V7) {
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index bdddaae96559..649135cbedd5 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/cpu.h>
#include <asm/nospec-branch.h>
static int __init nobp_setup_early(char *str)
@@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
void __init nospec_auto_detect(void)
{
- if (test_facility(156)) {
+ if (test_facility(156) || cpu_mitigations_off()) {
/*
* The machine supports etokens.
* Disable expolines and disable nobp.
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 02579f95f391..061418f787c3 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 - sys_futex
423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register sys_io_uring_register
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index a69a0911ed0e..c475ca49cfc6 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -37,7 +37,7 @@ static inline u64 get_vtimer(void)
{
u64 timer;
- asm volatile("stpt %0" : "=m" (timer));
+ asm volatile("stpt %0" : "=Q" (timer));
return timer;
}
@@ -48,7 +48,7 @@ static inline void set_vtimer(u64 expires)
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
- : "=m" (timer) : "m" (expires));
+ : "=Q" (timer) : "Q" (expires));
S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
S390_lowcore.last_update_timer = expires;
}
@@ -135,8 +135,8 @@ static int do_account_vtime(struct task_struct *tsk)
#else
" stck %1" /* Store current tod clock value */
#endif
- : "=m" (S390_lowcore.last_update_timer),
- "=m" (S390_lowcore.last_update_clock));
+ : "=Q" (S390_lowcore.last_update_timer),
+ "=Q" (S390_lowcore.last_update_clock));
clock = S390_lowcore.last_update_clock - clock;
timer -= S390_lowcore.last_update_timer;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index db6bb2f97a2c..99e06213a22b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -290,7 +290,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
tlb_remove_table(tlb, table);
}
-static void __tlb_remove_table(void *_table)
+void __tlb_remove_table(void *_table)
{
unsigned int mask = (unsigned long) _table & 3;
void *table = (void *)((unsigned long) _table ^ mask);
@@ -316,67 +316,6 @@ static void __tlb_remove_table(void *_table)
}
}
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
/*
* Base infrastructure required to generate basic asces, region, segment,
* and page tables that do not make use of enhanced features like EDAT1.
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 8ad73cb31121..b56f908b1395 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -70,6 +70,15 @@ do { \
tlb_remove_page((tlb), (pte)); \
} while (0)
+#if CONFIG_PGTABLE_LEVELS > 2
+#define __pmd_free_tlb(tlb, pmdp, addr) \
+do { \
+ struct page *page = virt_to_page(pmdp); \
+ pgtable_pmd_page_dtor(page); \
+ tlb_remove_page((tlb), page); \
+} while (0);
+#endif
+
static inline void check_pgt_cache(void)
{
quicklist_trim(QUICK_PT, NULL, 25, 16);
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 77abe192fb43..bc77f3dd4261 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -11,133 +11,8 @@
#ifdef CONFIG_MMU
#include <linux/swap.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (tlb->fullmm || force)
- flush_tlb_mm(tlb->mm);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
-
-static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm && tlb->end) {
- flush_tlb_range(vma, tlb->start, tlb->end);
- init_tlb_gather(tlb);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
-}
-
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
-#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
+#include <asm-generic/tlb.h>
#if defined(CONFIG_CPU_SH4) || defined(CONFIG_SUPERH64)
extern void tlb_wire_entry(struct vm_area_struct *, unsigned long, pte_t);
@@ -157,11 +32,6 @@ static inline void tlb_unwire_entry(void)
#else /* CONFIG_MMU */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif /* CONFIG_MMU */
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index bfda678576e4..480b057556ee 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 40f8f4f73fe8..db79290ed6d5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -63,6 +63,7 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h
index 343cea19e573..5cd28a8793e3 100644
--- a/arch/sparc/include/asm/tlb_32.h
+++ b/arch/sparc/include/asm/tlb_32.h
@@ -2,24 +2,6 @@
#ifndef _SPARC_TLB_H
#define _SPARC_TLB_H
-#define tlb_start_vma(tlb, vma) \
-do { \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
-#define tlb_flush(tlb) \
-do { \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
#include <asm-generic/tlb.h>
#endif /* _SPARC_TLB_H */
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b9a5a04b2d2c..a1dd24307b00 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -469,3 +469,7 @@
421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
422 32 futex_time64 sys_futex sys_futex
423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index dce6db147f24..70ee60383900 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -2,162 +2,8 @@
#ifndef __UM_TLB_H
#define __UM_TLB_H
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/percpu.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
-/* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int need_flush; /* Really unmapped some ptes? */
- unsigned long start;
- unsigned long end;
- unsigned int fullmm; /* non-zero means full mm flush */
-};
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
- unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->need_flush = 0;
-
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-
-static inline void
-tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-}
-
-static inline void
-tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- init_tlb_gather(tlb);
-}
-
-static inline void
-tlb_flush_mmu(struct mmu_gather *tlb)
-{
- if (!tlb->need_flush)
- return;
-
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-/* arch_tlb_finish_mmu
- * Called at the end of the shootdown operation to free up any resources
- * that were required.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- tlb->need_flush = 1;
- }
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-/* tlb_remove_page
- * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)),
- * while handling the additional races in SMP caused by other CPUs
- * caching valid mappings in their TLBs.
- */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/**
- * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
- *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate. This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
- */
-#define tlb_remove_tlb_entry(tlb, ptep, address) \
- do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, address); \
- } while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-
-#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
-
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-
-#define tlb_migrate_finish(mm) do {} while (0)
+#include <asm-generic/cacheflush.h>
+#include <asm-generic/tlb.h>
#endif
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 817d82608712..d83c8f70900d 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -20,6 +20,7 @@ config UNICORE32
select GENERIC_IOMAP
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
+ select MMU_GATHER_NO_RANGE if MMU
help
UniCore-32 is 32-bit Instruction Set Architecture,
including a series of low-power-consumption RISC chip
diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h
index 9cca15cdae94..00a8477333f6 100644
--- a/arch/unicore32/include/asm/tlb.h
+++ b/arch/unicore32/include/asm/tlb.h
@@ -12,10 +12,9 @@
#ifndef __UNICORE_TLB_H__
#define __UNICORE_TLB_H__
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+/*
+ * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm().
+ */
#define __pte_free_tlb(tlb, pte, addr) \
do { \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b5978e35a8a8..f1162df4a805 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -184,7 +184,6 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if PARAVIRT
- select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -1500,7 +1499,7 @@ config X86_CPA_STATISTICS
depends on DEBUG_FS
---help---
Expose statistics about the Change Page Attribute mechanims, which
- helps to determine the effectivness of preserving large and huge
+ helps to determine the effectiveness of preserving large and huge
page mappings when mapping protections are changed.
config ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index c0d6c560df69..5a237e8dbf8d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -352,7 +352,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
boot_params->hdr.loadflags &= ~KASLR_FLAG;
/* Save RSDP address for later use. */
- boot_params->acpi_rsdp_addr = get_rsdp_addr();
+ /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */
sanitize_boot_params(boot_params);
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 3b6e70d085da..8457cdd47f75 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
vpaddq t2,t1,t1
vmovq t1x,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index e6add74d78a5..6f0be7a86964 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
paddq t2,t1
movq t1,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
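The two poly1305 hunks above widen the carry propagation from 32-bit to 64-bit registers. As a rough illustration of why (the bound below is a made-up worst-case estimate, not taken from the kernel sources), the carry computed in the multi-block paths can exceed 32 bits, which is what the %eax/%ebx -> %rax/%rbx changes address:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* In the 2-/4-block paths the 64-bit accumulators d0..d4 hold sums
	 * of several ~26-bit x ~29-bit limb products; a rough worst-case
	 * estimate (illustrative only): */
	uint64_t limb  = (1ULL << 26) - 1;
	uint64_t d4    = 4 * 5 * limb * (5 * limb);
	uint64_t carry = (d4 >> 26) * 5;	/* the "h0 += (d4 >> 26) * 5" step */

	printf("carry = %#llx, fits in 32 bits: %s\n",
	       (unsigned long long)carry,
	       carry <= 0xffffffffULL ? "yes" : "no");
	return 0;
}

Compiled and run, this prints a carry larger than 0xffffffff, so a 32-bit lea/add would silently truncate it; the single-block case stays within 32 bits, which is why only the multi-block routines needed the comment and the wider registers.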
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d309f30cf7af..5fc76b755510 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -650,6 +650,7 @@ ENTRY(__switch_to_asm)
pushl %ebx
pushl %edi
pushl %esi
+ pushfl
/* switch stack */
movl %esp, TASK_threadsp(%eax)
@@ -672,6 +673,7 @@ ENTRY(__switch_to_asm)
#endif
/* restore callee-saved registers */
+ popfl
popl %esi
popl %edi
popl %ebx
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 007b3fe9d727..98c7d12b945c 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page
+extern u8 pvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page
+extern u8 hvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 0ecfac84ba91..f15441b07dad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -116,23 +116,144 @@ static __initconst const u64 amd_hw_cache_event_ids
},
};
+static __initconst const u64 amd_hw_cache_event_ids_f17h
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
+ [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
+ [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
+ [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
+ [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
+ [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
/*
- * AMD Performance Monitor K7 and later.
+ * AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+};
+
+/*
+ * AMD Performance Monitor Family 17h and later:
+ */
+static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
static u64 amd_pmu_event_map(int hw_event)
{
+ if (boot_cpu_data.x86 >= 0x17)
+ return amd_f17h_perfmon_event_map[hw_event];
+
return amd_perfmon_event_map[hw_event];
}
@@ -848,9 +969,10 @@ __init int amd_pmu_init(void)
x86_pmu.amd_nb_constraints = 0;
}
- /* Events are common for all AMDs */
- memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
+ if (boot_cpu_data.x86 >= 0x17)
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
+ else
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
return 0;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f61dcbef20ff..d35f4775d5f1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2091,15 +2091,19 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
-
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
+
+ /*
+ * Needs to be called after x86_pmu_disable_event,
+ * so we don't trigger the event without PEBS bit set.
+ */
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
}
static void intel_pmu_del_event(struct perf_event *event)
@@ -3131,7 +3135,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
flags &= ~PERF_SAMPLE_TIME;
if (!event->attr.exclude_kernel)
flags &= ~PERF_SAMPLE_REGS_USER;
- if (event->attr.sample_regs_user & ~PEBS_REGS)
+ if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 94a4b7fc75d0..d41de9af7a39 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -76,15 +76,15 @@
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
- * Available model: HSW ULT,CNL
+ * Available model: HSW ULT,KBL,CNL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT,CNL
+ * Available model: HSW ULT,KBL,CNL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,GLM,CNL
+ * Available model: HSW ULT,KBL,GLM,CNL
* Scope: Package (physical package)
*
*/
@@ -566,8 +566,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fb3a2f13fc70..339d7628080c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1525,8 +1525,7 @@ static __init int pt_init(void)
}
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
- pt_pmu.pmu.capabilities =
- PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..1e98a42b560a 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -96,25 +96,25 @@ struct amd_nb {
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
PERF_SAMPLE_PERIOD)
-#define PEBS_REGS \
- (PERF_REG_X86_AX | \
- PERF_REG_X86_BX | \
- PERF_REG_X86_CX | \
- PERF_REG_X86_DX | \
- PERF_REG_X86_DI | \
- PERF_REG_X86_SI | \
- PERF_REG_X86_SP | \
- PERF_REG_X86_BP | \
- PERF_REG_X86_IP | \
- PERF_REG_X86_FLAGS | \
- PERF_REG_X86_R8 | \
- PERF_REG_X86_R9 | \
- PERF_REG_X86_R10 | \
- PERF_REG_X86_R11 | \
- PERF_REG_X86_R12 | \
- PERF_REG_X86_R13 | \
- PERF_REG_X86_R14 | \
- PERF_REG_X86_R15)
+#define PEBS_GP_REGS \
+ ((1ULL << PERF_REG_X86_AX) | \
+ (1ULL << PERF_REG_X86_BX) | \
+ (1ULL << PERF_REG_X86_CX) | \
+ (1ULL << PERF_REG_X86_DX) | \
+ (1ULL << PERF_REG_X86_DI) | \
+ (1ULL << PERF_REG_X86_SI) | \
+ (1ULL << PERF_REG_X86_SP) | \
+ (1ULL << PERF_REG_X86_BP) | \
+ (1ULL << PERF_REG_X86_IP) | \
+ (1ULL << PERF_REG_X86_FLAGS) | \
+ (1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
/*
* Per register state.
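The PEBS_REGS -> PEBS_GP_REGS rework above is needed because the PERF_REG_X86_* constants are register indices, not one-bit masks; OR-ing them directly collapses into a small integer instead of a bitmap. A minimal sketch of the difference, using invented index values rather than the real enum:

#include <stdio.h>

enum { REG_AX = 0, REG_BX = 1, REG_R15 = 23 };	/* index values assumed */

int main(void)
{
	/* Buggy pattern: OR-ing indices just yields the largest index. */
	unsigned long long wrong = REG_AX | REG_BX | REG_R15;
	/* Fixed pattern: one bit per register, as in the new macro. */
	unsigned long long right = (1ULL << REG_AX) | (1ULL << REG_BX) |
				   (1ULL << REG_R15);

	printf("wrong = %#llx, right = %#llx\n", wrong, right);
	return 0;
}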
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 321fe5f5d0e9..4d5fcd47ab75 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -61,9 +61,8 @@
} while (0)
#define RELOAD_SEG(seg) { \
- unsigned int pre = GET_SEG(seg); \
+ unsigned int pre = (seg) | 3; \
unsigned int cur = get_user_seg(seg); \
- pre |= 3; \
if (pre != cur) \
set_user_seg(seg, pre); \
}
@@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_32 __user *sc)
{
unsigned int tmpflags, err = 0;
+ u16 gs, fs, es, ds;
void __user *buf;
u32 tmp;
@@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
get_user_try {
- /*
- * Reload fs and gs if they have changed in the signal
- * handler. This does not handle long fs/gs base changes in
- * the handler, but does not clobber them at least in the
- * normal case.
- */
- RELOAD_SEG(gs);
- RELOAD_SEG(fs);
- RELOAD_SEG(ds);
- RELOAD_SEG(es);
+ gs = GET_SEG(gs);
+ fs = GET_SEG(fs);
+ ds = GET_SEG(ds);
+ es = GET_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
@@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
} get_user_catch(err);
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
+ RELOAD_SEG(gs);
+ RELOAD_SEG(fs);
+ RELOAD_SEG(ds);
+ RELOAD_SEG(es);
+
err |= fpu__restore_sig(buf, 1);
force_iret();
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 31b627b43a8e..464034db299f 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -20,6 +20,17 @@
#endif
/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.ignore_alts
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4c74073a19cc..094fbc9c0b1c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,6 +45,16 @@
#define LOCK_PREFIX ""
#endif
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+#define ANNOTATE_IGNORE_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.ignore_alts\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 6467757bb39f..3ff577c0b102 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -148,30 +148,6 @@
_ASM_PTR (entry); \
.popsection
-.macro ALIGN_DESTINATION
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
- .endm
-
#else
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 93c4bf598fb0..feab24cac610 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -226,7 +226,9 @@ struct x86_emulate_ops {
unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
- int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt, u64 smbase);
+ int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
+ const char *smstate);
+ void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 159b5988292f..c79abe7ca093 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -126,7 +126,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
}
#define KVM_PERMILLE_MMU_PAGES 20
-#define KVM_MIN_ALLOC_MMU_PAGES 64
+#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
@@ -295,6 +295,7 @@ union kvm_mmu_extended_role {
unsigned int valid:1;
unsigned int execonly:1;
unsigned int cr0_pg:1;
+ unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
@@ -844,9 +845,9 @@ enum kvm_irqchip_mode {
};
struct kvm_arch {
- unsigned int n_used_mmu_pages;
- unsigned int n_requested_mmu_pages;
- unsigned int n_max_mmu_pages;
+ unsigned long n_used_mmu_pages;
+ unsigned long n_requested_mmu_pages;
+ unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
@@ -1182,7 +1183,7 @@ struct kvm_x86_ops {
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
- int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
+ int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
int (*enable_smi_window)(struct kvm_vcpu *vcpu);
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
@@ -1256,8 +1257,8 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
bool pdptrs_changed(struct kvm_vcpu *vcpu);
@@ -1592,4 +1593,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+#define GET_SMSTATE(type, buf, offset) \
+ (*(type *)((buf) + (offset) - 0x7e00))
+
#endif /* _ASM_X86_KVM_HOST_H */
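GET_SMSTATE() added above is the read-side counterpart of put_smstate(): SMM state-save fields sit at architectural offsets 0x7e00-0x7fff (relative to SMBASE + 0x8000), and the emulator copies that 512-byte window into a local buffer, so both macros subtract 0x7e00 before indexing. A standalone sketch of the pairing (the field offset and value below are arbitrary examples):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define put_smstate(type, buf, offset, val) \
	(*(type *)((buf) + (offset) - 0x7e00) = (val))
#define GET_SMSTATE(type, buf, offset) \
	(*(type *)((buf) + (offset) - 0x7e00))

int main(void)
{
	char buf[512];	/* local copy of the state-save area, as in em_rsm() */

	memset(buf, 0, sizeof(buf));
	put_smstate(uint32_t, buf, 0x7ffc, 0x80000011u);	/* pretend CR0 */
	printf("cr0 = %#x\n", GET_SMSTATE(uint32_t, buf, 0x7ffc));
	return 0;
}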
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index dad12b767ba0..daf25b60c9e3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,6 +11,15 @@
#include <asm/msr-index.h>
/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ ANNOTATE_IGNORE_ALTERNATIVE
+
+/*
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -57,19 +66,6 @@
#ifdef __ASSEMBLY__
/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.nospec
- .long .Lannotate_\@ - .
- .popsection
-.endm
-
-/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
* builds.
@@ -152,12 +148,6 @@
#else /* __ASSEMBLY__ */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.nospec\n\t" \
- ".long 999b - .\n\t" \
- ".popsection\n\t"
-
#define ANNOTATE_RETPOLINE_SAFE \
"999:\n\t" \
".pushsection .discard.retpoline_safe\n\t" \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..50b3e2d963c9 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -46,7 +46,7 @@ void ptdump_walk_user_pgd_level_checkwx(void);
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__visible;
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
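The ZERO_PAGE() change above uses a comma expression so the macro formally consumes its vaddr argument (silencing unused-variable warnings at call sites) while still expanding to the same page. A tiny illustration of the idiom with invented names:

#include <stdio.h>

/* "(void)(x), value" evaluates and discards x, then yields value. */
#define PICK_ZERO_PAGE(vaddr)	((void)(vaddr), 0)

int main(void)
{
	unsigned long addr = 0x1000;	/* now counts as "used" via the macro */

	printf("page index = %d\n", PICK_ZERO_PAGE(addr));
	return 0;
}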
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4b..f94a7d0ddd49 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,13 +13,12 @@
#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H
-#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
-#define __ASM_CLAC .byte 0x0f,0x01,0xca
-#define __ASM_STAC .byte 0x0f,0x01,0xcb
+#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
+#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
#ifdef __ASSEMBLY__
@@ -28,10 +27,10 @@
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
- ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@@ -49,26 +48,46 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_STAC, X86_FEATURE_SMAP);
+}
+
+static __always_inline unsigned long smap_save(void)
+{
+ unsigned long flags;
+
+ asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
+ X86_FEATURE_SMAP)
+ : "=rm" (flags) : : "memory", "cc");
+
+ return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+ asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ : : "g" (flags) : "memory", "cc");
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */
static inline void clac(void) { }
static inline void stac(void) { }
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
#define ASM_CLAC
#define ASM_STAC
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 7cf1a270d891..18a4b6890fa8 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -46,6 +46,7 @@ struct inactive_task_frame {
unsigned long r13;
unsigned long r12;
#else
+ unsigned long flags;
unsigned long si;
unsigned long di;
#endif
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 404b8b1d44f5..f23e7aaff4cd 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,7 @@
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1954dd5552a2..bb21913885a3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -427,10 +427,11 @@ do { \
({ \
__label__ __pu_label; \
int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __pu_val; \
- __pu_val = x; \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ __typeof__(ptr) __pu_ptr = (ptr); \
+ __typeof__(size) __pu_size = (size); \
__uaccess_begin(); \
- __put_user_size(__pu_val, (ptr), (size), __pu_label); \
+ __put_user_size(__pu_val, __pu_ptr, __pu_size, __pu_label); \
__pu_err = 0; \
__pu_label: \
__uaccess_end(); \
@@ -705,7 +706,7 @@ extern struct movsl_mask {
* checking before using them, but you have to surround them with the
* user_access_begin/end() pair.
*/
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr,len)))
return 0;
@@ -715,6 +716,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_begin(a,b) user_access_begin(a,b)
#define user_access_end() __uaccess_end()
+#define user_access_save() smap_save()
+#define user_access_restore(x) smap_restore(x)
+
#define unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a9d637bc301d..5cd1caa8bc65 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,9 +208,6 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
}
unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 2863c2026655..d50c7b747d8b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -217,6 +217,22 @@ xen_single_call(unsigned int call,
return (long)__res;
}
+static __always_inline void __xen_stac(void)
+{
+ /*
+ * Suppress objtool seeing the STAC/CLAC and getting confused about it
+ * calling random code with AC=1.
+ */
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_STAC ::: "memory", "flags");
+}
+
+static __always_inline void __xen_clac(void)
+{
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_CLAC ::: "memory", "flags");
+}
+
static inline long
privcmd_call(unsigned int call,
unsigned long a1, unsigned long a2,
@@ -225,9 +241,9 @@ privcmd_call(unsigned int call,
{
long res;
- stac();
+ __xen_stac();
res = xen_single_call(call, a1, a2, a3, a4, a5);
- clac();
+ __xen_clac();
return res;
}
@@ -424,9 +440,9 @@ HYPERVISOR_dm_op(
domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs)
{
int ret;
- stac();
+ __xen_stac();
ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs);
- clac();
+ __xen_clac();
return ret;
}
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index dabfcf7c3941..7a0e64ccd6ff 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -381,6 +381,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..d213ec5c3766 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,6 +146,7 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2da82eff0eb4..29630393f300 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -275,7 +275,7 @@ static const struct {
const char *option;
enum spectre_v2_user_cmd cmd;
bool secure;
-} v2_user_options[] __initdata = {
+} v2_user_options[] __initconst = {
{ "auto", SPECTRE_V2_USER_CMD_AUTO, false },
{ "off", SPECTRE_V2_USER_CMD_NONE, false },
{ "on", SPECTRE_V2_USER_CMD_FORCE, true },
@@ -419,7 +419,7 @@ static const struct {
const char *option;
enum spectre_v2_mitigation_cmd cmd;
bool secure;
-} mitigation_options[] __initdata = {
+} mitigation_options[] __initconst = {
{ "off", SPECTRE_V2_CMD_NONE, false },
{ "on", SPECTRE_V2_CMD_FORCE, true },
{ "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
@@ -440,7 +440,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
+ cpu_mitigations_off())
return SPECTRE_V2_CMD_NONE;
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
@@ -658,7 +659,7 @@ static const char * const ssb_strings[] = {
static const struct {
const char *option;
enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] __initdata = {
+} ssb_mitigation_options[] __initconst = {
{ "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
{ "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
{ "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
@@ -672,7 +673,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
+ cpu_mitigations_off()) {
return SPEC_STORE_BYPASS_CMD_NONE;
} else {
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
@@ -1008,6 +1010,11 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ if (cpu_mitigations_off())
+ l1tf_mitigation = L1TF_MITIGATION_OFF;
+ else if (cpu_mitigations_auto_nosmt())
+ l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
+
override_cache_bits(&boot_cpu_data);
switch (l1tf_mitigation) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fc3c07fe7df5..3142fd7a9b32 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -611,8 +611,8 @@ static void init_intel_energy_perf(struct cpuinfo_x86 *c)
if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
return;
- pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
+ pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 54b9eef3eea9..85212a32b54d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2610,9 +2610,10 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
- rdtgrp->mode = RDT_MODE_SHAREABLE;
}
+ rdtgrp->mode = RDT_MODE_SHAREABLE;
+
return 0;
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index a034cb808e7e..fed46ddb1eef 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
unsigned long *sara = stack_addr(regs);
ri->ret_addr = (kprobe_opcode_t *) *sara;
+ ri->fp = sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
@@ -748,26 +749,48 @@ asm(
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = {
+ .addr = (void *)kretprobe_trampoline,
+};
+
/*
* Called from kretprobe_trampoline
*/
static __used void *trampoline_handler(struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
kprobe_opcode_t *correct_ret_addr = NULL;
+ void *frame_pointer;
+ bool skipped = false;
+
+ preempt_disable();
+
+ /*
+ * Set a dummy kprobe for avoiding kretprobe recursion.
+	 * Since a kretprobe never runs in a kprobe handler, a kprobe must not
+	 * be running at this point.
+ */
+ kcb = get_kprobe_ctlblk();
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
+	/* On x86-64, we use pt_regs->sp as the return address holder. */
+ frame_pointer = &regs->sp;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
regs->gs = 0;
+	/* On x86-32, we use pt_regs->flags as the return address holder. */
+ frame_pointer = &regs->flags;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
@@ -789,8 +812,25 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ /*
+	 * Return probes must be pushed on this hash list in the correct
+	 * order (same as the return order) so that they can be popped
+	 * correctly. However, if we find one pushed in the wrong order,
+	 * it means we hit a function that should not have been probed,
+	 * because the out-of-order entry was pushed on the path of
+	 * processing another kretprobe itself.
+ */
+ if (ri->fp != frame_pointer) {
+ if (!skipped)
+ pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
+ skipped = true;
+ continue;
+ }
orig_ret_address = (unsigned long)ri->ret_addr;
+ if (skipped)
+ pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
+ ri->rp->kp.addr);
if (orig_ret_address != trampoline_address)
/*
@@ -808,14 +848,15 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ if (ri->fp != frame_pointer)
+ continue;
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
}
recycle_rp_inst(ri, &empty_rp);
@@ -831,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58ac7be52c7a..957eae13b370 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -426,6 +426,8 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
u64 msr = x86_spec_ctrl_base;
bool updmsr = false;
+ lockdep_assert_irqs_disabled();
+
/*
* If TIF_SSBD is different, select the proper mitigation
* method. Note that if SSBD mitigation is disabled or permanentely
@@ -477,10 +479,12 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
void speculation_ctrl_update(unsigned long tif)
{
+ unsigned long flags;
+
/* Forced update. Make sure all relevant TIF flags are different */
- preempt_disable();
+ local_irq_save(flags);
__speculation_ctrl_update(~tif, tif);
- preempt_enable();
+ local_irq_restore(flags);
}
/* Called from seccomp/prctl update */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e471d8e6f0b2..70933193878c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct task_struct *tsk;
int err;
+ /*
+ * For a new task use the RESET flags value since there is no before.
+ * All the status flags are zero; DF and all the system flags must also
+ * be 0, specifically IF must be 0 because we context switch to the new
+ * task with interrupts disabled.
+ */
+ frame->flags = X86_EFLAGS_FIXED;
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a62f4af9fcf..844a28b29967 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -392,6 +392,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
+
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 725624b6c0c0..8fd3cedd9acc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -81,6 +81,19 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
return 0;
}
+/*
+ * Some machines don't handle the default ACPI reboot method and
+ * require the EFI reboot method:
+ */
+static int __init set_efi_reboot(const struct dmi_system_id *d)
+{
+ if (reboot_type != BOOT_EFI && !efi_runtime_disabled()) {
+ reboot_type = BOOT_EFI;
+ pr_info("%s series board detected. Selecting EFI-method for reboot.\n", d->ident);
+ }
+ return 0;
+}
+
void __noreturn machine_real_restart(unsigned int type)
{
local_irq_disable();
@@ -166,6 +179,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
},
},
+ { /* Handle reboot issue on Acer TravelMate X514-51T */
+ .callback = set_efi_reboot,
+ .ident = "Acer TravelMate X514-51T",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate X514-51T"),
+ },
+ },
/* Apple */
{ /* Handle problems with rebooting on Apple MacBook5 */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 08dfd4c1a4f9..dff90fb6a9af 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs *regs,
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#ifdef CONFIG_X86_64
- /*
- * Fix up SS if needed for the benefit of old DOSEMU and
- * CRIU.
- */
- if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
- user_64bit_mode(regs)))
- force_valid_ss(regs);
-#endif
-
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
@@ -150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs,
buf = (void __user *)buf_val;
} get_user_catch(err);
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
+
err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
@@ -461,6 +460,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
+ unsigned long uc_flags;
int err = 0;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -473,9 +473,11 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -541,6 +543,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
+ unsigned long uc_flags;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
@@ -555,9 +558,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -688,10 +693,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
- /*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
- */
+ /* Perform fixup for the pre-signal frame. */
rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bad8c51fee6e..a5127b2c195f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -362,7 +362,7 @@ SECTIONS
.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
__bss_start = .;
*(.bss..page_aligned)
- *(.bss)
+ *(BSS_MAIN)
BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c338984c850d..d0d5dd44b4f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2331,24 +2331,18 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
+#ifdef CONFIG_X86_64
u32 eax, ebx, ecx, edx;
eax = 0x80000001;
ecx = 0;
ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
return edx & bit(X86_FEATURE_LM);
+#else
+ return false;
+#endif
}
-#define GET_SMSTATE(type, smbase, offset) \
- ({ \
- type __val; \
- int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
- sizeof(__val)); \
- if (r != X86EMUL_CONTINUE) \
- return X86EMUL_UNHANDLEABLE; \
- __val; \
- })
-
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
desc->g = (flags >> 23) & 1;
@@ -2361,27 +2355,30 @@ static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
desc->type = (flags >> 8) & 15;
}
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
u16 selector;
- selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+ selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
if (n < 3)
offset = 0x7f84 + n * 12;
else
offset = 0x7f2c + (n - 3) * 12;
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
return X86EMUL_CONTINUE;
}
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+ int n)
{
struct desc_struct desc;
int offset;
@@ -2390,15 +2387,16 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
offset = 0x7e00 + n * 16;
- selector = GET_SMSTATE(u16, smbase, offset);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
- base3 = GET_SMSTATE(u32, smbase, offset + 12);
+ selector = GET_SMSTATE(u16, smstate, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8));
+ base3 = GET_SMSTATE(u32, smstate, offset + 12);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
return X86EMUL_CONTINUE;
}
+#endif
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
u64 cr0, u64 cr3, u64 cr4)
@@ -2445,7 +2443,8 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
return X86EMUL_CONTINUE;
}
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2453,53 +2452,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
u32 val, cr0, cr3, cr4;
int i;
- cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
- ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+ cr0 = GET_SMSTATE(u32, smstate, 0x7ffc);
+ cr3 = GET_SMSTATE(u32, smstate, 0x7ff8);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);
for (i = 0; i < 8; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
- val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ val = GET_SMSTATE(u32, smstate, 0x7fcc);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ val = GET_SMSTATE(u32, smstate, 0x7fc8);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- selector = GET_SMSTATE(u32, smbase, 0x7fc4);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
- selector = GET_SMSTATE(u32, smbase, 0x7fc0);
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ selector = GET_SMSTATE(u32, smstate, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78));
ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f70);
ctxt->ops->set_gdt(ctxt, &dt);
- dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
- dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ dt.address = GET_SMSTATE(u32, smstate, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7f54);
ctxt->ops->set_idt(ctxt, &dt);
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_32(ctxt, smbase, i);
+ int r = rsm_load_seg_32(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+ cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
struct desc_struct desc;
struct desc_ptr dt;
@@ -2509,43 +2510,43 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
int i, r;
for (i = 0; i < 16; i++)
- *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
- ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
- ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
- val = GET_SMSTATE(u32, smbase, 0x7f68);
+ val = GET_SMSTATE(u32, smstate, 0x7f68);
ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
- val = GET_SMSTATE(u32, smbase, 0x7f60);
+ val = GET_SMSTATE(u32, smstate, 0x7f60);
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
- cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
- cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
- ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
- val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ cr0 = GET_SMSTATE(u64, smstate, 0x7f58);
+ cr3 = GET_SMSTATE(u64, smstate, 0x7f50);
+ cr4 = GET_SMSTATE(u64, smstate, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+ val = GET_SMSTATE(u64, smstate, 0x7ed0);
ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
- selector = GET_SMSTATE(u32, smbase, 0x7e90);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
- base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e9c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e88);
ctxt->ops->set_idt(ctxt, &dt);
- selector = GET_SMSTATE(u32, smbase, 0x7e70);
- rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
- set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
- set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
- base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ selector = GET_SMSTATE(u32, smstate, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78));
+ base3 = GET_SMSTATE(u32, smstate, 0x7e7c);
ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
- dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
- dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
+ dt.size = GET_SMSTATE(u32, smstate, 0x7e64);
+ dt.address = GET_SMSTATE(u64, smstate, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
@@ -2553,37 +2554,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
return r;
for (i = 0; i < 6; i++) {
- r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smstate, i);
if (r != X86EMUL_CONTINUE)
return r;
}
return X86EMUL_CONTINUE;
}
+#endif
static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
unsigned long cr0, cr4, efer;
+ char buf[512];
u64 smbase;
int ret;
if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
+ smbase = ctxt->ops->get_smbase(ctxt);
+
+ ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+ if (ret != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ ctxt->ops->set_nmi_mask(ctxt, false);
+
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+
/*
* Get back to real mode, to prepare a safe state in which to load
* CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
* supports long mode.
*/
- cr4 = ctxt->ops->get_cr(ctxt, 4);
if (emulator_has_longmode(ctxt)) {
struct desc_struct cs_desc;
/* Zero CR4.PCIDE before CR0.PG. */
- if (cr4 & X86_CR4_PCIDE) {
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PCIDE)
ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
- cr4 &= ~X86_CR4_PCIDE;
- }
/* A 32-bit code segment is required to clear EFER.LMA. */
memset(&cs_desc, 0, sizeof(cs_desc));
@@ -2597,39 +2610,39 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
if (cr0 & X86_CR0_PE)
ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
- /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
- if (cr4 & X86_CR4_PAE)
- ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
- /* And finally go back to 32-bit mode. */
- efer = 0;
- ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ if (emulator_has_longmode(ctxt)) {
+ /* Clear CR4.PAE before clearing EFER.LME. */
+ cr4 = ctxt->ops->get_cr(ctxt, 4);
+ if (cr4 & X86_CR4_PAE)
+ ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
- smbase = ctxt->ops->get_smbase(ctxt);
+ /* And finally go back to 32-bit mode. */
+ efer = 0;
+ ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+ }
/*
* Give pre_leave_smm() a chance to make ISA-specific changes to the
* vCPU state (e.g. enter guest mode) before loading state from the SMM
* state-save area.
*/
- if (ctxt->ops->pre_leave_smm(ctxt, smbase))
+ if (ctxt->ops->pre_leave_smm(ctxt, buf))
return X86EMUL_UNHANDLEABLE;
+#ifdef CONFIG_X86_64
if (emulator_has_longmode(ctxt))
- ret = rsm_load_state_64(ctxt, smbase + 0x8000);
+ ret = rsm_load_state_64(ctxt, buf);
else
- ret = rsm_load_state_32(ctxt, smbase + 0x8000);
+#endif
+ ret = rsm_load_state_32(ctxt, buf);
if (ret != X86EMUL_CONTINUE) {
/* FIXME: should triple fault */
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
- ctxt->ops->set_nmi_mask(ctxt, false);
+ ctxt->ops->post_leave_smm(ctxt);
- ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
- ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 421899f6ad7b..cc24b3a32c44 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
- all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+
+ /*
+ * Work around possible WS2012 bug: it sends hypercalls
+ * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
+ * while also expecting us to flush something and crashing if
+ * we don't. Let's treat processor_mask == 0 same as
+ * HV_FLUSH_ALL_PROCESSORS.
+ */
+ all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
+ flush.processor_mask == 0;
} else {
if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
sizeof(flush_ex))))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 991fdf7fc17f..bd13fdddbdc4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,7 +70,6 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
-static bool lapic_timer_advance_adjust_done = false;
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -138,6 +137,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
if (offset <= max_apic_id) {
u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+ offset = array_index_nospec(offset, map->max_apic_id + 1);
*cluster = &map->phys_map[offset];
*mask = dest_id & (0xffff >> (16 - cluster_size));
} else {
@@ -901,7 +901,8 @@ static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
if (irq->dest_id > map->max_apic_id) {
*bitmap = 0;
} else {
- *dst = &map->phys_map[irq->dest_id];
+ u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
+ *dst = &map->phys_map[dest_id];
*bitmap = 1;
}
return true;
@@ -1480,14 +1481,32 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
return false;
}
+static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
+{
+ u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+
+ /*
+ * If the guest TSC is running at a different ratio than the host, then
+ * convert the delay to nanoseconds to achieve an accurate delay. Note
+ * that __delay() uses delay_tsc whenever the hardware has TSC, thus
+ * always for VMX enabled hardware.
+ */
+ if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+ __delay(min(guest_cycles,
+ nsec_to_cycles(vcpu, timer_advance_ns)));
+ } else {
+ u64 delay_ns = guest_cycles * 1000000ULL;
+ do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
+ ndelay(min_t(u32, delay_ns, timer_advance_ns));
+ }
+}
+
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
u64 guest_tsc, tsc_deadline, ns;
- if (!lapic_in_kernel(vcpu))
- return;
-
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
@@ -1499,33 +1518,37 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
- /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(min(tsc_deadline - guest_tsc,
- nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+ __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
- if (!lapic_timer_advance_adjust_done) {
+ if (!apic->lapic_timer.timer_advance_adjust_done) {
/* too early */
if (guest_tsc < tsc_deadline) {
ns = (tsc_deadline - guest_tsc) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns -= min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns -= min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = (guest_tsc - tsc_deadline) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns += min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns += min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
- lapic_timer_advance_adjust_done = true;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ if (unlikely(timer_advance_ns > 5000)) {
+ timer_advance_ns = 0;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
- u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
u64 ns = 0;
ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1540,13 +1563,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- if (likely(tscdeadline > guest_tsc)) {
- ns = (tscdeadline - guest_tsc) * 1000000ULL;
- do_div(ns, this_tsc_khz);
+
+ ns = (tscdeadline - guest_tsc) * 1000000ULL;
+ do_div(ns, this_tsc_khz);
+
+ if (likely(tscdeadline > guest_tsc) &&
+ likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
- expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS_PINNED);
+ expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
} else
apic_timer_expired(apic);
@@ -2253,7 +2278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-int kvm_create_lapic(struct kvm_vcpu *vcpu)
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
struct kvm_lapic *apic;
@@ -2277,6 +2302,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
+ if (timer_advance_ns == -1) {
+ apic->lapic_timer.timer_advance_ns = 1000;
+ apic->lapic_timer.timer_advance_adjust_done = false;
+ } else {
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
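The two array_index_nospec() hunks above in lapic.c follow the usual Spectre-v1 hardening pattern: after the architectural bounds check, the index is clamped so a mispredicted branch cannot be used to load out of bounds under speculation. A minimal sketch of that pattern, using a made-up table rather than the KVM APIC map:

#include <linux/nospec.h>
#include <linux/types.h>

#define NR_SLOTS 256				/* illustrative size */

static u32 slot_table[NR_SLOTS];

static u32 slot_lookup(u32 idx)
{
	if (idx >= NR_SLOTS)
		return 0;
	/* Clamp idx under speculation so slot_table[] cannot be over-read. */
	idx = array_index_nospec(idx, NR_SLOTS);
	return slot_table[idx];
}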
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ff6ef9c3d760..d6d049ba3045 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,8 +31,10 @@ struct kvm_timer {
u32 timer_mode_mask;
u64 tscdeadline;
u64 expired_tscdeadline;
+ u32 timer_advance_ns;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
+ bool timer_advance_adjust_done;
};
struct kvm_lapic {
@@ -62,7 +64,7 @@ struct kvm_lapic {
struct dest_map;
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eee455a8a612..d9c7b45d231f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2007,7 +2007,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
@@ -2238,7 +2238,7 @@ static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm,
struct list_head *invalid_list,
bool remote_flush)
{
- if (!remote_flush && !list_empty(invalid_list))
+ if (!remote_flush && list_empty(invalid_list))
return false;
if (!list_empty(invalid_list))
@@ -2763,7 +2763,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm,
* Changing the number of mmu pages allocated to the vm
* Note: if goal_nr_mmu_pages is too small, you will get dead lock
*/
-void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages)
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long goal_nr_mmu_pages)
{
LIST_HEAD(invalid_list);
@@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
union kvm_mmu_extended_role ext = {0};
ext.cr0_pg = !!is_paging(vcpu);
+ ext.cr4_pae = !!is_pae(vcpu);
ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ext.cr4_pse = !!is_pse(vcpu);
@@ -6031,10 +6032,10 @@ out:
/*
* Calculate mmu pages needed for kvm.
*/
-unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
+unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
{
- unsigned int nr_mmu_pages;
- unsigned int nr_pages = 0;
+ unsigned long nr_mmu_pages;
+ unsigned long nr_pages = 0;
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i;
@@ -6047,8 +6048,7 @@ unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
}
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
- nr_mmu_pages = max(nr_mmu_pages,
- (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+ nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
return nr_mmu_pages;
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bbdc60f2fae8..54c2a377795b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -64,7 +64,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
-static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
+static inline unsigned long kvm_mmu_available_pages(struct kvm *kvm)
{
if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages)
return kvm->arch.n_max_mmu_pages -
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7db71a3..e39741997893 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -281,9 +281,13 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (!pmu->version)
+ return 1;
+
if (is_vmware_backdoor_pmc(idx))
return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e0a791c3d4fc..406b558abfef 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -262,6 +262,7 @@ struct amd_svm_iommu_ir {
};
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
+#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
@@ -2692,6 +2693,7 @@ static int npf_interception(struct vcpu_svm *svm)
static int db_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
if (!(svm->vcpu.guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
@@ -2702,6 +2704,8 @@ static int db_interception(struct vcpu_svm *svm)
if (svm->nmi_singlestep) {
disable_nmi_singlestep(svm);
+ /* Make sure we check for pending NMIs upon entry */
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
}
if (svm->vcpu.guest_debug &
@@ -4517,14 +4521,25 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
kvm_lapic_reg_write(apic, APIC_ICR, icrl);
break;
case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
struct kvm_lapic *apic = svm->vcpu.arch.apic;
/*
- * Update ICR high and low, then emulate sending IPI,
- * which is handled when writing APIC_ICR.
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
*/
- kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
- kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
break;
}
case AVIC_IPI_FAILURE_INVALID_TARGET:
@@ -4596,7 +4611,7 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
if (entry)
- WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK);
+ clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
@@ -5621,6 +5636,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
svm->vmcb->save.cr2 = vcpu->arch.cr2;
clgi();
+ kvm_load_guest_xcr0(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -5766,6 +5782,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
+ kvm_put_guest_xcr0(vcpu);
stgi();
/* Any pending NMI will happen here */
@@ -6215,32 +6232,24 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
struct page *page;
- struct {
- u64 guest;
- u64 vmcb;
- } svm_state_save;
- int ret;
+ u64 guest;
+ u64 vmcb;
- ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfed8, &svm_state_save,
- sizeof(svm_state_save));
- if (ret)
- return ret;
+ guest = GET_SMSTATE(u64, smstate, 0x7ed8);
+ vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
- if (svm_state_save.guest) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
- nested_vmcb = nested_svm_map(svm, svm_state_save.vmcb, &page);
- if (nested_vmcb)
- enter_svm_guest_mode(svm, svm_state_save.vmcb, nested_vmcb, page);
- else
- ret = 1;
- vcpu->arch.hflags |= HF_SMM_MASK;
+ if (guest) {
+ nested_vmcb = nested_svm_map(svm, vmcb, &page);
+ if (!nested_vmcb)
+ return 1;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
}
- return ret;
+ return 0;
}
static int enable_smi_window(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6432d08c7de7..4d47a2631d1f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -438,13 +438,13 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
- __field( __u8, tm )
+ __field( __u16, tm )
__field( __u8, vec )
),
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7ec9bb1dd723..0c601d079cd2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2873,20 +2873,27 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
/*
* If translation failed, VM entry will fail because
* prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
- * Failing the vm entry is _not_ what the processor
- * does but it's basically the only possibility we
- * have. We could still enter the guest if CR8 load
- * exits are enabled, CR8 store exits are enabled, and
- * virtualize APIC access is disabled; in this case
- * the processor would never use the TPR shadow and we
- * could simply clear the bit from the execution
- * control. But such a configuration is useless, so
- * let's keep the code simple.
*/
if (!is_error_page(page)) {
vmx->nested.virtual_apic_page = page;
hpa = page_to_phys(vmx->nested.virtual_apic_page);
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
+ } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
+ nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
+ !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /*
+ * The processor will never use the TPR shadow, simply
+ * clear the bit from the execution control. Such a
+ * configuration is useless, but it happens in tests.
+ * For any other configuration, failing the vm entry is
+ * _not_ what the processor does but it's basically the
+ * only possibility we have.
+ */
+ vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
+ CPU_BASED_TPR_SHADOW);
+ } else {
+ printk("bad virtual-APIC page address\n");
+ dump_vmcs();
}
}
@@ -3789,8 +3796,18 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+ * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
+ * points to shadow pages! Fortunately we only get here after a WARN_ON
+ * if EPT is disabled, so a VMabort is perfectly fine.
+ */
+ if (enable_ept) {
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ } else {
+ nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
+ }
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -5406,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return ret;
/* Empty 'VMXON' state is permitted */
- if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
return 0;
if (kvm_state->vmx.vmcs_pa != -1ull) {
@@ -5450,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
- if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
return -EINVAL;
if (copy_from_user(shadow_vmcs12,
@@ -5738,6 +5755,14 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
+ /*
+ * Without EPT it is not possible to restore L1's CR3 and PDPTR on
+ * VMfail, because they are not available in vmcs01. Just always
+ * use hardware checks.
+ */
+ if (!enable_ept)
+ nested_early_check = 1;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 7b272738c576..d4cb1945b2e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
+#include <asm/nospec-branch.h>
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter)
* referred to by VMCS.HOST_RIP.
*/
ENTRY(vmx_vmexit)
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+ /* Preserve guest's RAX, it's used to stuff the RSB. */
+ push %_ASM_AX
+
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+
+ pop %_ASM_AX
+.Lvmexit_skip_rsb:
+#endif
ret
ENDPROC(vmx_vmexit)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab432a930ae8..0c955bb286ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5603,7 +5603,7 @@ static void vmx_dump_dtsel(char *name, uint32_t limit)
vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
}
-static void dump_vmcs(void)
+void dump_vmcs(void)
{
u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
@@ -6410,6 +6410,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ kvm_load_guest_xcr0(vcpu);
+
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
vcpu->arch.pkru != vmx->host_pkru)
@@ -6460,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
current_evmcs->hv_clean_fields |=
@@ -6506,6 +6505,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
+ kvm_put_guest_xcr0(vcpu);
+
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -6852,6 +6853,30 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
}
}
+static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+ union cpuid10_eax eax;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ if (!entry)
+ return false;
+
+ eax.full = entry->eax;
+ return (eax.split.version_id > 0);
+}
+
+static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
+
+ if (pmu_enabled)
+ vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
+ else
+ vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
+}
+
static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6940,6 +6965,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (nested_vmx_allowed(vcpu)) {
nested_vmx_cr_fixed1_bits_update(vcpu);
nested_vmx_entry_exit_ctls_update(vcpu);
+ nested_vmx_procbased_ctls_update(vcpu);
}
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -7003,6 +7029,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
+ struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
if (kvm_mwait_in_guest(vcpu->kvm))
return -EOPNOTSUPP;
@@ -7011,7 +7038,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
tscl = rdtsc();
guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
- lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+ lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
+ ktimer->timer_advance_ns);
if (delta_tsc > lapic_timer_advance_cycles)
delta_tsc -= lapic_timer_advance_cycles;
@@ -7369,7 +7397,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
return 0;
}
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
+static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int ret;
@@ -7380,9 +7408,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase)
}
if (vmx->nested.smm.guest_mode) {
- vcpu->arch.hflags &= ~HF_SMM_MASK;
ret = nested_vmx_enter_non_root_mode(vcpu, false);
- vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a1e00d0a2482..f879529906b4 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -517,4 +517,6 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
}
+void dump_vmcs(void);
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 099b851dabaf..b5edc8e3ce1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
+ * adaptive tuning starting from default advancement of 1000ns. '0' disables
+ * advancement entirely. Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows privileged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -800,7 +804,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
}
EXPORT_SYMBOL_GPL(kvm_lmsw);
-static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
!vcpu->guest_xcr0_loaded) {
@@ -810,8 +814,9 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 1;
}
}
+EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
-static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_xcr0_loaded) {
if (vcpu->arch.xcr0 != host_xcr0)
@@ -819,6 +824,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
vcpu->guest_xcr0_loaded = 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
@@ -3093,7 +3099,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NESTED_STATE:
r = kvm_x86_ops->get_nested_state ?
- kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+ kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
break;
default:
break;
@@ -3528,7 +3534,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
memset(&events->reserved, 0, sizeof(events->reserved));
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu);
static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events)
@@ -3588,12 +3594,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.apic->sipi_vector = events->sipi_vector;
if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
- u32 hflags = vcpu->arch.hflags;
- if (events->smi.smm)
- hflags |= HF_SMM_MASK;
- else
- hflags &= ~HF_SMM_MASK;
- kvm_set_hflags(vcpu, hflags);
+ if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
+ if (events->smi.smm)
+ vcpu->arch.hflags |= HF_SMM_MASK;
+ else
+ vcpu->arch.hflags &= ~HF_SMM_MASK;
+ kvm_smm_changed(vcpu);
+ }
vcpu->arch.smi_pending = events->smi.pending;
@@ -4270,7 +4277,7 @@ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
- u32 kvm_nr_mmu_pages)
+ unsigned long kvm_nr_mmu_pages)
{
if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
return -EINVAL;
@@ -4284,7 +4291,7 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
return 0;
}
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
return kvm->arch.n_max_mmu_pages;
}
@@ -5958,12 +5965,18 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+ emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
}
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
+static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+ const char *smstate)
{
- return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
+ return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
+}
+
+static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+ kvm_smm_changed(emul_to_vcpu(ctxt));
}
static const struct x86_emulate_ops emulate_ops = {
@@ -6006,6 +6019,7 @@ static const struct x86_emulate_ops emulate_ops = {
.get_hflags = emulator_get_hflags,
.set_hflags = emulator_set_hflags,
.pre_leave_smm = emulator_pre_leave_smm,
+ .post_leave_smm = emulator_post_leave_smm,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -6247,16 +6261,6 @@ static void kvm_smm_changed(struct kvm_vcpu *vcpu)
kvm_mmu_reset_context(vcpu);
}
-static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
-{
- unsigned changed = vcpu->arch.hflags ^ emul_flags;
-
- vcpu->arch.hflags = emul_flags;
-
- if (changed & HF_SMM_MASK)
- kvm_smm_changed(vcpu);
-}
-
static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
unsigned long *db)
{
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pio.count = 0;
+ return 1;
+}
+
static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
+ if (ret)
+ return ret;
- if (!ret) {
+ /*
+ * Work around userspace that relies on the old KVM behavior of %rip being
+ * incremented prior to exiting to userspace to handle "OUT 0x7e".
+ */
+ if (port == 0x7e &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+ vcpu->arch.complete_userspace_io =
+ complete_fast_pio_out_port_0x7e;
+ kvm_skip_emulated_instruction(vcpu);
+ } else {
vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_out;
}
- return ret;
+ return 0;
}
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -7441,9 +7462,9 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
}
+#ifdef CONFIG_X86_64
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
{
-#ifdef CONFIG_X86_64
struct desc_ptr dt;
struct kvm_segment seg;
unsigned long val;
@@ -7493,10 +7514,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
for (i = 0; i < 6; i++)
enter_smm_save_seg_64(vcpu, buf, i);
-#else
- WARN_ON_ONCE(1);
-#endif
}
+#endif
static void enter_smm(struct kvm_vcpu *vcpu)
{
@@ -7507,9 +7526,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
enter_smm_save_state_64(vcpu, buf);
else
+#endif
enter_smm_save_state_32(vcpu, buf);
/*
@@ -7567,8 +7588,10 @@ static void enter_smm(struct kvm_vcpu *vcpu)
kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
kvm_x86_ops->set_efer(vcpu, 0);
+#endif
kvm_update_cpuid(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -7865,15 +7888,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
- kvm_load_guest_xcr0(vcpu);
-
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_timer_advance_ns)
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
@@ -7919,8 +7941,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_put_guest_xcr0(vcpu);
-
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
kvm_after_interrupt(vcpu);
@@ -9063,7 +9083,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
- r = kvm_create_lapic(vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
} else
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 28406aa1136d..534d3f28bb01 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void);
extern unsigned int min_timer_period_us;
-extern unsigned int lapic_timer_advance_ns;
-
extern bool enable_vmware_backdoor;
extern struct static_key kvm_no_apic_vcpu;
@@ -347,4 +345,6 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
__this_cpu_write(current_vcpu, NULL);
}
+void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 140e61843a07..3cb3af51ec89 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -6,6 +6,18 @@
# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# Early boot use of cmdline; don't instrument it
+ifdef CONFIG_AMD_MEM_ENCRYPT
+KCOV_INSTRUMENT_cmdline.o := n
+KASAN_SANITIZE_cmdline.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_cmdline.o = -pg
+endif
+
+CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector)
+endif
+
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index db4e5aa0858b..b2f1822084ae 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,30 @@
#include <asm/smap.h>
#include <asm/export.h>
+.macro ALIGN_DESTINATION
+ /* check for bad alignment of destination */
+ movl %edi,%ecx
+ andl $7,%ecx
+ jz 102f /* already aligned */
+ subl $8,%ecx
+ negl %ecx
+ subl %ecx,%edx
+100: movb (%rsi),%al
+101: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 100b
+102:
+ .section .fixup,"ax"
+103: addl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ _ASM_EXTABLE_UA(100b, 103b)
+ _ASM_EXTABLE_UA(101b, 103b)
+ .endm
+
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
@@ -194,6 +218,30 @@ ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ALIGN;
+copy_user_handle_tail:
+ movl %edx,%ecx
+1: rep movsb
+2: mov %ecx,%eax
+ ASM_CLAC
+ ret
+
+ _ASM_EXTABLE_UA(1b, 2b)
+ENDPROC(copy_user_handle_tail)
+
+/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 3b24dc05251c..9d05572370ed 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe)
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
+.L_done:
ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
- ret
+ jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index ee42bb0cbeb3..9952a01cad24 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -55,26 +55,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
EXPORT_SYMBOL(clear_user);
/*
- * Try to copy last bytes and clear the rest if needed.
- * Since protection fault in copy_from/to_user is not a normal situation,
- * it is not necessary to optimize tail handling.
- */
-__visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++) {
- char c;
-
- if (__get_user_nocheck(c, from++, sizeof(char)))
- break;
- if (__put_user_nocheck(c, to, sizeof(char)))
- break;
- }
- clac();
- return len;
-}
-
-/*
* Similar to copy_user_handle_tail, probe for the write fault point,
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ee8f8ab46941..c0309ea9abee 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -259,7 +259,8 @@ static void note_wx(struct pg_state *st)
#endif
/* Account the WX pages */
st->wx_pages += npages;
- WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %pS\n",
+ WARN_ONCE(__supported_pte_mask & _PAGE_NX,
+ "x86/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f905a2371080..8dacdb96899e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,6 +5,7 @@
#include <linux/memblock.h>
#include <linux/swapfile.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -766,6 +767,11 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
if (debug_pagealloc_enabled()) {
pr_info("debug: unmapping init [mem %#010lx-%#010lx]\n",
begin, end - 1);
+ /*
+ * Inform kmemleak about the hole in the memory since the
+ * corresponding pages will be unmapped.
+ */
+ kmemleak_free_part((void *)begin, end - begin);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
} else {
/*
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0029604af8a4..dd73d5d74393 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -825,7 +825,7 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
pte = early_ioremap_pte(addr);
/* Sanitize 'prot' against any unsupported bits: */
- pgprot_val(flags) &= __default_kernel_pte_mask;
+ pgprot_val(flags) &= __supported_pte_mask;
if (pgprot_val(flags))
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 3f452ffed7e9..d669c5e797e0 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -94,7 +94,7 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;
- kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[0].size_tb = 1 << (MAX_PHYSMEM_BITS - TB_SHIFT);
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/*
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 139b28a01ce4..d0255d64edce 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -35,6 +35,7 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <linux/cpu.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
@@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void)
}
}
- if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ if (cmdline_find_option_bool(boot_command_line, "nopti") ||
+ cpu_mitigations_off()) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bc4bc7b2f075..487b8474c01c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -728,7 +728,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
{
int cpu;
- struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
+ struct flush_tlb_info info = {
.mm = mm,
.stride_shift = stride_shift,
.freed_tables = freed_tables,
diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h
index 0d766f9c1083..50889935138a 100644
--- a/arch/xtensa/include/asm/tlb.h
+++ b/arch/xtensa/include/asm/tlb.h
@@ -14,32 +14,6 @@
#include <asm/cache.h>
#include <asm/page.h>
-#if (DCACHE_WAY_SIZE <= PAGE_SIZE)
-
-/* Note, read http://lkml.org/lkml/2004/1/15/6 */
-
-# define tlb_start_vma(tlb,vma) do { } while (0)
-# define tlb_end_vma(tlb,vma) do { } while (0)
-
-#else
-
-# define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-# define tlb_end_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-#endif
-
-#define __tlb_remove_tlb_entry(tlb,pte,addr) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 6af49929de85..30084eaf8422 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -394,3 +394,7 @@
421 common rt_sigtimedwait_time64 sys_rt_sigtimedwait
422 common futex_time64 sys_futex
423 common sched_rr_get_interval_time64 sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index dfb8cb0af13a..5ba1e0d841b4 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5396,7 +5396,7 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
return min_shallow;
}
-static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
{
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
@@ -5404,6 +5404,11 @@ static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+}
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+ bfq_depth_updated(hctx);
return 0;
}
@@ -5826,6 +5831,7 @@ static struct elevator_type iosched_bfq_mq = {
.requests_merged = bfq_requests_merged,
.request_merged = bfq_request_merged,
.has_work = bfq_has_work,
+ .depth_updated = bfq_depth_updated,
.init_hctx = bfq_init_hctx,
.init_sched = bfq_init_queue,
.exit_sched = bfq_exit_queue,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9516304a38ee..fc60ed7e940e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3135,6 +3135,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
}
if (ret)
break;
+ if (q->elevator && q->elevator->type->ops.depth_updated)
+ q->elevator->type->ops.depth_updated(hctx);
}
if (!ret)
diff --git a/crypto/lrw.c b/crypto/lrw.c
index 0430ccd08728..08a0e458bc3e 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
{
struct skcipher_request *req = areq->data;
- if (!err)
+ if (!err) {
+ struct rctx *rctx = skcipher_request_ctx(req);
+
+ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
err = xor_tweak_post(req);
+ }
skcipher_request_complete(req, err);
}
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index f267633cf13a..d18a37629f05 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = {
.psize = 80,
.digest = "\x13\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00",
- },
+ }, { /* Regression test for overflow in AVX2 implementation */
+ .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff",
+ .psize = 300,
+ .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
+ "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
+ }
};
/* NHPoly1305 test vectors from https://github.com/google/adiantum */
diff --git a/crypto/xts.c b/crypto/xts.c
index 847f54f76789..2f948328cabb 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
{
struct skcipher_request *req = areq->data;
- if (!err)
+ if (!err) {
+ struct rctx *rctx = skcipher_request_ctx(req);
+
+ rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
err = xor_tweak_post(req);
+ }
skcipher_request_complete(req, err);
}
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 5e9d7348c16f..62d3aa74277b 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -81,12 +81,8 @@ acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info)
ACPI_FUNCTION_TRACE(ev_enable_gpe);
- /* Clear the GPE status */
- status = acpi_hw_clear_gpe(gpe_event_info);
- if (ACPI_FAILURE(status))
- return_ACPI_STATUS(status);
-
/* Enable the requested GPE */
+
status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE);
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 5a389a4f4f65..f1ed0befe303 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -567,6 +567,12 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
goto out;
}
+ dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
+ cmd_name, out_obj->buffer.length);
+ print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
+ out_obj->buffer.pointer,
+ min_t(u32, 128, out_obj->buffer.length), true);
+
if (call_pkg) {
call_pkg->nd_fw_size = out_obj->buffer.length;
memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
@@ -585,12 +591,6 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
return 0;
}
- dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
- cmd_name, out_obj->buffer.length);
- print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
- out_obj->buffer.pointer,
- min_t(u32, 128, out_obj->buffer.length), true);
-
for (i = 0, offset = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
(u32 *) out_obj->buffer.pointer,
diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
index f70de71f79d6..cddd0fcf622c 100644
--- a/drivers/acpi/nfit/intel.c
+++ b/drivers/acpi/nfit/intel.c
@@ -122,9 +122,8 @@ static int intel_security_change_key(struct nvdimm *nvdimm,
if (!test_bit(cmd, &nfit_mem->dsm_mask))
return -ENOTTY;
- if (old_data)
- memcpy(nd_cmd.cmd.old_pass, old_data->data,
- sizeof(nd_cmd.cmd.old_pass));
+ memcpy(nd_cmd.cmd.old_pass, old_data->data,
+ sizeof(nd_cmd.cmd.old_pass));
memcpy(nd_cmd.cmd.new_pass, new_data->data,
sizeof(nd_cmd.cmd.new_pass));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
@@ -336,9 +335,8 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm,
/* flush all cache before we erase DIMM */
nvdimm_invalidate_cache();
- if (nkey)
- memcpy(nd_cmd.cmd.passphrase, nkey->data,
- sizeof(nd_cmd.cmd.passphrase));
+ memcpy(nd_cmd.cmd.passphrase, nkey->data,
+ sizeof(nd_cmd.cmd.passphrase));
rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
if (rc < 0)
return rc;
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 11e1663bdc4d..b2c06da4f62e 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1646,7 +1646,7 @@ static irqreturn_t fs_irq (int irq, void *dev_id)
}
if (status & ISR_TBRQ_W) {
- fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n");
+ fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n");
process_txdone_queue (dev, &dev->tx_relq);
}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index cb8347500ce2..e49028a60429 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -506,7 +506,7 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
ret = lock_device_hotplug_sysfs();
if (ret)
- goto out;
+ return ret;
nid = memory_add_physaddr_to_nid(phys_addr);
ret = __add_memory(nid, phys_addr,
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 399cad7daae7..d58a359a6622 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -774,18 +774,18 @@ struct zram_work {
struct zram *zram;
unsigned long entry;
struct bio *bio;
+ struct bio_vec bvec;
};
#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
- struct bio_vec bvec;
struct zram_work *zw = container_of(work, struct zram_work, work);
struct zram *zram = zw->zram;
unsigned long entry = zw->entry;
struct bio *bio = zw->bio;
- read_from_bdev_async(zram, &bvec, entry, bio);
+ read_from_bdev_async(zram, &zw->bvec, entry, bio);
}
/*
@@ -798,6 +798,7 @@ static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
{
struct zram_work work;
+ work.bvec = *bvec;
work.zram = zram;
work.entry = entry;
work.bio = bio;
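The zram hunks above are a data-lifetime fix: the worker previously used an uninitialised local bio_vec instead of the one the submitter had, so the vector is now copied into the work item itself. A rough sketch of carrying the payload inside an on-stack work item, with hypothetical names (not the zram API):

#include <linux/workqueue.h>

struct sync_read_work {
	struct work_struct work;
	int sector;			/* payload the worker needs */
};

static void sync_read_fn(struct work_struct *work)
{
	struct sync_read_work *w = container_of(work, struct sync_read_work, work);

	/* w->sector was filled in by the submitter below. */
	(void)w->sector;
}

static void submit_sync_read(int sector)
{
	struct sync_read_work w;

	w.sector = sector;		/* copy the payload into the item */
	INIT_WORK_ONSTACK(&w.work, sync_read_fn);
	queue_work(system_unbound_wq, &w.work);
	flush_work(&w.work);		/* the item lives on our stack: wait here */
	destroy_work_on_stack(&w.work);
}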
diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c
index ff0b199be472..f2411468f33f 100644
--- a/drivers/char/ipmi/ipmi_dmi.c
+++ b/drivers/char/ipmi/ipmi_dmi.c
@@ -66,7 +66,6 @@ static void __init dmi_add_platform_ipmi(unsigned long base_addr,
return;
}
- memset(&p, 0, sizeof(p));
p.addr = base_addr;
p.space = space;
p.regspacing = offset;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index e8ba67834746..00bf4b17edbf 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -214,6 +214,9 @@ struct ipmi_user {
/* Does this interface receive IPMI events? */
bool gets_events;
+
+ /* Free must run in process context for RCU cleanup. */
+ struct work_struct remove_work;
};
static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
@@ -1157,6 +1160,15 @@ static int intf_err_seq(struct ipmi_smi *intf,
return rv;
}
+static void free_user_work(struct work_struct *work)
+{
+ struct ipmi_user *user = container_of(work, struct ipmi_user,
+ remove_work);
+
+ cleanup_srcu_struct(&user->release_barrier);
+ kfree(user);
+}
+
int ipmi_create_user(unsigned int if_num,
const struct ipmi_user_hndl *handler,
void *handler_data,
@@ -1200,6 +1212,8 @@ int ipmi_create_user(unsigned int if_num,
goto out_kfree;
found:
+ INIT_WORK(&new_user->remove_work, free_user_work);
+
rv = init_srcu_struct(&new_user->release_barrier);
if (rv)
goto out_kfree;
@@ -1260,8 +1274,9 @@ EXPORT_SYMBOL(ipmi_get_smi_info);
static void free_user(struct kref *ref)
{
struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
- cleanup_srcu_struct(&user->release_barrier);
- kfree(user);
+
+ /* SRCU cleanup must happen in task context. */
+ schedule_work(&user->remove_work);
}
static void _ipmi_destroy_user(struct ipmi_user *user)
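The ipmi change above exists because cleanup_srcu_struct() can block, while the final kref_put() may happen where blocking is not allowed; the actual free is therefore pushed to a workqueue. A rough sketch of that pattern, with illustrative names (not the ipmi API):

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/workqueue.h>

struct obj {
	struct kref ref;
	struct srcu_struct srcu;
	struct work_struct free_work;	/* INIT_WORK()ed when the object is created */
};

static void obj_free_work(struct work_struct *work)
{
	struct obj *o = container_of(work, struct obj, free_work);

	cleanup_srcu_struct(&o->srcu);	/* may block: needs process context */
	kfree(o);
}

static void obj_release(struct kref *ref)
{
	struct obj *o = container_of(ref, struct obj, ref);

	/* Defer the blocking cleanup rather than doing it here. */
	schedule_work(&o->free_work);
}

/* Callers drop references with kref_put(&o->ref, obj_release). */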
diff --git a/drivers/char/ipmi/ipmi_si_hardcode.c b/drivers/char/ipmi/ipmi_si_hardcode.c
index 01946cad3d13..682221eebd66 100644
--- a/drivers/char/ipmi/ipmi_si_hardcode.c
+++ b/drivers/char/ipmi/ipmi_si_hardcode.c
@@ -118,6 +118,8 @@ void __init ipmi_hardcode_init(void)
char *str;
char *si_type[SI_MAX_PARMS];
+ memset(si_type, 0, sizeof(si_type));
+
/* Parse out the si_type string into its components. */
str = si_type_str;
if (*str != '\0') {
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
index 8c4435c53f09..6e787cc9e5b9 100644
--- a/drivers/clk/clkdev.c
+++ b/drivers/clk/clkdev.c
@@ -46,6 +46,8 @@ static struct clk_lookup *clk_find(const char *dev_id, const char *con_id)
if (con_id)
best_possible += 1;
+ lockdep_assert_held(&clocks_mutex);
+
list_for_each_entry(p, &clocks, node) {
match = 0;
if (p->dev_id) {
@@ -402,7 +404,10 @@ void devm_clk_release_clkdev(struct device *dev, const char *con_id,
struct clk_lookup *cl;
int rval;
+ mutex_lock(&clocks_mutex);
cl = clk_find(dev_id, con_id);
+ mutex_unlock(&clocks_mutex);
+
WARN_ON(!cl);
rval = devres_release(dev, devm_clkdev_release,
devm_clk_match_clkdev, cl);
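The clkdev hunks above both document and enforce the locking rule: clk_find() asserts that clocks_mutex is held, and the devm lookup path now actually takes it. A minimal sketch of the lockdep_assert_held() idiom, with made-up names:

#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(table_lock);
static LIST_HEAD(table);

struct entry {
	struct list_head node;
	int id;
};

/* Every caller must hold table_lock while the returned entry is used. */
static struct entry *table_find(int id)
{
	struct entry *e;

	lockdep_assert_held(&table_lock);	/* warns (with lockdep) if called unlocked */

	list_for_each_entry(e, &table, node)
		if (e->id == id)
			return e;
	return NULL;
}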
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index 9b49adb20d07..cbcdf664f336 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -167,7 +167,7 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
- u32 n_mask, k_mask, m_mask, p_mask;
+ u32 n_mask = 0, k_mask = 0, m_mask = 0, p_mask = 0;
struct _ccu_nkmp _nkmp;
unsigned long flags;
u32 reg;
@@ -186,10 +186,24 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
ccu_nkmp_find_best(parent_rate, rate, &_nkmp);
- n_mask = GENMASK(nkmp->n.width + nkmp->n.shift - 1, nkmp->n.shift);
- k_mask = GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift);
- m_mask = GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift);
- p_mask = GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift);
+ /*
+ * If width is 0, the GENMASK() macro may not generate the expected mask (0),
+ * because the C standard leaves shifts equal to or greater than the width
+ * of the left operand undefined. This is easily avoided by explicitly
+ * checking whether width is 0.
+ */
+ if (nkmp->n.width)
+ n_mask = GENMASK(nkmp->n.width + nkmp->n.shift - 1,
+ nkmp->n.shift);
+ if (nkmp->k.width)
+ k_mask = GENMASK(nkmp->k.width + nkmp->k.shift - 1,
+ nkmp->k.shift);
+ if (nkmp->m.width)
+ m_mask = GENMASK(nkmp->m.width + nkmp->m.shift - 1,
+ nkmp->m.shift);
+ if (nkmp->p.width)
+ p_mask = GENMASK(nkmp->p.width + nkmp->p.shift - 1,
+ nkmp->p.shift);
spin_lock_irqsave(nkmp->common.lock, flags);
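The checks added above matter because GENMASK(width + shift - 1, shift) is only well defined for width > 0; with width == 0 the high index drops below the low one and, for shift == 0, the macro ends up shifting by the full word size, which is undefined behaviour in C. A small illustration of the guarded form (the field layout is made up):

#include <linux/bits.h>

/* Build a field mask, tolerating zero-width fields. */
static inline unsigned long field_mask(unsigned int width, unsigned int shift)
{
	return width ? GENMASK(width + shift - 1, shift) : 0;
}

/* e.g. field_mask(5, 8) == 0x1f00, field_mask(0, 8) == 0 */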
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 171502a356aa..4b3d143f0f8a 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -145,6 +145,7 @@ config VT8500_TIMER
config NPCM7XX_TIMER
bool "NPCM7xx timer driver" if COMPILE_TEST
depends on HAS_IOMEM
+ select TIMER_OF
select CLKSRC_MMIO
help
Enable 24-bit TIMER0 and TIMER1 counters in the NPCM7xx architecture,
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index aa4ec53281ce..ea373cfbcecb 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -9,7 +9,7 @@
* published by the Free Software Foundation.
*/
-#define pr_fmt(fmt) "arm_arch_timer: " fmt
+#define pr_fmt(fmt) "arch_timer: " fmt
#include <linux/init.h>
#include <linux/kernel.h>
@@ -33,9 +33,6 @@
#include <clocksource/arm_arch_timer.h>
-#undef pr_fmt
-#define pr_fmt(fmt) "arch_timer: " fmt
-
#define CNTTIDR 0x08
#define CNTTIDR_VIRT(n) (BIT(1) << ((n) * 4))
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c
index eed6feff8b5f..30c6f4ce672b 100644
--- a/drivers/clocksource/timer-oxnas-rps.c
+++ b/drivers/clocksource/timer-oxnas-rps.c
@@ -296,4 +296,4 @@ err_alloc:
TIMER_OF_DECLARE(ox810se_rps,
"oxsemi,ox810se-rps-timer", oxnas_rps_timer_init);
TIMER_OF_DECLARE(ox820_rps,
- "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init);
+ "oxsemi,ox820-rps-timer", oxnas_rps_timer_init);
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 3352da6ed61f..ee8ec5a8cb16 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -585,34 +585,6 @@ static int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
return 0;
}
-/* Optimized set_load which removes costly spin wait in timer_start */
-static int omap_dm_timer_set_load_start(struct omap_dm_timer *timer,
- int autoreload, unsigned int load)
-{
- u32 l;
-
- if (unlikely(!timer))
- return -EINVAL;
-
- omap_dm_timer_enable(timer);
-
- l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
- if (autoreload) {
- l |= OMAP_TIMER_CTRL_AR;
- omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
- } else {
- l &= ~OMAP_TIMER_CTRL_AR;
- }
- l |= OMAP_TIMER_CTRL_ST;
-
- __omap_dm_timer_load_start(timer, l, load, timer->posted);
-
- /* Save the context */
- timer->context.tclr = l;
- timer->context.tldr = load;
- timer->context.tcrr = load;
- return 0;
-}
static int omap_dm_timer_set_match(struct omap_dm_timer *timer, int enable,
unsigned int match)
{
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index ec8a291d62ba..54093ffd0aef 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -671,7 +671,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
d = bcm2835_dma_create_cb_chain(chan, direction, false,
info, extra,
frames, src, dst, 0, 0,
- GFP_KERNEL);
+ GFP_NOWAIT);
if (!d)
return NULL;
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c
index 131f3974740d..814853842e29 100644
--- a/drivers/dma/mediatek/mtk-cqdma.c
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -253,7 +253,7 @@ static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc,
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT);
#else
- mtk_dma_set(pc, MTK_CQDMA_SRC2, 0);
+ mtk_dma_set(pc, MTK_CQDMA_DST2, 0);
#endif
/* setup the length */
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 2b4f25698169..e2a5398f89b5 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1282,6 +1282,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
enum dma_status status;
unsigned int residue = 0;
unsigned int dptr = 0;
+ unsigned int chcrb;
+ unsigned int tcrb;
+ unsigned int i;
if (!desc)
return 0;
@@ -1330,14 +1333,31 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
}
/*
+ * We need to read two registers.
+ * Make sure the control register does not skip to next chunk
+ * while reading the counter.
+ * Trying it 3 times should be enough: Initial read, retry, retry
+ * for the paranoid.
+ */
+ for (i = 0; i < 3; i++) {
+ chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+ RCAR_DMACHCRB_DPTR_MASK;
+ tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB);
+ /* Still the same? */
+ if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+ RCAR_DMACHCRB_DPTR_MASK))
+ break;
+ }
+ WARN_ONCE(i >= 3, "residue might not be continuous!");
+
+ /*
* In descriptor mode the descriptor running pointer is not maintained
* by the interrupt handler, find the running descriptor from the
* descriptor pointer field in the CHCRB register. In non-descriptor
* mode just use the running descriptor pointer.
*/
if (desc->hwdescs.use) {
- dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
- RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+ dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT;
if (dptr == 0)
dptr = desc->nchunks;
dptr--;
@@ -1355,7 +1375,7 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
}
/* Add the residue for the current chunk. */
- residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift;
+ residue += tcrb << desc->xfer_shift;
return residue;
}
@@ -1368,6 +1388,7 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
enum dma_status status;
unsigned long flags;
unsigned int residue;
+ bool cyclic;
status = dma_cookie_status(chan, cookie, txstate);
if (status == DMA_COMPLETE || !txstate)
@@ -1375,10 +1396,11 @@ static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
spin_lock_irqsave(&rchan->lock, flags);
residue = rcar_dmac_chan_get_residue(rchan, cookie);
+ cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false;
spin_unlock_irqrestore(&rchan->lock, flags);
/* if there's no residue, the cookie is complete */
- if (!residue)
+ if (!residue && !cyclic)
return DMA_COMPLETE;
dma_set_residue(txstate, residue);
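The residue fix above snapshots the descriptor-pointer and counter registers together, re-reading the pointer afterwards to confirm the hardware did not advance between the two reads (retrying up to three times), and additionally stops reporting DMA_COMPLETE for cyclic descriptors whose residue happens to be zero. A rough sketch of the snapshot loop, with hypothetical register accessors standing in for rcar_dmac_chan_read():

#include <linux/types.h>

/* Hypothetical MMIO accessors; in the driver these are channel register reads. */
static u32 read_ptr_reg(void);
static u32 read_cnt_reg(void);

static void read_ptr_cnt_pair(u32 *ptr, u32 *cnt)
{
	int i;

	/* Retry a few times in case the hardware moves on mid-snapshot. */
	for (i = 0; i < 3; i++) {
		*ptr = read_ptr_reg();
		*cnt = read_cnt_reg();
		if (*ptr == read_ptr_reg())	/* still the same descriptor? */
			break;
	}
}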
diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index 8e17149655f0..540e8cd16ee6 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -116,7 +116,7 @@ config EXTCON_PALMAS
config EXTCON_PTN5150
tristate "NXP PTN5150 CC LOGIC USB EXTCON support"
- depends on I2C && GPIOLIB || COMPILE_TEST
+ depends on I2C && (GPIOLIB || COMPILE_TEST)
select REGMAP_I2C
help
Say Y here to enable support for USB peripheral and USB host
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index f0223cee9774..77092268ee95 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -414,6 +414,7 @@ static int sprd_eic_irq_set_type(struct irq_data *data, unsigned int flow_type)
irq_set_handler_locked(data, handle_edge_irq);
break;
case IRQ_TYPE_EDGE_BOTH:
+ sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTMODE, 0);
sprd_eic_update(chip, offset, SPRD_EIC_SYNC_INTBOTH, 1);
irq_set_handler_locked(data, handle_edge_irq);
break;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 0495bf1d480a..bca3e7740ef6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1379,7 +1379,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
status = gpiochip_add_irqchip(chip, lock_key, request_key);
if (status)
- goto err_remove_chip;
+ goto err_free_gpiochip_mask;
status = of_gpiochip_add(chip);
if (status)
@@ -1387,7 +1387,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
status = gpiochip_init_valid_mask(chip);
if (status)
- goto err_remove_chip;
+ goto err_remove_of_chip;
for (i = 0; i < chip->ngpio; i++) {
struct gpio_desc *desc = &gdev->descs[i];
@@ -1415,14 +1415,18 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
if (gpiolib_initialized) {
status = gpiochip_setup_dev(gdev);
if (status)
- goto err_remove_chip;
+ goto err_remove_acpi_chip;
}
return 0;
-err_remove_chip:
+err_remove_acpi_chip:
acpi_gpiochip_remove(chip);
+err_remove_of_chip:
gpiochip_free_hogs(chip);
of_gpiochip_remove(chip);
+err_remove_chip:
+ gpiochip_irqchip_remove(chip);
+err_free_gpiochip_mask:
gpiochip_free_valid_mask(chip);
err_remove_irqchip_mask:
gpiochip_irqchip_free_valid_mask(chip);
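The gpiolib change above replaces a single catch-all err_remove_chip label with one label per setup step, so each failure path undoes only what was already done, in reverse order. A small sketch of that staged-unwind idiom with hypothetical setup/undo helpers (not the gpiolib functions):

#include <stdio.h>

/* Hypothetical setup/teardown pairs; all succeed (return 0) here. */
static int setup_a(void) { return 0; }
static int setup_b(void) { return 0; }
static int setup_c(void) { return 0; }
static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

/*
 * Staged error unwinding: each label undoes exactly the steps that
 * completed before the failure, in reverse order of setup.
 */
static int init_all(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		return ret;
	ret = setup_b();
	if (ret)
		goto err_undo_a;
	ret = setup_c();
	if (ret)
		goto err_undo_b;
	return 0;

err_undo_b:
	undo_b();
err_undo_a:
	undo_a();
	return ret;
}

int main(void) { return init_all(); }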
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5d8b30fd4534..79fb302fb954 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3165,6 +3165,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
/* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
+ shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index d0d966d6080a..1696644ec022 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -182,6 +182,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index c68fbd55db3c..a6cda201c964 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1377,6 +1377,11 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
return UPDATE_TYPE_FULL;
}
+ if (u->surface->force_full_update) {
+ update_flags->bits.full_update = 1;
+ return UPDATE_TYPE_FULL;
+ }
+
type = get_plane_info_update_type(u);
elevate_update_type(&overall_type, type);
@@ -1802,6 +1807,14 @@ void dc_commit_updates_for_stream(struct dc *dc,
}
dc_resource_state_copy_construct(state, context);
+
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+ if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
+ new_pipe->plane_state->force_full_update = true;
+ }
}
@@ -1838,6 +1851,12 @@ void dc_commit_updates_for_stream(struct dc *dc,
dc->current_state = context;
dc_release_state(old);
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+ if (pipe_ctx->plane_state && pipe_ctx->stream == stream)
+ pipe_ctx->plane_state->force_full_update = false;
+ }
}
/*let's use current_state to update watermark etc*/
if (update_type >= UPDATE_TYPE_FULL)
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 1a7fd6aa77eb..0515095574e7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -503,6 +503,9 @@ struct dc_plane_state {
struct dc_plane_status status;
struct dc_context *ctx;
+ /* HACK: Workaround for forcing full reprogramming under some conditions */
+ bool force_full_update;
+
/* private to dc_surface.c */
enum dc_irq_source irq_source;
struct kref refcount;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index 4febf4ef7240..4fe3664fb495 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -190,6 +190,12 @@ static void submit_channel_request(
1,
0);
}
+
+ REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1);
+
+ REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0,
+ 10, aux110->timeout_period/10);
+
/* set the delay and the number of bytes to write */
/* The length include
@@ -242,9 +248,6 @@ static void submit_channel_request(
}
}
- REG_UPDATE(AUX_INTERRUPT_CONTROL, AUX_SW_DONE_ACK, 1);
- REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 0,
- 10, aux110->timeout_period/10);
REG_UPDATE(AUX_SW_CONTROL, AUX_SW_GO, 1);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
index d27f22c05e4b..e28ed6a00ff4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.h
@@ -71,11 +71,11 @@ enum { /* This is the timeout as defined in DP 1.2a,
* at most within ~240usec. That means,
* increasing this timeout will not affect normal operation,
* and we'll timeout after
- * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 1600usec.
+ * SW_AUX_TIMEOUT_PERIOD_MULTIPLIER * AUX_TIMEOUT_PERIOD = 2400usec.
* This timeout is especially important for
- * resume from S3 and CTS.
+ * converters, resume from S3, and CTS.
*/
- SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 4
+ SW_AUX_TIMEOUT_PERIOD_MULTIPLIER = 6
};
struct dce_aux {
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index db761329a1e3..ab7968c8f6a2 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1046,6 +1046,10 @@ static bool dw_hdmi_support_scdc(struct dw_hdmi *hdmi)
if (hdmi->version < 0x200a)
return false;
+ /* Disable if no DDC bus */
+ if (!hdmi->ddc)
+ return false;
+
/* Disable if SCDC is not supported, or if an HF-VSDB block is absent */
if (!display->hdmi.scdc.supported ||
!display->hdmi.scdc.scrambling.supported)
@@ -1684,13 +1688,13 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
* Source Devices compliant shall set the
* Source Version = 1.
*/
- drm_scdc_readb(&hdmi->i2c->adap, SCDC_SINK_VERSION,
+ drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION,
&bytes);
- drm_scdc_writeb(&hdmi->i2c->adap, SCDC_SOURCE_VERSION,
+ drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION,
min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
/* Enabled Scrambling in the Sink */
- drm_scdc_set_scrambling(&hdmi->i2c->adap, 1);
+ drm_scdc_set_scrambling(hdmi->ddc, 1);
/*
* To activate the scrambler feature, you must ensure
@@ -1706,7 +1710,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
HDMI_MC_SWRSTZ);
- drm_scdc_set_scrambling(&hdmi->i2c->adap, 0);
+ drm_scdc_set_scrambling(hdmi->ddc, 0);
}
}
@@ -1800,6 +1804,8 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi)
* iteration for others.
* The Amlogic Meson GX SoCs (v2.01a) have been identified as needing
* the workaround with a single iteration.
+ * The Rockchip RK3288 SoC (v2.00a) and RK3328/RK3399 SoCs (v2.11a) have
+ * been identified as needing the workaround with a single iteration.
*/
switch (hdmi->version) {
@@ -1808,7 +1814,9 @@ static void dw_hdmi_clear_overflow(struct dw_hdmi *hdmi)
break;
case 0x131a:
case 0x132a:
+ case 0x200a:
case 0x201a:
+ case 0x211a:
case 0x212a:
count = 1;
break;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 02adcaf6ebea..16f80a448820 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1667,6 +1667,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
len)) {
end_user:
user_access_end();
+end:
kvfree(relocs);
err = -EFAULT;
goto err;
@@ -1686,7 +1687,7 @@ end_user:
* relocations were valid.
*/
if (!user_access_begin(urelocs, size))
- goto end_user;
+ goto end;
for (copied = 0; copied < nreloc; copied++)
unsafe_put_user(-1,
@@ -2695,7 +2696,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
* when we did the "copy_from_user()" above.
*/
if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
- goto end_user;
+ goto end;
for (i = 0; i < args->buffer_count; i++) {
if (!(exec2_list[i].offset & UPDATE))
@@ -2709,6 +2710,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
}
end_user:
user_access_end();
+end:;
}
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index ab4e60dfd6a3..98cea1f4b3bf 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3862,14 +3862,16 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
ret = intel_hdmi_compute_config(encoder, pipe_config, conn_state);
else
ret = intel_dp_compute_config(encoder, pipe_config, conn_state);
+ if (ret)
+ return ret;
- if (IS_GEN9_LP(dev_priv) && ret)
+ if (IS_GEN9_LP(dev_priv))
pipe_config->lane_lat_optim_mask =
bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
- return ret;
+ return 0;
}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 8891f29a8c7f..48da4a969a0a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1886,6 +1886,9 @@ static int intel_dp_dsc_compute_config(struct intel_dp *intel_dp,
int pipe_bpp;
int ret;
+ pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) &&
+ intel_dp_supports_fec(intel_dp, pipe_config);
+
if (!intel_dp_supports_dsc(intel_dp, pipe_config))
return -EINVAL;
@@ -2116,9 +2119,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
return -EINVAL;
- pipe_config->fec_enable = !intel_dp_is_edp(intel_dp) &&
- intel_dp_supports_fec(intel_dp, pipe_config);
-
ret = intel_dp_compute_link_config(encoder, pipe_config, conn_state);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index e8f694b57b8a..376ffe842e26 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -338,8 +338,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
bool *enabled, int width, int height)
{
struct drm_i915_private *dev_priv = to_i915(fb_helper->dev);
+ unsigned long conn_configured, conn_seq, mask;
unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
- unsigned long conn_configured, conn_seq;
int i, j;
bool *save_enabled;
bool fallback = true, ret = true;
@@ -357,9 +357,10 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper,
drm_modeset_backoff(&ctx);
memcpy(save_enabled, enabled, count);
- conn_seq = GENMASK(count - 1, 0);
+ mask = GENMASK(count - 1, 0);
conn_configured = 0;
retry:
+ conn_seq = conn_configured;
for (i = 0; i < count; i++) {
struct drm_fb_helper_connector *fb_conn;
struct drm_connector *connector;
@@ -372,8 +373,7 @@ retry:
if (conn_configured & BIT(i))
continue;
- /* First pass, only consider tiled connectors */
- if (conn_seq == GENMASK(count - 1, 0) && !connector->has_tile)
+ if (conn_seq == 0 && !connector->has_tile)
continue;
if (connector->status == connector_status_connected)
@@ -477,10 +477,8 @@ retry:
conn_configured |= BIT(i);
}
- if (conn_configured != conn_seq) { /* repeat until no more are found */
- conn_seq = conn_configured;
+ if ((conn_configured & mask) != mask && conn_configured != conn_seq)
goto retry;
- }
/*
* If the BIOS didn't enable everything it could, fall back to have the
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index ec3602ebbc1c..54011df8c2e8 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -71,7 +71,7 @@ static void ipu_crtc_disable_planes(struct ipu_crtc *ipu_crtc,
if (disable_partial)
ipu_plane_disable(ipu_crtc->plane[1], true);
if (disable_full)
- ipu_plane_disable(ipu_crtc->plane[0], false);
+ ipu_plane_disable(ipu_crtc->plane[0], true);
}
static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 578d867a81d5..f33e349c4ec5 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -255,10 +255,14 @@ static struct drm_driver qxl_driver = {
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = qxl_debugfs_init,
#endif
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = drm_gem_prime_export,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_pin = qxl_gem_prime_pin,
.gem_prime_unpin = qxl_gem_prime_unpin,
+ .gem_prime_get_sg_table = qxl_gem_prime_get_sg_table,
+ .gem_prime_import_sg_table = qxl_gem_prime_import_sg_table,
.gem_prime_vmap = qxl_gem_prime_vmap,
.gem_prime_vunmap = qxl_gem_prime_vunmap,
.gem_prime_mmap = qxl_gem_prime_mmap,
diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c
index 8b448eca1cd9..114653b471c6 100644
--- a/drivers/gpu/drm/qxl/qxl_prime.c
+++ b/drivers/gpu/drm/qxl/qxl_prime.c
@@ -42,6 +42,18 @@ void qxl_gem_prime_unpin(struct drm_gem_object *obj)
qxl_bo_unpin(bo);
}
+struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+struct drm_gem_object *qxl_gem_prime_import_sg_table(
+ struct drm_device *dev, struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
void *qxl_gem_prime_vmap(struct drm_gem_object *obj)
{
struct qxl_bo *bo = gem_to_qxl_bo(obj);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 19fc601c9eeb..a1bec2779e76 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -366,10 +366,9 @@ void drm_sched_increase_karma(struct drm_sched_job *bad)
EXPORT_SYMBOL(drm_sched_increase_karma);
/**
- * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job
+ * drm_sched_stop - stop the scheduler
*
* @sched: scheduler instance
- * @bad: bad scheduler job
*
*/
void drm_sched_stop(struct drm_gpu_scheduler *sched)
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 3ebd9f5e2719..29258b404e54 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -16,6 +16,7 @@
#include <linux/of_reserved_mem.h>
#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_cma_helper.h>
@@ -85,6 +86,8 @@ static int sun4i_drv_bind(struct device *dev)
ret = -ENOMEM;
goto free_drm;
}
+
+ dev_set_drvdata(dev, drm);
drm->dev_private = drv;
INIT_LIST_HEAD(&drv->frontend_list);
INIT_LIST_HEAD(&drv->engine_list);
@@ -144,8 +147,12 @@ static void sun4i_drv_unbind(struct device *dev)
drm_dev_unregister(drm);
drm_kms_helper_poll_fini(drm);
+ drm_atomic_helper_shutdown(drm);
drm_mode_config_cleanup(drm);
+
+ component_unbind_all(dev, NULL);
of_reserved_mem_device_release(dev);
+
drm_dev_put(drm);
}
@@ -395,6 +402,8 @@ static int sun4i_drv_probe(struct platform_device *pdev)
static int sun4i_drv_remove(struct platform_device *pdev)
{
+ component_master_del(&pdev->dev, &sun4i_drv_master_ops);
+
return 0;
}
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 47c55974756d..d23c4bfde790 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -1260,9 +1260,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
hdmi->dvi = !tegra_output_is_hdmi(output);
if (!hdmi->dvi) {
- err = tegra_hdmi_setup_audio(hdmi);
- if (err < 0)
- hdmi->dvi = true;
+ /*
+ * Make sure that the audio format has been configured before
+ * enabling audio, otherwise we may try to divide by zero.
+ */
+ if (hdmi->format.sample_rate > 0) {
+ err = tegra_hdmi_setup_audio(hdmi);
+ if (err < 0)
+ hdmi->dvi = true;
+ }
}
if (hdmi->config->has_hda)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3f56647cdb35..1a01669b159a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -49,9 +49,8 @@ static void ttm_bo_global_kobj_release(struct kobject *kobj);
* ttm_global_mutex - protecting the global BO state
*/
DEFINE_MUTEX(ttm_global_mutex);
-struct ttm_bo_global ttm_bo_glob = {
- .use_count = 0
-};
+unsigned ttm_bo_glob_use_count;
+struct ttm_bo_global ttm_bo_glob;
static struct attribute ttm_bo_count = {
.name = "bo_count",
@@ -876,8 +875,10 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
reservation_object_add_shared_fence(bo->resv, fence);
ret = reservation_object_reserve_shared(bo->resv, 1);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ dma_fence_put(fence);
return ret;
+ }
dma_fence_put(bo->moving);
bo->moving = fence;
@@ -1529,12 +1530,13 @@ static void ttm_bo_global_release(void)
struct ttm_bo_global *glob = &ttm_bo_glob;
mutex_lock(&ttm_global_mutex);
- if (--glob->use_count > 0)
+ if (--ttm_bo_glob_use_count > 0)
goto out;
kobject_del(&glob->kobj);
kobject_put(&glob->kobj);
ttm_mem_global_release(&ttm_mem_glob);
+ memset(glob, 0, sizeof(*glob));
out:
mutex_unlock(&ttm_global_mutex);
}
@@ -1546,7 +1548,7 @@ static int ttm_bo_global_init(void)
unsigned i;
mutex_lock(&ttm_global_mutex);
- if (++glob->use_count > 1)
+ if (++ttm_bo_glob_use_count > 1)
goto out;
ret = ttm_mem_global_init(&ttm_mem_glob);
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index f1567c353b54..9a0909decb36 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -461,8 +461,8 @@ out_no_zone:
void ttm_mem_global_release(struct ttm_mem_global *glob)
{
- unsigned int i;
struct ttm_mem_zone *zone;
+ unsigned int i;
/* let the page allocator first stop the shrink work. */
ttm_page_alloc_fini();
@@ -475,9 +475,10 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
zone = glob->zones[i];
kobject_del(&zone->kobj);
kobject_put(&zone->kobj);
- }
+ }
kobject_del(&glob->kobj);
kobject_put(&glob->kobj);
+ memset(glob, 0, sizeof(*glob));
}
static void ttm_check_swapping(struct ttm_mem_global *glob)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index f841accc2c00..627f8dc91d0e 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -730,9 +730,10 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (!(flags & TTM_PAGE_FLAG_DMA32)) {
- for (j = 0; j < HPAGE_PMD_NR; ++j)
- if (p++ != pages[i + j])
+ if (!(flags & TTM_PAGE_FLAG_DMA32) &&
+ (npages - i) >= HPAGE_PMD_NR) {
+ for (j = 1; j < HPAGE_PMD_NR; ++j)
+ if (++p != pages[i + j])
break;
if (j == HPAGE_PMD_NR)
@@ -759,15 +760,15 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags,
unsigned max_size, n2free;
spin_lock_irqsave(&huge->lock, irq_flags);
- while (i < npages) {
+ while ((npages - i) >= HPAGE_PMD_NR) {
struct page *p = pages[i];
unsigned j;
if (!p)
break;
- for (j = 0; j < HPAGE_PMD_NR; ++j)
- if (p++ != pages[i + j])
+ for (j = 1; j < HPAGE_PMD_NR; ++j)
+ if (++p != pages[i + j])
break;
if (j != HPAGE_PMD_NR)
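The ttm_put_pages() fix above makes the huge-page check both stay inside the array and start comparing at the second element (the old loop compared the first page against itself). A standalone sketch of the corrected contiguity test, with plain integers standing in for page addresses and RUN_LEN standing in for HPAGE_PMD_NR:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define RUN_LEN 8	/* stands in for HPAGE_PMD_NR */

/*
 * Return true if pages[i..i+RUN_LEN-1] fits inside the array and the
 * entries are consecutive. Starting j at 1 and pre-incrementing the
 * cursor mirrors the corrected loop above.
 */
static bool run_is_contiguous(const long *pages, size_t npages, size_t i)
{
	if (npages - i < RUN_LEN)
		return false;		/* not enough entries left for a full run */

	long p = pages[i];
	for (size_t j = 1; j < RUN_LEN; ++j)
		if (++p != pages[i + j])
			return false;
	return true;
}

int main(void)
{
	long pages[10] = { 100, 101, 102, 103, 104, 105, 106, 107, 200, 201 };

	printf("%d\n", run_is_contiguous(pages, 10, 0));	/* 1: contiguous run */
	printf("%d\n", run_is_contiguous(pages, 10, 4));	/* 0: would overrun the array */
	return 0;
}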
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 730008d3da76..1baa10e94484 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -1042,7 +1042,7 @@ static void
vc4_crtc_reset(struct drm_crtc *crtc)
{
if (crtc->state)
- __drm_atomic_helper_crtc_destroy_state(crtc->state);
+ vc4_crtc_destroy_state(crtc, crtc->state);
crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
if (crtc->state)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index b996ac1d4fcc..af92964b6889 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -205,10 +205,14 @@ static struct drm_driver driver = {
#if defined(CONFIG_DEBUG_FS)
.debugfs_init = virtio_gpu_debugfs_init,
#endif
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = drm_gem_prime_export,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_pin = virtgpu_gem_prime_pin,
.gem_prime_unpin = virtgpu_gem_prime_unpin,
+ .gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table,
+ .gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
.gem_prime_vmap = virtgpu_gem_prime_vmap,
.gem_prime_vunmap = virtgpu_gem_prime_vunmap,
.gem_prime_mmap = virtgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 3238fdf58eb4..d577cb76f5ad 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -354,6 +354,10 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait);
/* virtgpu_prime.c */
int virtgpu_gem_prime_pin(struct drm_gem_object *obj);
void virtgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+ struct drm_device *dev, struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int virtgpu_gem_prime_mmap(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c
index c59ec34c80a5..eb51a78e1199 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -39,6 +39,18 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj)
WARN_ONCE(1, "not implemented");
}
+struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+ return ERR_PTR(-ENODEV);
+}
+
+struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
+ struct drm_device *dev, struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ return ERR_PTR(-ENODEV);
+}
+
void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6165fe2c4504..1bfa353d995c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -546,29 +546,13 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv)
}
/**
- * vmw_assume_iommu - Figure out whether coherent dma-remapping might be
- * taking place.
- * @dev: Pointer to the struct drm_device.
- *
- * Return: true if iommu present, false otherwise.
- */
-static bool vmw_assume_iommu(struct drm_device *dev)
-{
- const struct dma_map_ops *ops = get_dma_ops(dev->dev);
-
- return !dma_is_direct(ops) && ops &&
- ops->map_page != dma_direct_map_page;
-}
-
-/**
* vmw_dma_select_mode - Determine how DMA mappings should be set up for this
* system.
*
* @dev_priv: Pointer to a struct vmw_private
*
- * This functions tries to determine the IOMMU setup and what actions
- * need to be taken by the driver to make system pages visible to the
- * device.
+ * This function tries to determine what actions need to be taken by the
+ * driver to make system pages visible to the device.
* If this function decides that DMA is not possible, it returns -EINVAL.
* The driver may then try to disable features of the device that require
* DMA.
@@ -578,23 +562,16 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
static const char *names[vmw_dma_map_max] = {
[vmw_dma_phys] = "Using physical TTM page addresses.",
[vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
- [vmw_dma_map_populate] = "Keeping DMA mappings.",
+ [vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
if (vmw_force_coherent)
dev_priv->map_mode = vmw_dma_alloc_coherent;
- else if (vmw_assume_iommu(dev_priv->dev))
- dev_priv->map_mode = vmw_dma_map_populate;
- else if (!vmw_force_iommu)
- dev_priv->map_mode = vmw_dma_phys;
- else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl())
- dev_priv->map_mode = vmw_dma_alloc_coherent;
+ else if (vmw_restrict_iommu)
+ dev_priv->map_mode = vmw_dma_map_bind;
else
dev_priv->map_mode = vmw_dma_map_populate;
- if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu)
- dev_priv->map_mode = vmw_dma_map_bind;
-
/* No TTM coherent page pool? FIXME: Ask TTM instead! */
if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) &&
(dev_priv->map_mode == vmw_dma_alloc_coherent))
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 4030d64916f0..0c0eb43abf65 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -114,9 +114,13 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
static void host1x_channel_set_streamid(struct host1x_channel *channel)
{
-#if IS_ENABLED(CONFIG_IOMMU_API) && HOST1X_HW >= 6
+#if HOST1X_HW >= 6
+ u32 sid = 0x7f;
+#ifdef CONFIG_IOMMU_API
struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
- u32 sid = spec ? spec->ids[0] & 0xffff : 0x7f;
+ if (spec)
+ sid = spec->ids[0] & 0xffff;
+#endif
host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
#endif
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c
index 9b2b3fa479c4..5e44ff1f2085 100644
--- a/drivers/gpu/ipu-v3/ipu-dp.c
+++ b/drivers/gpu/ipu-v3/ipu-dp.c
@@ -195,7 +195,8 @@ int ipu_dp_setup_channel(struct ipu_dp *dp,
ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
DP_COM_CONF_CSC_DEF_BOTH);
} else {
- if (flow->foreground.in_cs == flow->out_cs)
+ if (flow->foreground.in_cs == IPUV3_COLORSPACE_UNKNOWN ||
+ flow->foreground.in_cs == flow->out_cs)
/*
* foreground identical to output, apply color
* conversion on background
@@ -261,6 +262,8 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
struct ipu_dp_priv *priv = flow->priv;
u32 reg, csc;
+ dp->in_cs = IPUV3_COLORSPACE_UNKNOWN;
+
if (!dp->foreground)
return;
@@ -268,8 +271,9 @@ void ipu_dp_disable_channel(struct ipu_dp *dp, bool sync)
reg = readl(flow->base + DP_COM_CONF);
csc = reg & DP_COM_CONF_CSC_DEF_MASK;
- if (csc == DP_COM_CONF_CSC_DEF_FG)
- reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+ reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+ if (csc == DP_COM_CONF_CSC_DEF_BOTH || csc == DP_COM_CONF_CSC_DEF_BG)
+ reg |= DP_COM_CONF_CSC_DEF_BG;
reg &= ~DP_COM_CONF_FG_EN;
writel(reg, flow->base + DP_COM_CONF);
@@ -347,6 +351,8 @@ int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base)
mutex_init(&priv->mutex);
for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
+ priv->flow[i].background.in_cs = IPUV3_COLORSPACE_UNKNOWN;
+ priv->flow[i].foreground.in_cs = IPUV3_COLORSPACE_UNKNOWN;
priv->flow[i].foreground.foreground = true;
priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
priv->flow[i].priv = priv;
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 1fce0076e7dc..b607286a0bc8 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -680,6 +680,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
break;
}
+ if ((usage->hid & 0xf0) == 0xb0) { /* SC - Display */
+ switch (usage->hid & 0xf) {
+ case 0x05: map_key_clear(KEY_SWITCHVIDEOMODE); break;
+ default: goto ignore;
+ }
+ break;
+ }
+
/*
* Some lazy vendors declare 255 usages for System Control,
* leading to the creation of ABS_X|Y axis and too many others.
@@ -902,7 +910,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x06a: map_key_clear(KEY_GREEN); break;
case 0x06b: map_key_clear(KEY_BLUE); break;
case 0x06c: map_key_clear(KEY_YELLOW); break;
- case 0x06d: map_key_clear(KEY_ZOOM); break;
+ case 0x06d: map_key_clear(KEY_ASPECT_RATIO); break;
case 0x06f: map_key_clear(KEY_BRIGHTNESSUP); break;
case 0x070: map_key_clear(KEY_BRIGHTNESSDOWN); break;
@@ -911,6 +919,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX); break;
case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO); break;
+ case 0x079: map_key_clear(KEY_KBDILLUMUP); break;
+ case 0x07a: map_key_clear(KEY_KBDILLUMDOWN); break;
+ case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE); break;
+
case 0x082: map_key_clear(KEY_VIDEO_NEXT); break;
case 0x083: map_key_clear(KEY_LAST); break;
case 0x084: map_key_clear(KEY_ENTER); break;
@@ -1022,6 +1034,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x22d: map_key_clear(KEY_ZOOMIN); break;
case 0x22e: map_key_clear(KEY_ZOOMOUT); break;
case 0x22f: map_key_clear(KEY_ZOOMRESET); break;
+ case 0x232: map_key_clear(KEY_FULL_SCREEN); break;
case 0x233: map_key_clear(KEY_SCROLLUP); break;
case 0x234: map_key_clear(KEY_SCROLLDOWN); break;
case 0x238: /* AC Pan */
@@ -1045,6 +1058,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
case 0x2cb: map_key_clear(KEY_KBDINPUTASSIST_ACCEPT); break;
case 0x2cc: map_key_clear(KEY_KBDINPUTASSIST_CANCEL); break;
+ case 0x29f: map_key_clear(KEY_SCALE); break;
+
default: map_key_clear(KEY_UNKNOWN);
}
break;
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index bb8e3f149979..d464799e40a3 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -426,8 +426,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
pm_runtime_get_sync(dev->dev);
- if (dev->suspended) {
- dev_err(dev->dev, "Error %s call while suspended\n", __func__);
+ if (dev_WARN_ONCE(dev->dev, dev->suspended, "Transfer while suspended\n")) {
ret = -ESHUTDOWN;
goto done_nolock;
}
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index c0c3043b5d61..fd70b110e8f4 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -515,9 +515,9 @@ static int i2c_imx_clk_notifier_call(struct notifier_block *nb,
unsigned long action, void *data)
{
struct clk_notifier_data *ndata = data;
- struct imx_i2c_struct *i2c_imx = container_of(&ndata->clk,
+ struct imx_i2c_struct *i2c_imx = container_of(nb,
struct imx_i2c_struct,
- clk);
+ clk_change_nb);
if (action & POST_RATE_CHANGE)
i2c_imx_set_clk(i2c_imx, ndata->new_rate);
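The i2c-imx fix above matters because container_of() must be handed a pointer to the member that is actually embedded in the containing structure — here the notifier_block, not &ndata->clk. A minimal sketch with a hypothetical structure showing the correct pairing:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Hypothetical callback node, like the notifier_block in imx_i2c_struct. */
struct callback_node { int id; };

struct my_device {
	int rate;
	struct callback_node nb;	/* the embedded member we get called with */
};

/*
 * The callback receives a pointer to the embedded node, so container_of()
 * must name that same member; naming any other field yields a bogus pointer.
 */
static void on_event(struct callback_node *node)
{
	struct my_device *dev = container_of(node, struct my_device, nb);

	printf("rate = %d\n", dev->rate);
}

int main(void)
{
	struct my_device d = { .rate = 100000, .nb = { .id = 1 } };

	on_event(&d.nb);
	return 0;
}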
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index d18b0941b71a..f14d4b3fab44 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -597,6 +597,8 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
i2c->adapter = synquacer_i2c_ops;
i2c_set_adapdata(&i2c->adapter, i2c);
i2c->adapter.dev.parent = &pdev->dev;
+ i2c->adapter.dev.of_node = pdev->dev.of_node;
+ ACPI_COMPANION_SET(&i2c->adapter.dev, ACPI_COMPANION(&pdev->dev));
i2c->adapter.nr = pdev->id;
init_completion(&i2c->completion);
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 38af18645133..688aa3b5f3ac 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -185,7 +185,7 @@ static int i2c_generic_bus_free(struct i2c_adapter *adap)
int i2c_generic_scl_recovery(struct i2c_adapter *adap)
{
struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
- int i = 0, scl = 1, ret;
+ int i = 0, scl = 1, ret = 0;
if (bri->prepare_recovery)
bri->prepare_recovery(adap);
@@ -327,6 +327,8 @@ static int i2c_device_probe(struct device *dev)
if (client->flags & I2C_CLIENT_HOST_NOTIFY) {
dev_dbg(dev, "Using Host Notify IRQ\n");
+ /* Keep adapter active when Host Notify is required */
+ pm_runtime_get_sync(&client->adapter->dev);
irq = i2c_smbus_host_notify_to_irq(client);
} else if (dev->of_node) {
irq = of_irq_get_byname(dev->of_node, "irq");
@@ -431,6 +433,8 @@ static int i2c_device_remove(struct device *dev)
device_init_wakeup(&client->dev, false);
client->irq = client->init_irq;
+ if (client->flags & I2C_CLIENT_HOST_NOTIFY)
+ pm_runtime_put(&client->adapter->dev);
return status;
}
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 2dc628d4f1ae..1412abcff010 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -1980,7 +1980,6 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master,
{
struct i3c_dev_boardinfo *boardinfo;
struct device *dev = &master->dev;
- struct i3c_device_info info = { };
enum i3c_addr_slot_status addrstatus;
u32 init_dyn_addr = 0;
@@ -2012,8 +2011,8 @@ of_i3c_master_add_i3c_boardinfo(struct i3c_master_controller *master,
boardinfo->pid = ((u64)reg[1] << 32) | reg[2];
- if ((info.pid & GENMASK_ULL(63, 48)) ||
- I3C_PID_RND_LOWER_32BITS(info.pid))
+ if ((boardinfo->pid & GENMASK_ULL(63, 48)) ||
+ I3C_PID_RND_LOWER_32BITS(boardinfo->pid))
return -EINVAL;
boardinfo->init_dyn_addr = init_dyn_addr;
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index 59279224e07f..10c26ffaa8ef 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -300,7 +300,7 @@ to_dw_i3c_master(struct i3c_master_controller *master)
static void dw_i3c_master_disable(struct dw_i3c_master *master)
{
- writel(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_ENABLE,
+ writel(readl(master->regs + DEVICE_CTRL) & ~DEV_CTRL_ENABLE,
master->regs + DEVICE_CTRL);
}
diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
index 7096e577b23f..50f3ff386bea 100644
--- a/drivers/iio/accel/kxcjk-1013.c
+++ b/drivers/iio/accel/kxcjk-1013.c
@@ -1437,6 +1437,8 @@ static int kxcjk1013_resume(struct device *dev)
mutex_lock(&data->mutex);
ret = kxcjk1013_set_mode(data, OPERATION);
+ if (ret == 0)
+ ret = kxcjk1013_set_range(data, data->range);
mutex_unlock(&data->mutex);
return ret;
diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index ff5f2da2e1b1..54d9978b2740 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -121,6 +121,7 @@ static int ad_sd_read_reg_raw(struct ad_sigma_delta *sigma_delta,
if (sigma_delta->info->has_registers) {
data[0] = reg << sigma_delta->info->addr_shift;
data[0] |= sigma_delta->info->read_mask;
+ data[0] |= sigma_delta->comm;
spi_message_add_tail(&t[0], &m);
}
spi_message_add_tail(&t[1], &m);
diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 75d2f73582a3..596841a3c4db 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -704,23 +704,29 @@ static int at91_adc_read_raw(struct iio_dev *idev,
ret = wait_event_interruptible_timeout(st->wq_data_avail,
st->done,
msecs_to_jiffies(1000));
- if (ret == 0)
- ret = -ETIMEDOUT;
- if (ret < 0) {
- mutex_unlock(&st->lock);
- return ret;
- }
-
- *val = st->last_value;
+ /* Disable interrupts, regardless of whether the ADC conversion
+ * was successful or not
+ */
at91_adc_writel(st, AT91_ADC_CHDR,
AT91_ADC_CH(chan->channel));
at91_adc_writel(st, AT91_ADC_IDR, BIT(chan->channel));
- st->last_value = 0;
- st->done = false;
+ if (ret > 0) {
+ /* a valid conversion took place */
+ *val = st->last_value;
+ st->last_value = 0;
+ st->done = false;
+ ret = IIO_VAL_INT;
+ } else if (ret == 0) {
+ /* conversion timeout */
+ dev_err(&idev->dev, "ADC Channel %d timeout.\n",
+ chan->channel);
+ ret = -ETIMEDOUT;
+ }
+
mutex_unlock(&st->lock);
- return IIO_VAL_INT;
+ return ret;
case IIO_CHAN_INFO_SCALE:
*val = st->vref_mv;
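The at91_adc change above distinguishes the three outcomes of wait_event_interruptible_timeout() — positive means the condition was met, zero means the timeout expired, negative means the wait was interrupted — and disables the channel interrupts in every case. A userspace sketch of the same three-way handling, with a stub in place of the real wait:

#include <errno.h>
#include <stdio.h>

/*
 * Stand-in for wait_event_interruptible_timeout(): > 0 means the condition
 * became true, 0 means the timeout expired, < 0 means a signal interrupted.
 */
static long fake_wait_for_conversion(void)
{
	return 1;	/* pretend the conversion completed in time */
}

static int read_channel(int *val)
{
	long ret = fake_wait_for_conversion();

	/* Always quiesce the hardware first, as the patch above does. */

	if (ret > 0) {
		*val = 42;		/* a valid conversion result */
		return 0;
	}
	if (ret == 0) {
		fprintf(stderr, "conversion timed out\n");
		return -ETIMEDOUT;
	}
	return (int)ret;		/* interrupted: propagate the error */
}

int main(void)
{
	int v;

	return read_channel(&v);
}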
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c
index b13c61539d46..6401ca7a9a20 100644
--- a/drivers/iio/adc/xilinx-xadc-core.c
+++ b/drivers/iio/adc/xilinx-xadc-core.c
@@ -1292,6 +1292,7 @@ static int xadc_probe(struct platform_device *pdev)
err_free_irq:
free_irq(xadc->irq, indio_dev);
+ cancel_delayed_work_sync(&xadc->zynq_unmask_work);
err_clk_disable_unprepare:
clk_disable_unprepare(xadc->clk);
err_free_samplerate_trigger:
@@ -1321,8 +1322,8 @@ static int xadc_remove(struct platform_device *pdev)
iio_triggered_buffer_cleanup(indio_dev);
}
free_irq(xadc->irq, indio_dev);
+ cancel_delayed_work_sync(&xadc->zynq_unmask_work);
clk_disable_unprepare(xadc->clk);
- cancel_delayed_work(&xadc->zynq_unmask_work);
kfree(xadc->data);
kfree(indio_dev->channels);
diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig
index d5d146e9e372..92c684d2b67e 100644
--- a/drivers/iio/chemical/Kconfig
+++ b/drivers/iio/chemical/Kconfig
@@ -64,6 +64,7 @@ config IAQCORE
config PMS7003
tristate "Plantower PMS7003 particulate matter sensor"
depends on SERIAL_DEV_BUS
+ select IIO_TRIGGERED_BUFFER
help
Say Y here to build support for the Plantower PMS7003 particulate
matter sensor.
@@ -71,6 +72,19 @@ config PMS7003
To compile this driver as a module, choose M here: the module will
be called pms7003.
+config SENSIRION_SGP30
+ tristate "Sensirion SGPxx gas sensors"
+ depends on I2C
+ select CRC8
+ help
+ Say Y here to build I2C interface support for the following
+ Sensirion SGP gas sensors:
+ * SGP30 gas sensor
+ * SGPC3 low power gas sensor
+
+ To compile this driver as module, choose M here: the
+ module will be called sgp30.
+
config SPS30
tristate "SPS30 particulate matter sensor"
depends on I2C
diff --git a/drivers/iio/chemical/bme680.h b/drivers/iio/chemical/bme680.h
index 0ae89b87e2d6..4edc5d21cb9f 100644
--- a/drivers/iio/chemical/bme680.h
+++ b/drivers/iio/chemical/bme680.h
@@ -2,11 +2,9 @@
#ifndef BME680_H_
#define BME680_H_
-#define BME680_REG_CHIP_I2C_ID 0xD0
-#define BME680_REG_CHIP_SPI_ID 0x50
+#define BME680_REG_CHIP_ID 0xD0
#define BME680_CHIP_ID_VAL 0x61
-#define BME680_REG_SOFT_RESET_I2C 0xE0
-#define BME680_REG_SOFT_RESET_SPI 0x60
+#define BME680_REG_SOFT_RESET 0xE0
#define BME680_CMD_SOFTRESET 0xB6
#define BME680_REG_STATUS 0x73
#define BME680_SPI_MEM_PAGE_BIT BIT(4)
diff --git a/drivers/iio/chemical/bme680_core.c b/drivers/iio/chemical/bme680_core.c
index 70c1fe4366f4..ccde4c65ff93 100644
--- a/drivers/iio/chemical/bme680_core.c
+++ b/drivers/iio/chemical/bme680_core.c
@@ -63,9 +63,23 @@ struct bme680_data {
s32 t_fine;
};
+static const struct regmap_range bme680_volatile_ranges[] = {
+ regmap_reg_range(BME680_REG_MEAS_STAT_0, BME680_REG_GAS_R_LSB),
+ regmap_reg_range(BME680_REG_STATUS, BME680_REG_STATUS),
+ regmap_reg_range(BME680_T2_LSB_REG, BME680_GH3_REG),
+};
+
+static const struct regmap_access_table bme680_volatile_table = {
+ .yes_ranges = bme680_volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(bme680_volatile_ranges),
+};
+
const struct regmap_config bme680_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
+ .max_register = 0xef,
+ .volatile_table = &bme680_volatile_table,
+ .cache_type = REGCACHE_RBTREE,
};
EXPORT_SYMBOL(bme680_regmap_config);
@@ -316,6 +330,10 @@ static s16 bme680_compensate_temp(struct bme680_data *data,
s64 var1, var2, var3;
s16 calc_temp;
+ /* If the calibration is invalid, attempt to reload it */
+ if (!calib->par_t2)
+ bme680_read_calib(data, calib);
+
var1 = (adc_temp >> 3) - (calib->par_t1 << 1);
var2 = (var1 * calib->par_t2) >> 11;
var3 = ((var1 >> 1) * (var1 >> 1)) >> 12;
@@ -583,8 +601,7 @@ static int bme680_gas_config(struct bme680_data *data)
return ret;
}
-static int bme680_read_temp(struct bme680_data *data,
- int *val, int *val2)
+static int bme680_read_temp(struct bme680_data *data, int *val)
{
struct device *dev = regmap_get_device(data->regmap);
int ret;
@@ -617,10 +634,9 @@ static int bme680_read_temp(struct bme680_data *data,
* compensate_press/compensate_humid to get compensated
* pressure/humidity readings.
*/
- if (val && val2) {
- *val = comp_temp;
- *val2 = 100;
- return IIO_VAL_FRACTIONAL;
+ if (val) {
+ *val = comp_temp * 10; /* Centidegrees to millidegrees */
+ return IIO_VAL_INT;
}
return ret;
@@ -635,7 +651,7 @@ static int bme680_read_press(struct bme680_data *data,
s32 adc_press;
/* Read and compensate temperature to get a reading of t_fine */
- ret = bme680_read_temp(data, NULL, NULL);
+ ret = bme680_read_temp(data, NULL);
if (ret < 0)
return ret;
@@ -668,7 +684,7 @@ static int bme680_read_humid(struct bme680_data *data,
u32 comp_humidity;
/* Read and compensate temperature to get a reading of t_fine */
- ret = bme680_read_temp(data, NULL, NULL);
+ ret = bme680_read_temp(data, NULL);
if (ret < 0)
return ret;
@@ -761,7 +777,7 @@ static int bme680_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_PROCESSED:
switch (chan->type) {
case IIO_TEMP:
- return bme680_read_temp(data, val, val2);
+ return bme680_read_temp(data, val);
case IIO_PRESSURE:
return bme680_read_press(data, val, val2);
case IIO_HUMIDITYRELATIVE:
@@ -867,8 +883,28 @@ int bme680_core_probe(struct device *dev, struct regmap *regmap,
{
struct iio_dev *indio_dev;
struct bme680_data *data;
+ unsigned int val;
int ret;
+ ret = regmap_write(regmap, BME680_REG_SOFT_RESET,
+ BME680_CMD_SOFTRESET);
+ if (ret < 0) {
+ dev_err(dev, "Failed to reset chip\n");
+ return ret;
+ }
+
+ ret = regmap_read(regmap, BME680_REG_CHIP_ID, &val);
+ if (ret < 0) {
+ dev_err(dev, "Error reading chip ID\n");
+ return ret;
+ }
+
+ if (val != BME680_CHIP_ID_VAL) {
+ dev_err(dev, "Wrong chip ID, got %x expected %x\n",
+ val, BME680_CHIP_ID_VAL);
+ return -ENODEV;
+ }
+
indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
return -ENOMEM;
diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c
index b2f805b6b36a..de9c9e3d23ea 100644
--- a/drivers/iio/chemical/bme680_i2c.c
+++ b/drivers/iio/chemical/bme680_i2c.c
@@ -23,8 +23,6 @@ static int bme680_i2c_probe(struct i2c_client *client,
{
struct regmap *regmap;
const char *name = NULL;
- unsigned int val;
- int ret;
regmap = devm_regmap_init_i2c(client, &bme680_regmap_config);
if (IS_ERR(regmap)) {
@@ -33,25 +31,6 @@ static int bme680_i2c_probe(struct i2c_client *client,
return PTR_ERR(regmap);
}
- ret = regmap_write(regmap, BME680_REG_SOFT_RESET_I2C,
- BME680_CMD_SOFTRESET);
- if (ret < 0) {
- dev_err(&client->dev, "Failed to reset chip\n");
- return ret;
- }
-
- ret = regmap_read(regmap, BME680_REG_CHIP_I2C_ID, &val);
- if (ret < 0) {
- dev_err(&client->dev, "Error reading I2C chip ID\n");
- return ret;
- }
-
- if (val != BME680_CHIP_ID_VAL) {
- dev_err(&client->dev, "Wrong chip ID, got %x expected %x\n",
- val, BME680_CHIP_ID_VAL);
- return -ENODEV;
- }
-
if (id)
name = id->name;
diff --git a/drivers/iio/chemical/bme680_spi.c b/drivers/iio/chemical/bme680_spi.c
index d0b7bdd3f066..3b838068a7e4 100644
--- a/drivers/iio/chemical/bme680_spi.c
+++ b/drivers/iio/chemical/bme680_spi.c
@@ -12,28 +12,93 @@
#include "bme680.h"
+struct bme680_spi_bus_context {
+ struct spi_device *spi;
+ u8 current_page;
+};
+
+/*
+ * In SPI mode there are only 7 address bits; a "page" register determines
+ * which part of the 8-bit range is active. This function looks at the address
+ * and writes the page selection bit if needed.
+ */
+static int bme680_regmap_spi_select_page(
+ struct bme680_spi_bus_context *ctx, u8 reg)
+{
+ struct spi_device *spi = ctx->spi;
+ int ret;
+ u8 buf[2];
+ u8 page = (reg & 0x80) ? 0 : 1; /* Page "1" is low range */
+
+ if (page == ctx->current_page)
+ return 0;
+
+ /*
+ * Data sheet claims we're only allowed to change bit 4, so we must do
+ * a read-modify-write on each and every page select
+ */
+ buf[0] = BME680_REG_STATUS;
+ ret = spi_write_then_read(spi, buf, 1, buf + 1, 1);
+ if (ret < 0) {
+ dev_err(&spi->dev, "failed to set page %u\n", page);
+ return ret;
+ }
+
+ buf[0] = BME680_REG_STATUS;
+ if (page)
+ buf[1] |= BME680_SPI_MEM_PAGE_BIT;
+ else
+ buf[1] &= ~BME680_SPI_MEM_PAGE_BIT;
+
+ ret = spi_write(spi, buf, 2);
+ if (ret < 0) {
+ dev_err(&spi->dev, "failed to set page %u\n", page);
+ return ret;
+ }
+
+ ctx->current_page = page;
+
+ return 0;
+}
+
static int bme680_regmap_spi_write(void *context, const void *data,
size_t count)
{
- struct spi_device *spi = context;
+ struct bme680_spi_bus_context *ctx = context;
+ struct spi_device *spi = ctx->spi;
+ int ret;
u8 buf[2];
memcpy(buf, data, 2);
+
+ ret = bme680_regmap_spi_select_page(ctx, buf[0]);
+ if (ret)
+ return ret;
+
/*
* The SPI register address (= full register address without bit 7)
* and the write command (bit7 = RW = '0')
*/
buf[0] &= ~0x80;
- return spi_write_then_read(spi, buf, 2, NULL, 0);
+ return spi_write(spi, buf, 2);
}
static int bme680_regmap_spi_read(void *context, const void *reg,
size_t reg_size, void *val, size_t val_size)
{
- struct spi_device *spi = context;
+ struct bme680_spi_bus_context *ctx = context;
+ struct spi_device *spi = ctx->spi;
+ int ret;
+ u8 addr = *(const u8 *)reg;
+
+ ret = bme680_regmap_spi_select_page(ctx, addr);
+ if (ret)
+ return ret;
- return spi_write_then_read(spi, reg, reg_size, val, val_size);
+ addr |= 0x80; /* bit7 = RW = '1' */
+
+ return spi_write_then_read(spi, &addr, 1, val, val_size);
}
static struct regmap_bus bme680_regmap_bus = {
@@ -46,8 +111,8 @@ static struct regmap_bus bme680_regmap_bus = {
static int bme680_spi_probe(struct spi_device *spi)
{
const struct spi_device_id *id = spi_get_device_id(spi);
+ struct bme680_spi_bus_context *bus_context;
struct regmap *regmap;
- unsigned int val;
int ret;
spi->bits_per_word = 8;
@@ -57,45 +122,21 @@ static int bme680_spi_probe(struct spi_device *spi)
return ret;
}
+ bus_context = devm_kzalloc(&spi->dev, sizeof(*bus_context), GFP_KERNEL);
+ if (!bus_context)
+ return -ENOMEM;
+
+ bus_context->spi = spi;
+ bus_context->current_page = 0xff; /* Undefined on warm boot */
+
regmap = devm_regmap_init(&spi->dev, &bme680_regmap_bus,
- &spi->dev, &bme680_regmap_config);
+ bus_context, &bme680_regmap_config);
if (IS_ERR(regmap)) {
dev_err(&spi->dev, "Failed to register spi regmap %d\n",
(int)PTR_ERR(regmap));
return PTR_ERR(regmap);
}
- ret = regmap_write(regmap, BME680_REG_SOFT_RESET_SPI,
- BME680_CMD_SOFTRESET);
- if (ret < 0) {
- dev_err(&spi->dev, "Failed to reset chip\n");
- return ret;
- }
-
- /* after power-on reset, Page 0(0x80-0xFF) of spi_mem_page is active */
- ret = regmap_read(regmap, BME680_REG_CHIP_SPI_ID, &val);
- if (ret < 0) {
- dev_err(&spi->dev, "Error reading SPI chip ID\n");
- return ret;
- }
-
- if (val != BME680_CHIP_ID_VAL) {
- dev_err(&spi->dev, "Wrong chip ID, got %x expected %x\n",
- val, BME680_CHIP_ID_VAL);
- return -ENODEV;
- }
- /*
- * select Page 1 of spi_mem_page to enable access to
- * to registers from address 0x00 to 0x7F.
- */
- ret = regmap_write_bits(regmap, BME680_REG_STATUS,
- BME680_SPI_MEM_PAGE_BIT,
- BME680_SPI_MEM_PAGE_1_VAL);
- if (ret < 0) {
- dev_err(&spi->dev, "failed to set page 1 of spi_mem_page\n");
- return ret;
- }
-
return bme680_core_probe(&spi->dev, regmap, id->name);
}
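The bme680_spi rework above hides the 7-bit SPI address limitation behind a cached page-select bit: registers 0x80-0xff live on page 0, 0x00-0x7f on page 1, and the page bit in the status register is only rewritten (read-modify-write) when the cached page differs. A toy sketch of that caching logic, with plain variables standing in for the SPI transfers:

#include <stdint.h>
#include <stdio.h>

/* Toy "bus" state standing in for the SPI transfers in the patch above. */
static uint8_t status_reg;		/* holds the page-select bit */
static uint8_t cached_page = 0xff;	/* 0xff = unknown, as after a warm boot */

#define PAGE_BIT 0x10			/* mirrors BME680_SPI_MEM_PAGE_BIT */

/*
 * Registers 0x80-0xff live on page 0, registers 0x00-0x7f on page 1.
 * Only switch pages when the cached value differs, and preserve the
 * other bits of the status register (read-modify-write).
 */
static int select_page_for(uint8_t reg)
{
	uint8_t page = (reg & 0x80) ? 0 : 1;

	if (page == cached_page)
		return 0;		/* already on the right page */

	if (page)
		status_reg |= PAGE_BIT;
	else
		status_reg &= ~PAGE_BIT;

	cached_page = page;
	printf("switched to page %u\n", page);
	return 0;
}

int main(void)
{
	select_page_for(0xd0);	/* chip ID register: page 0 */
	select_page_for(0x73);	/* status register: page 1 */
	select_page_for(0x74);	/* still page 1: no switch */
	return 0;
}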
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
index 89cb0066a6e0..8d76afb87d87 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -103,9 +103,10 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
* Do not use IIO_DEGREE_TO_RAD to avoid precision
* loss. Round to the nearest integer.
*/
- *val = div_s64(val64 * 314159 + 9000000ULL, 1000);
- *val2 = 18000 << (CROS_EC_SENSOR_BITS - 1);
- ret = IIO_VAL_FRACTIONAL;
+ *val = 0;
+ *val2 = div_s64(val64 * 3141592653ULL,
+ 180 << (CROS_EC_SENSOR_BITS - 1));
+ ret = IIO_VAL_INT_PLUS_NANO;
break;
case MOTIONSENSE_TYPE_MAG:
/*
diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 6d71fd905e29..c701a45469f6 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -92,6 +92,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev,
inoutbuf[0] = 0x60; /* write EEPROM */
inoutbuf[0] |= data->ref_mode << 3;
+ inoutbuf[0] |= data->powerdown ? ((data->powerdown_mode + 1) << 1) : 0;
inoutbuf[1] = data->dac_value >> 4;
inoutbuf[2] = (data->dac_value & 0xf) << 4;
diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index 63ca31628a93..92c07ab826eb 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -582,11 +582,10 @@ static int bmg160_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY:
return bmg160_get_filter(data, val);
case IIO_CHAN_INFO_SCALE:
- *val = 0;
switch (chan->type) {
case IIO_TEMP:
- *val2 = 500000;
- return IIO_VAL_INT_PLUS_MICRO;
+ *val = 500;
+ return IIO_VAL_INT;
case IIO_ANGL_VEL:
{
int i;
@@ -594,6 +593,7 @@ static int bmg160_read_raw(struct iio_dev *indio_dev,
for (i = 0; i < ARRAY_SIZE(bmg160_scale_table); ++i) {
if (bmg160_scale_table[i].dps_range ==
data->dps_range) {
+ *val = 0;
*val2 = bmg160_scale_table[i].scale;
return IIO_VAL_INT_PLUS_MICRO;
}
diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c
index 77fac81a3adc..5ddebede31a6 100644
--- a/drivers/iio/gyro/mpu3050-core.c
+++ b/drivers/iio/gyro/mpu3050-core.c
@@ -29,7 +29,8 @@
#include "mpu3050.h"
-#define MPU3050_CHIP_ID 0x69
+#define MPU3050_CHIP_ID 0x68
+#define MPU3050_CHIP_ID_MASK 0x7E
/*
* Register map: anything suffixed *_H is a big-endian high byte and always
@@ -1176,8 +1177,9 @@ int mpu3050_common_probe(struct device *dev,
goto err_power_down;
}
- if (val != MPU3050_CHIP_ID) {
- dev_err(dev, "unsupported chip id %02x\n", (u8)val);
+ if ((val & MPU3050_CHIP_ID_MASK) != MPU3050_CHIP_ID) {
+ dev_err(dev, "unsupported chip id %02x\n",
+ (u8)(val & MPU3050_CHIP_ID_MASK));
ret = -ENODEV;
goto err_power_down;
}
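The mpu3050 fix above compares the WHO_AM_I value with the revision bits masked off instead of expecting one exact byte. A tiny sketch of masked chip-ID matching (mask and ID values as in the hunk, helper names made up):

#include <stdint.h>
#include <stdio.h>

#define CHIP_ID		0x68	/* expected ID with don't-care bits cleared */
#define CHIP_ID_MASK	0x7e	/* bits that actually identify the part    */

/*
 * Accept any WHO_AM_I value whose identifying bits match, ignoring the
 * bits that vary between silicon revisions.
 */
static int chip_id_matches(uint8_t who_am_i)
{
	return (who_am_i & CHIP_ID_MASK) == CHIP_ID;
}

int main(void)
{
	printf("%d\n", chip_id_matches(0x69));	/* 1: revision bit set, still a match */
	printf("%d\n", chip_id_matches(0x34));	/* 0: different part */
	return 0;
}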
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index cd5bfe39591b..dadd921a4a30 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -320,9 +320,8 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev,
const unsigned long *mask;
unsigned long *trialmask;
- trialmask = kmalloc_array(BITS_TO_LONGS(indio_dev->masklength),
- sizeof(*trialmask),
- GFP_KERNEL);
+ trialmask = kcalloc(BITS_TO_LONGS(indio_dev->masklength),
+ sizeof(*trialmask), GFP_KERNEL);
if (trialmask == NULL)
return -ENOMEM;
if (!indio_dev->masklength) {
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 4700fd5d8c90..9c4d92115504 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1743,10 +1743,10 @@ EXPORT_SYMBOL(__iio_device_register);
**/
void iio_device_unregister(struct iio_dev *indio_dev)
{
- mutex_lock(&indio_dev->info_exist_lock);
-
cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
+ mutex_lock(&indio_dev->info_exist_lock);
+
iio_device_unregister_debugfs(indio_dev);
iio_disable_all_buffers(indio_dev);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index ea0bc6885517..32cc8fe7902f 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -160,6 +160,7 @@ struct ib_uverbs_file {
struct mutex umap_lock;
struct list_head umaps;
+ struct page *disassociate_page;
struct idr idr;
/* spinlock protects write access to idr */
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 70b7d80431a9..c489f545baae 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -208,6 +208,9 @@ void ib_uverbs_release_file(struct kref *ref)
kref_put(&file->async_file->ref,
ib_uverbs_release_async_event_file);
put_device(&file->device->dev);
+
+ if (file->disassociate_page)
+ __free_pages(file->disassociate_page, 0);
kfree(file);
}
@@ -877,9 +880,50 @@ static void rdma_umap_close(struct vm_area_struct *vma)
kfree(priv);
}
+/*
+ * Once zap_vma_ptes() has been called, touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+ struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+ vm_fault_t ret = 0;
+
+ if (!priv)
+ return VM_FAULT_SIGBUS;
+
+ /* Read only pages can just use the system zero page. */
+ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+ vmf->page = ZERO_PAGE(vmf->address);
+ get_page(vmf->page);
+ return 0;
+ }
+
+ mutex_lock(&ufile->umap_lock);
+ if (!ufile->disassociate_page)
+ ufile->disassociate_page =
+ alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+ if (ufile->disassociate_page) {
+ /*
+ * This VMA is forced to always be shared so this doesn't have
+ * to worry about COW.
+ */
+ vmf->page = ufile->disassociate_page;
+ get_page(vmf->page);
+ } else {
+ ret = VM_FAULT_SIGBUS;
+ }
+ mutex_unlock(&ufile->umap_lock);
+
+ return ret;
+}
+
static const struct vm_operations_struct rdma_umap_ops = {
.open = rdma_umap_open,
.close = rdma_umap_close,
+ .fault = rdma_umap_fault,
};
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
@@ -889,6 +933,9 @@ static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
struct ib_uverbs_file *ufile = ucontext->ufile;
struct rdma_umap_priv *priv;
+ if (!(vma->vm_flags & VM_SHARED))
+ return ERR_PTR(-EINVAL);
+
if (vma->vm_end - vma->vm_start != size)
return ERR_PTR(-EINVAL);
@@ -992,7 +1039,9 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
* at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal.
*/
- down_write(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
@@ -1004,10 +1053,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
- vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
mutex_unlock(&ufile->umap_lock);
- up_write(&mm->mmap_sem);
+ skip_mm:
+ up_read(&mm->mmap_sem);
mmput(mm);
}
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 66cdf625534f..60cf9f03e941 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -533,7 +533,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
{
- if (attr->qp_type == IB_QPT_XRC_TGT)
+ if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
return 0;
return 1;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 531ff20b32ad..d3dd290ae1b1 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1119,6 +1119,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, qp_packet_based))
resp.flags |=
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
}
if (field_avail(typeof(resp), sw_parsing_caps,
@@ -2066,6 +2068,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+ vma->vm_flags &= ~VM_MAYWRITE;
if (!dev->mdev->clock_info_page)
return -EOPNOTSUPP;
@@ -2231,19 +2234,18 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+ vma->vm_flags &= ~VM_MAYWRITE;
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)
return -EOPNOTSUPP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- break;
+ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot));
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7cd006da1dae..8870c350fda0 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1818,13 +1818,16 @@ static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
- if (rcqe_sz == 128) {
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
+ if (rcqe_sz == 128)
+ MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+
return;
}
- if (init_attr->qp_type != MLX5_IB_QPT_DCT)
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+ MLX5_SET(qpc, qpc, cs_res,
+ rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+ MLX5_RES_SCAT_DATA32_CQE);
}
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 728795043496..0bb6e39dd03a 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -608,11 +608,6 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
if (unlikely(mapped_segs == mr->mr.max_segs))
return -ENOMEM;
- if (mr->mr.length == 0) {
- mr->mr.user_base = addr;
- mr->mr.iova = addr;
- }
-
m = mapped_segs / RVT_SEGSZ;
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
@@ -630,17 +625,24 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
* @sg_nents: number of entries in sg
* @sg_offset: offset in bytes into sg
*
+ * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
+ *
* Return: number of sg elements mapped to the memory region
*/
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset)
{
struct rvt_mr *mr = to_imr(ibmr);
+ int ret;
mr->mr.length = 0;
mr->mr.page_shift = PAGE_SHIFT;
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
- rvt_set_page);
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
+ mr->mr.user_base = ibmr->iova;
+ mr->mr.iova = ibmr->iova;
+ mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
+ mr->mr.length = (size_t)ibmr->length;
+ return ret;
}
/**
@@ -671,6 +673,7 @@ int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
ibmr->rkey = key;
mr->mr.lkey = key;
mr->mr.access_flags = access;
+ mr->mr.iova = ibmr->iova;
atomic_set(&mr->mr.lkey_invalid, 0);
return 0;
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index a878351f1643..52d7f55fca32 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -420,7 +420,7 @@ config KEYBOARD_MPR121
config KEYBOARD_SNVS_PWRKEY
tristate "IMX SNVS Power Key Driver"
- depends on SOC_IMX6SX || SOC_IMX7D
+ depends on ARCH_MXC || COMPILE_TEST
depends on OF
help
This is the snvs powerkey driver for the Freescale i.MX application
diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c
index effb63205d3d..4c67cf30a5d9 100644
--- a/drivers/input/keyboard/snvs_pwrkey.c
+++ b/drivers/input/keyboard/snvs_pwrkey.c
@@ -148,6 +148,9 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev)
return error;
}
+ pdata->input = input;
+ platform_set_drvdata(pdev, pdata);
+
error = devm_request_irq(&pdev->dev, pdata->irq,
imx_snvs_pwrkey_interrupt,
0, pdev->name, pdev);
@@ -163,9 +166,6 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev)
return error;
}
- pdata->input = input;
- platform_set_drvdata(pdev, pdata);
-
device_init_wakeup(&pdev->dev, pdata->wakeup);
return 0;
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 628ef617bb2f..f9525d6f0bfe 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1339,21 +1339,46 @@ static const struct acpi_device_id elan_acpi_id[] = {
{ "ELAN0600", 0 },
{ "ELAN0601", 0 },
{ "ELAN0602", 0 },
+ { "ELAN0603", 0 },
+ { "ELAN0604", 0 },
{ "ELAN0605", 0 },
+ { "ELAN0606", 0 },
+ { "ELAN0607", 0 },
{ "ELAN0608", 0 },
{ "ELAN0609", 0 },
{ "ELAN060B", 0 },
{ "ELAN060C", 0 },
+ { "ELAN060F", 0 },
+ { "ELAN0610", 0 },
{ "ELAN0611", 0 },
{ "ELAN0612", 0 },
+ { "ELAN0615", 0 },
+ { "ELAN0616", 0 },
{ "ELAN0617", 0 },
{ "ELAN0618", 0 },
+ { "ELAN0619", 0 },
+ { "ELAN061A", 0 },
+ { "ELAN061B", 0 },
{ "ELAN061C", 0 },
{ "ELAN061D", 0 },
{ "ELAN061E", 0 },
+ { "ELAN061F", 0 },
{ "ELAN0620", 0 },
{ "ELAN0621", 0 },
{ "ELAN0622", 0 },
+ { "ELAN0623", 0 },
+ { "ELAN0624", 0 },
+ { "ELAN0625", 0 },
+ { "ELAN0626", 0 },
+ { "ELAN0627", 0 },
+ { "ELAN0628", 0 },
+ { "ELAN0629", 0 },
+ { "ELAN062A", 0 },
+ { "ELAN062B", 0 },
+ { "ELAN062C", 0 },
+ { "ELAN062D", 0 },
+ { "ELAN0631", 0 },
+ { "ELAN0632", 0 },
{ "ELAN1000", 0 },
{ }
};
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index fc3ab93b7aea..7fb358f96195 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -860,7 +860,7 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
error = rmi_register_function(fn);
if (error)
- goto err_put_fn;
+ return error;
if (pdt->function_number == 0x01)
data->f01_container = fn;
@@ -870,10 +870,6 @@ static int rmi_create_function(struct rmi_device *rmi_dev,
list_add_tail(&fn->node, &data->function_list);
return RMI_SCAN_CONTINUE;
-
-err_put_fn:
- put_device(&fn->dev);
- return error;
}
void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake)
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index df64d6aed4f7..93901ebd122a 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -1230,7 +1230,7 @@ static int rmi_f11_initialize(struct rmi_function *fn)
}
rc = f11_write_control_regs(fn, &f11->sens_query,
- &f11->dev_controls, fn->fd.query_base_addr);
+ &f11->dev_controls, fn->fd.control_base_addr);
if (rc)
dev_warn(&fn->dev, "Failed to write control registers\n");
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index aa7290784636..0390603170b4 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -22,6 +22,15 @@
#define AR71XX_RESET_REG_MISC_INT_ENABLE 4
#define ATH79_MISC_IRQ_COUNT 32
+#define ATH79_MISC_PERF_IRQ 5
+
+static int ath79_perfcount_irq;
+
+int get_c0_perfcount_int(void)
+{
+ return ath79_perfcount_irq;
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
static void ath79_misc_irq_handler(struct irq_desc *desc)
{
@@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init(
{
void __iomem *base = domain->host_data;
+ ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ);
+
/* Disable and clear all interrupts */
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 4ab8b1b6608f..a14e35d40538 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -710,10 +710,10 @@ base_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct sock *sk = sock->sk;
int err = 0;
- if (!maddr || maddr->family != AF_ISDN)
+ if (addr_len < sizeof(struct sockaddr_mISDN))
return -EINVAL;
- if (addr_len < sizeof(struct sockaddr_mISDN))
+ if (!maddr || maddr->family != AF_ISDN)
return -EINVAL;
lock_sock(sk);
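
The reordering above matters because maddr aliases the caller-supplied addr buffer, so its family field must not be inspected until the length check has confirmed the buffer is large enough. A minimal standalone sketch of that ordering follows; struct sockaddr_example and AF_EXAMPLE are made-up names used only for illustration, not the mISDN types.

#include <stddef.h>
#include <errno.h>

/* Hypothetical address type, standing in for struct sockaddr_mISDN. */
struct sockaddr_example {
	unsigned short	family;
	unsigned char	data[14];
};

#define AF_EXAMPLE	42	/* made-up address family number */

static int example_bind_check(const void *addr, size_t addr_len)
{
	const struct sockaddr_example *maddr = addr;

	/* Check the length first ... */
	if (addr_len < sizeof(*maddr))
		return -EINVAL;
	/* ... and only then read fields of the buffer. */
	if (!maddr || maddr->family != AF_EXAMPLE)
		return -EINVAL;
	return 0;
}

int main(void)
{
	struct sockaddr_example sa = { .family = AF_EXAMPLE };

	return example_bind_check(&sa, sizeof(sa));	/* returns 0 */
}
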
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 39f832d27288..36d0d5c9cfba 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1184,6 +1184,7 @@ static int fastrpc_cb_probe(struct platform_device *pdev)
struct fastrpc_session_ctx *sess;
struct device *dev = &pdev->dev;
int i, sessions = 0;
+ int rc;
cctx = dev_get_drvdata(dev->parent);
if (!cctx)
@@ -1213,7 +1214,11 @@ static int fastrpc_cb_probe(struct platform_device *pdev)
}
cctx->sesscount++;
spin_unlock(&cctx->lock);
- dma_set_mask(dev, DMA_BIT_MASK(32));
+ rc = dma_set_mask(dev, DMA_BIT_MASK(32));
+ if (rc) {
+ dev_err(dev, "32-bit DMA enable failed\n");
+ return rc;
+ }
return 0;
}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ea979ebd62fb..3c509e19d69d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -1688,12 +1688,11 @@ static void goya_init_golden_registers(struct hl_device *hdev)
/*
* Workaround for H2 #HW-23 bug
- * Set DMA max outstanding read requests to 240 on DMA CH 1. Set it
- * to 16 on KMD DMA
- * We need to limit only these DMAs because the user can only read
+ * Set DMA max outstanding read requests to 240 on DMA CH 1.
+ * This limitation is still large enough to not affect Gen4 bandwidth.
+ * We need to only limit that DMA channel because the user can only read
* from Host using DMA CH 1
*/
- WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
goya->hw_cap_initialized |= HW_CAP_GOLDEN;
@@ -3693,7 +3692,7 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
* WA for HW-23.
* We can't allow user to read from Host using QMANs other than 1.
*/
- if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
+ if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
le32_to_cpu(user_dma_pkt->tsize),
hdev->asic_prop.va_space_host_start_address,
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index f38e5c1b87e4..d984538980e2 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -722,12 +722,6 @@ static void marvell_nfc_select_target(struct nand_chip *chip,
struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
u32 ndcr_generic;
- if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die)
- return;
-
- writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0);
- writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1);
-
/*
* Reset the NDCR register to a clean state for this particular chip,
* also clear ND_RUN bit.
@@ -739,6 +733,12 @@ static void marvell_nfc_select_target(struct nand_chip *chip,
/* Also reset the interrupt status register */
marvell_nfc_clear_int(nfc, NDCR_ALL_INT);
+ if (chip == nfc->selected_chip && die_nr == marvell_nand->selected_die)
+ return;
+
+ writel_relaxed(marvell_nand->ndtr0, nfc->regs + NDTR0);
+ writel_relaxed(marvell_nand->ndtr1, nfc->regs + NDTR1);
+
nfc->selected_chip = chip;
marvell_nand->selected_die = die_nr;
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b59708c35faf..ee610721098e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3213,8 +3213,12 @@ static int bond_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
if (event_dev->flags & IFF_MASTER) {
+ int ret;
+
netdev_dbg(event_dev, "IFF_MASTER\n");
- return bond_master_netdev_event(event, event_dev);
+ ret = bond_master_netdev_event(event, event_dev);
+ if (ret != NOTIFY_DONE)
+ return ret;
}
if (event_dev->flags & IFF_SLAVE) {
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index e6234d209787..4212bc4a5f31 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -886,6 +886,9 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
fs->m_ext.data[1]))
return -EINVAL;
+ if (fs->location != RX_CLS_LOC_ANY && fs->location >= CFP_NUM_RULES)
+ return -EINVAL;
+
if (fs->location != RX_CLS_LOC_ANY &&
test_bit(fs->location, priv->cfp.used))
return -EBUSY;
@@ -974,6 +977,9 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 loc)
struct cfp_rule *rule;
int ret;
+ if (loc >= CFP_NUM_RULES)
+ return -EINVAL;
+
/* Refuse deleting unused rules, and those that are not unique since
* that could leave IPv6 rules with one of the chained rule in the
* table.
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index 9e07b469066a..156fbc5601ca 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1721,7 +1721,7 @@ static void atl1_inc_smb(struct atl1_adapter *adapter)
adapter->soft_stats.scc += smb->tx_1_col;
adapter->soft_stats.mcc += smb->tx_2_col;
adapter->soft_stats.latecol += smb->tx_late_col;
- adapter->soft_stats.tx_underun += smb->tx_underrun;
+ adapter->soft_stats.tx_underrun += smb->tx_underrun;
adapter->soft_stats.tx_trunc += smb->tx_trunc;
adapter->soft_stats.tx_pause += smb->tx_pause;
@@ -3179,7 +3179,7 @@ static struct atl1_stats atl1_gstrings_stats[] = {
{"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)},
{"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)},
{"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)},
- {"tx_underun", ATL1_STAT(soft_stats.tx_underun)},
+ {"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)},
{"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)},
{"tx_pause", ATL1_STAT(soft_stats.tx_pause)},
{"rx_pause", ATL1_STAT(soft_stats.rx_pause)},
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.h b/drivers/net/ethernet/atheros/atlx/atl1.h
index 34a58cd846a0..eacff19ea05b 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.h
+++ b/drivers/net/ethernet/atheros/atlx/atl1.h
@@ -681,7 +681,7 @@ struct atl1_sft_stats {
u64 scc; /* packets TX after a single collision */
u64 mcc; /* packets TX after multiple collisions */
u64 latecol; /* TX packets w/ late collisions */
- u64 tx_underun; /* TX packets aborted due to TX FIFO underrun
+ u64 tx_underrun; /* TX packets aborted due to TX FIFO underrun
* or TRD FIFO underrun */
u64 tx_trunc; /* TX packets truncated due to size > MTU */
u64 rx_pause; /* num Pause packets received. */
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index d99317b3d891..98da0fa27192 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -553,7 +553,7 @@ static void atl2_intr_tx(struct atl2_adapter *adapter)
netdev->stats.tx_aborted_errors++;
if (txs->late_col)
netdev->stats.tx_window_errors++;
- if (txs->underun)
+ if (txs->underrun)
netdev->stats.tx_fifo_errors++;
} while (1);
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h
index c64a6bdfa7ae..25ec84cb4853 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.h
+++ b/drivers/net/ethernet/atheros/atlx/atl2.h
@@ -260,7 +260,7 @@ struct tx_pkt_status {
unsigned multi_col:1;
unsigned late_col:1;
unsigned abort_col:1;
- unsigned underun:1; /* current packet is aborted
+ unsigned underrun:1; /* current packet is aborted
* due to txram underrun */
unsigned:3; /* reserved */
unsigned update:1; /* always 1'b1 in tx_status_buf */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index a9bdc21873d3..10ff37d6dc78 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -957,7 +957,7 @@ int bnx2x_vfpf_update_vlan(struct bnx2x *bp, u16 vid, u8 vf_qid, bool add)
bnx2x_sample_bulletin(bp);
if (bp->shadow_bulletin.content.valid_bitmap & 1 << VLAN_VALID) {
- BNX2X_ERR("Hypervisor will dicline the request, avoiding\n");
+ BNX2X_ERR("Hypervisor will decline the request, avoiding\n");
rc = -EINVAL;
goto out;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4c586ba4364b..52ade133b57c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1625,7 +1625,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
bnxt_sched_reset(bp, rxr);
}
- goto next_rx;
+ goto next_rx_no_len;
}
len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
@@ -1706,12 +1706,13 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
rc = 1;
next_rx:
- rxr->rx_prod = NEXT_RX(prod);
- rxr->rx_next_cons = NEXT_RX(cons);
-
cpr->rx_packets += 1;
cpr->rx_bytes += len;
+next_rx_no_len:
+ rxr->rx_prod = NEXT_RX(prod);
+ rxr->rx_next_cons = NEXT_RX(cons);
+
next_rx_no_prod_no_len:
*raw_cons = tmp_raw_cons;
@@ -5135,10 +5136,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
for (i = 0; i < bp->tx_nr_rings; i++) {
struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
- u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
+
hwrm_ring_free_send_msg(bp, ring,
RING_FREE_REQ_RING_TYPE_TX,
close_path ? cmpl_ring_id :
@@ -5151,10 +5152,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
struct bnxt_ring_struct *ring = &rxr->rx_ring_struct;
u32 grp_idx = rxr->bnapi->index;
- u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+
hwrm_ring_free_send_msg(bp, ring,
RING_FREE_REQ_RING_TYPE_RX,
close_path ? cmpl_ring_id :
@@ -5173,10 +5174,10 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct;
u32 grp_idx = rxr->bnapi->index;
- u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+
hwrm_ring_free_send_msg(bp, ring, type,
close_path ? cmpl_ring_id :
INVALID_HW_RING_ID);
@@ -5315,17 +5316,16 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
req->num_tx_rings = cpu_to_le16(tx_rings);
if (BNXT_NEW_RM(bp)) {
enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
+ enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
enables |= tx_rings + ring_grps ?
- FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
enables |= rx_rings ?
FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
} else {
enables |= cp_rings ?
- FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
enables |= ring_grps ?
FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
@@ -5365,14 +5365,13 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+ enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
if (bp->flags & BNXT_FLAG_CHIP_P5) {
enables |= tx_rings + ring_grps ?
- FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
} else {
enables |= cp_rings ?
- FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
enables |= ring_grps ?
FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
}
@@ -6753,6 +6752,7 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
struct hwrm_port_qstats_ext_input req = {0};
struct bnxt_pf_info *pf = &bp->pf;
+ u32 tx_stat_size;
int rc;
if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
@@ -6762,13 +6762,16 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp)
req.port_id = cpu_to_le16(pf->port_id);
req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map);
- req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext));
+ tx_stat_size = bp->hw_tx_port_stats_ext ?
+ sizeof(*bp->hw_tx_port_stats_ext) : 0;
+ req.tx_stat_size = cpu_to_le16(tx_stat_size);
req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map);
mutex_lock(&bp->hwrm_cmd_lock);
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (!rc) {
bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8;
- bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8;
+ bp->fw_tx_stats_ext_size = tx_stat_size ?
+ le16_to_cpu(resp->tx_stat_size) / 8 : 0;
} else {
bp->fw_rx_stats_ext_size = 0;
bp->fw_tx_stats_ext_size = 0;
@@ -8961,8 +8964,15 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
skip_uc:
rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+ if (rc && vnic->mc_list_count) {
+ netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
+ rc);
+ vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+ vnic->mc_list_count = 0;
+ rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+ }
if (rc)
- netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
+ netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n",
rc);
return rc;
@@ -10685,6 +10695,7 @@ init_err_cleanup_tc:
bnxt_clear_int_mode(bp);
init_err_pci_clean:
+ bnxt_free_hwrm_short_cmd_req(bp);
bnxt_free_hwrm_resources(bp);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 28eac9056211..c032bef1b776 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -32,6 +32,13 @@
#define DRV_NAME "nicvf"
#define DRV_VERSION "1.0"
+/* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs
+ * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed
+ * this value, keeping headroom for the 14-byte Ethernet header and two
+ * VLAN tags (for QinQ)
+ */
+#define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2)
+
/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
@@ -1582,6 +1589,15 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
struct nicvf *nic = netdev_priv(netdev);
int orig_mtu = netdev->mtu;
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
+ netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
+ netdev->mtu);
+ return -EINVAL;
+ }
+
netdev->mtu = new_mtu;
if (!netif_running(netdev))
@@ -1830,8 +1846,10 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
bool bpf_attached = false;
int ret = 0;
- /* For now just support only the usual MTU sized frames */
- if (prog && (dev->mtu > 1500)) {
+ /* For now just support only the usual MTU sized frames,
+ * plus some headroom for VLAN, QinQ.
+ */
+ if (prog && dev->mtu > MAX_XDP_MTU) {
netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
dev->mtu);
return -EOPNOTSUPP;
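
For reference, with the conventional sizes ETH_HLEN = 14 and VLAN_HLEN = 4, the new limit works out to 1530 - 14 - 2 * 4 = 1508 bytes. A standalone restatement of that arithmetic (the constants are repeated here purely for illustration):

#include <stdio.h>

#define ETH_HLEN	14	/* Ethernet header */
#define VLAN_HLEN	4	/* one 802.1Q tag  */

/* Largest MTU accepted while an XDP program is attached: the 1530-byte
 * contiguous buffer minus the Ethernet header and two VLAN tags (QinQ).
 */
#define MAX_XDP_MTU	(1530 - ETH_HLEN - VLAN_HLEN * 2)

int main(void)
{
	printf("MAX_XDP_MTU = %d\n", MAX_XDP_MTU);	/* prints 1508 */
	return 0;
}
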
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 697c2427f2b7..a96ad20ee484 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1840,13 +1840,9 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
int ret;
if (enable) {
- ret = clk_prepare_enable(fep->clk_ahb);
- if (ret)
- return ret;
-
ret = clk_prepare_enable(fep->clk_enet_out);
if (ret)
- goto failed_clk_enet_out;
+ return ret;
if (fep->clk_ptp) {
mutex_lock(&fep->ptp_clk_mutex);
@@ -1866,7 +1862,6 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
phy_reset_after_clk_enable(ndev->phydev);
} else {
- clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_enet_out);
if (fep->clk_ptp) {
mutex_lock(&fep->ptp_clk_mutex);
@@ -1885,8 +1880,6 @@ failed_clk_ref:
failed_clk_ptp:
if (fep->clk_enet_out)
clk_disable_unprepare(fep->clk_enet_out);
-failed_clk_enet_out:
- clk_disable_unprepare(fep->clk_ahb);
return ret;
}
@@ -3470,6 +3463,9 @@ fec_probe(struct platform_device *pdev)
ret = clk_prepare_enable(fep->clk_ipg);
if (ret)
goto failed_clk_ipg;
+ ret = clk_prepare_enable(fep->clk_ahb);
+ if (ret)
+ goto failed_clk_ahb;
fep->reg_phy = devm_regulator_get_optional(&pdev->dev, "phy");
if (!IS_ERR(fep->reg_phy)) {
@@ -3563,6 +3559,9 @@ failed_reset:
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
failed_regulator:
+ clk_disable_unprepare(fep->clk_ahb);
+failed_clk_ahb:
+ clk_disable_unprepare(fep->clk_ipg);
failed_clk_ipg:
fec_enet_clk_enable(ndev, false);
failed_clk:
@@ -3686,6 +3685,7 @@ static int __maybe_unused fec_runtime_suspend(struct device *dev)
struct net_device *ndev = dev_get_drvdata(dev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_ipg);
return 0;
@@ -3695,8 +3695,20 @@ static int __maybe_unused fec_runtime_resume(struct device *dev)
{
struct net_device *ndev = dev_get_drvdata(dev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ int ret;
- return clk_prepare_enable(fep->clk_ipg);
+ ret = clk_prepare_enable(fep->clk_ahb);
+ if (ret)
+ return ret;
+ ret = clk_prepare_enable(fep->clk_ipg);
+ if (ret)
+ goto failed_clk_ipg;
+
+ return 0;
+
+failed_clk_ipg:
+ clk_disable_unprepare(fep->clk_ahb);
+ return ret;
}
static const struct dev_pm_ops fec_pm_ops = {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 51cfe95f3e24..3dfb2d131eb7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -3762,6 +3762,7 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
{
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf;
+ netdev_features_t old_hw_features = 0;
union ibmvnic_crq crq;
int i;
@@ -3837,24 +3838,41 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
adapter->ip_offload_ctrl.large_rx_ipv4 = 0;
adapter->ip_offload_ctrl.large_rx_ipv6 = 0;
- adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO;
+ if (adapter->state != VNIC_PROBING) {
+ old_hw_features = adapter->netdev->hw_features;
+ adapter->netdev->hw_features = 0;
+ }
+
+ adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
- adapter->netdev->features |= NETIF_F_IP_CSUM;
+ adapter->netdev->hw_features |= NETIF_F_IP_CSUM;
if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum)
- adapter->netdev->features |= NETIF_F_IPV6_CSUM;
+ adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM;
if ((adapter->netdev->features &
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
- adapter->netdev->features |= NETIF_F_RXCSUM;
+ adapter->netdev->hw_features |= NETIF_F_RXCSUM;
if (buf->large_tx_ipv4)
- adapter->netdev->features |= NETIF_F_TSO;
+ adapter->netdev->hw_features |= NETIF_F_TSO;
if (buf->large_tx_ipv6)
- adapter->netdev->features |= NETIF_F_TSO6;
+ adapter->netdev->hw_features |= NETIF_F_TSO6;
- adapter->netdev->hw_features |= adapter->netdev->features;
+ if (adapter->state == VNIC_PROBING) {
+ adapter->netdev->features |= adapter->netdev->hw_features;
+ } else if (old_hw_features != adapter->netdev->hw_features) {
+ netdev_features_t tmp = 0;
+
+ /* disable features no longer supported */
+ adapter->netdev->features &= adapter->netdev->hw_features;
+ /* turn on features now supported if previously enabled */
+ tmp = (old_hw_features ^ adapter->netdev->hw_features) &
+ adapter->netdev->hw_features;
+ adapter->netdev->features |=
+ tmp & adapter->netdev->wanted_features;
+ }
memset(&crq, 0, sizeof(crq));
crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
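
The hw_features bookkeeping above is plain bitmask arithmetic: drop anything the adapter no longer offers, then re-enable bits that are newly offered and were previously wanted. A small standalone sketch with made-up feature bits (the real code operates on netdev_features_t and the adapter's offload buffer):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up feature bits, for illustration only. */
	const uint64_t FEAT_CSUM = 1u << 0;
	const uint64_t FEAT_TSO  = 1u << 1;
	const uint64_t FEAT_GRO  = 1u << 2;

	uint64_t old_hw   = FEAT_CSUM | FEAT_TSO;            /* offered before reset */
	uint64_t new_hw   = FEAT_CSUM | FEAT_GRO;            /* offered after reset  */
	uint64_t features = FEAT_CSUM | FEAT_TSO;            /* currently enabled    */
	uint64_t wanted   = FEAT_CSUM | FEAT_TSO | FEAT_GRO; /* user preference      */
	uint64_t newly_offered;

	/* Disable features no longer supported. */
	features &= new_hw;
	/* Turn on features now supported, if they were previously wanted. */
	newly_offered = (old_hw ^ new_hw) & new_hw;
	features |= newly_offered & wanted;

	/* Prints 0x5: CSUM kept, TSO dropped, GRO picked up. */
	printf("features = 0x%llx\n", (unsigned long long)features);
	return 0;
}
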
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 71c65cc17904..d3eaf2ceaa39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -858,6 +858,7 @@ void mlx5e_close_channels(struct mlx5e_channels *chs);
* switching channels
*/
typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv);
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *new_chs,
mlx5e_fp_hw_modify hw_modify);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 9d38e62cdf24..476dd97f7f2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -186,12 +186,17 @@ static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
{
- int err;
+ int err = 0;
rtnl_lock();
mutex_lock(&priv->state_lock);
- mlx5e_close_locked(priv->netdev);
- err = mlx5e_open_locked(priv->netdev);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto out;
+
+ err = mlx5e_safe_reopen_channels(priv);
+
+out:
mutex_unlock(&priv->state_lock);
rtnl_unlock();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index fa2a3c444cdc..eec07b34b4ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -39,6 +39,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
+ if (!(mlx5e_eswitch_rep(*out_dev) &&
+ mlx5e_is_uplink_rep(netdev_priv(*out_dev))))
+ return -EOPNOTSUPP;
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 03b2a9f9c589..cad34d6f5f45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -33,6 +33,26 @@
#include <linux/bpf_trace.h>
#include "en/xdp.h"
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+ int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+ /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+ * The condition checked in mlx5e_rx_is_linear_skb is:
+ * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1)
+ * (Note that hw_mtu == sw_mtu + hard_mtu.)
+ * What is returned from this function is:
+ * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2)
+ * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+ * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+ * because both PAGE_SIZE and S are already aligned. Any number greater
+ * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+ * so max_mtu is the maximum MTU allowed.
+ */
+
+ return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
struct xdp_buff *xdp)
@@ -304,9 +324,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
mlx5e_xdpi_fifo_pop(xdpi_fifo);
if (is_redirect) {
- xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, true);
@@ -345,9 +365,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
mlx5e_xdpi_fifo_pop(xdpi_fifo);
if (is_redirect) {
- xdp_return_frame(xdpi.xdpf);
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpi.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.xdpf);
} else {
/* Recycle RX page */
mlx5e_page_release(rq, &xdpi.di, false);
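
To make formula (2) in the mlx5e_xdp_max_mtu() comment concrete, the following standalone computation plugs in one plausible set of numbers; every constant here is an assumption noted in the comments (page size, headroom, shared-info overhead, hard MTU), not a value taken from the kernel headers:

#include <stdio.h>

int main(void)
{
	int page_size = 4096;		/* assumed PAGE_SIZE */
	int headroom  = 0 + 256;	/* assumed NET_IP_ALIGN + XDP_PACKET_HEADROOM */
	int shinfo    = 320;		/* assumed SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
	int hard_mtu  = 14 + 4 + 4;	/* assumed ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN */

	/* Formula (2): max_mtu = PAGE_SIZE - S - hr - hard_mtu */
	int max_mtu = page_size - shinfo - headroom - hard_mtu;

	printf("max XDP MTU under these assumptions: %d\n", max_mtu);	/* 3498 */
	return 0;
}
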
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index ee27a7c8cd87..553956cadc8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -34,13 +34,12 @@
#include "en.h"
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
- MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 5efce4a3ff79..78dc8fe2a83c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1586,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev,
break;
case MLX5_MODULE_ID_SFP:
modinfo->type = ETH_MODULE_SFF_8472;
- modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
break;
default:
netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
@@ -1768,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
struct mlx5e_channel *c;
int i;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state) ||
+ priv->channels.params.xdp_prog)
return 0;
for (i = 0; i < channels->num; i++) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b5fdbd3190d9..46157e2a1e5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -951,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (params->rx_dim_enabled)
__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
- if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE))
+ /* We disable csum_complete when XDP is enabled since
+ * XDP programs might manipulate packets which will render
+ * skb->checksum incorrect.
+ */
+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
return 0;
@@ -2937,6 +2941,14 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
return 0;
}
+int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
+{
+ struct mlx5e_channels new_channels = {};
+
+ new_channels.params = priv->channels.params;
+ return mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+}
+
void mlx5e_timestamp_init(struct mlx5e_priv *priv)
{
priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
@@ -3765,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
if (params->xdp_prog &&
!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
- new_mtu, MLX5E_XDP_MAX_MTU);
+ new_mtu, mlx5e_xdp_max_mtu(params));
err = -EINVAL;
goto out;
}
@@ -4161,11 +4173,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
if (!report_failed)
goto unlock;
- mlx5e_close_locked(priv->netdev);
- err = mlx5e_open_locked(priv->netdev);
+ err = mlx5e_safe_reopen_channels(priv);
if (err)
netdev_err(priv->netdev,
- "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+ "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
err);
unlock:
@@ -4201,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
- new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+ new_channels.params.sw_mtu,
+ mlx5e_xdp_max_mtu(&new_channels.params));
return -EINVAL;
}
@@ -4553,7 +4565,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
{
enum mlx5e_traffic_types tt;
- rss_params->hfunc = ETH_RSS_HASH_XOR;
+ rss_params->hfunc = ETH_RSS_HASH_TOP;
netdev_rss_key_fill(rss_params->toeplitz_hash_key,
sizeof(rss_params->toeplitz_hash_key));
mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3dde5c7e0739..c3b3002ff62f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -692,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
{
*proto = ((struct ethhdr *)skb->data)->h_proto;
*proto = __vlan_get_protocol(skb, *proto, network_depth);
- return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
+
+ if (*proto == htons(ETH_P_IP))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+ if (*proto == htons(ETH_P_IPV6))
+ return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+ return false;
}
static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
@@ -712,17 +719,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
rq->stats->ecn_mark += !!rc;
}
-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
-{
- const void *fcs_bytes;
- u32 _fcs_bytes;
-
- fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
- ETH_FCS_LEN, &_fcs_bytes);
-
- return __get_unaligned_cpu32(fcs_bytes);
-}
-
static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
{
void *ip_p = skb->data + network_depth;
@@ -733,6 +729,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+#define MAX_PADDING 8
+
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+ struct mlx5e_rq_stats *stats)
+{
+ stats->csum_complete_tail_slow++;
+ skb->csum = csum_block_add(skb->csum,
+ skb_checksum(skb, offset, len, 0),
+ offset);
+}
+
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+ struct mlx5e_rq_stats *stats)
+{
+ u8 tail_padding[MAX_PADDING];
+ int len = skb->len - offset;
+ void *tail;
+
+ if (unlikely(len > MAX_PADDING)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ tail = skb_header_pointer(skb, offset, len, tail_padding);
+ if (unlikely(!tail)) {
+ tail_padding_csum_slow(skb, offset, len, stats);
+ return;
+ }
+
+ stats->csum_complete_tail++;
+ skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+ struct mlx5e_rq_stats *stats)
+{
+ struct ipv6hdr *ip6;
+ struct iphdr *ip4;
+ int pkt_len;
+
+ switch (proto) {
+ case htons(ETH_P_IP):
+ ip4 = (struct iphdr *)(skb->data + network_depth);
+ pkt_len = network_depth + ntohs(ip4->tot_len);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+ pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+ break;
+ default:
+ return;
+ }
+
+ if (likely(pkt_len >= skb->len))
+ return;
+
+ tail_padding_csum(skb, pkt_len, stats);
+}
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
@@ -752,7 +810,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
- if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)))
+ /* True when explicitly set via priv flag, or XDP prog is loaded */
+ if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
@@ -780,18 +839,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
skb->csum = csum_partial(skb->data + ETH_HLEN,
network_depth - ETH_HLEN,
skb->csum);
- if (unlikely(netdev->features & NETIF_F_RXFCS))
- skb->csum = csum_block_add(skb->csum,
- (__force __wsum)mlx5e_get_fcs(skb),
- skb->len - ETH_FCS_LEN);
+
+ mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
stats->csum_complete++;
return;
}
csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
- ((cqe->hds_ip_ext & CQE_L4_OK) ||
- (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) {
+ (cqe->hds_ip_ext & CQE_L4_OK))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (cqe_is_tunneled(cqe)) {
skb->csum_level = 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 1a78e05cbba8..b75aa8b8bf04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -59,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
@@ -151,6 +153,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
+ s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+ s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
s->rx_xdp_drop += rq_stats->xdp_drop;
@@ -1190,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 4640d4f986f8..16c3b785f282 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -71,6 +71,8 @@ struct mlx5e_sw_stats {
u64 rx_csum_unnecessary;
u64 rx_csum_none;
u64 rx_csum_complete;
+ u64 rx_csum_complete_tail;
+ u64 rx_csum_complete_tail_slow;
u64 rx_csum_unnecessary_inner;
u64 rx_xdp_drop;
u64 rx_xdp_redirect;
@@ -181,6 +183,8 @@ struct mlx5e_rq_stats {
u64 packets;
u64 bytes;
u64 csum_complete;
+ u64 csum_complete_tail;
+ u64 csum_complete_tail_slow;
u64 csum_unnecessary;
u64 csum_unnecessary_inner;
u64 csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index 8de64e88c670..22a2ef111514 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
return ret;
}
-static void mlx5_fpga_tls_release_swid(struct idr *idr,
- spinlock_t *idr_spinlock, u32 swid)
+static void *mlx5_fpga_tls_release_swid(struct idr *idr,
+ spinlock_t *idr_spinlock, u32 swid)
{
unsigned long flags;
+ void *ptr;
spin_lock_irqsave(idr_spinlock, flags);
- idr_remove(idr, swid);
+ ptr = idr_remove(idr, swid);
spin_unlock_irqrestore(idr_spinlock, flags);
+ return ptr;
}
static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
@@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
kfree(buf);
}
-struct mlx5_teardown_stream_context {
- struct mlx5_fpga_tls_command_context cmd;
- u32 swid;
-};
-
static void
mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
struct mlx5_fpga_device *fdev,
struct mlx5_fpga_tls_command_context *cmd,
struct mlx5_fpga_dma_buf *resp)
{
- struct mlx5_teardown_stream_context *ctx =
- container_of(cmd, struct mlx5_teardown_stream_context, cmd);
-
if (resp) {
u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome);
@@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
mlx5_fpga_err(fdev,
"Teardown stream failed with syndrome = %d",
syndrome);
- else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
- mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
- &fdev->tls->tx_idr_spinlock,
- ctx->swid);
- else
- mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
- &fdev->tls->rx_idr_spinlock,
- ctx->swid);
}
mlx5_fpga_tls_put_command_ctx(cmd);
}
@@ -217,22 +203,22 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
void *cmd;
int ret;
- rcu_read_lock();
- flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
- rcu_read_unlock();
-
- if (!flow) {
- WARN_ONCE(1, "Received NULL pointer for handle\n");
- return -EINVAL;
- }
-
buf = kzalloc(size, GFP_ATOMIC);
if (!buf)
return -ENOMEM;
cmd = (buf + 1);
+ rcu_read_lock();
+ flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+ if (unlikely(!flow)) {
+ rcu_read_unlock();
+ WARN_ONCE(1, "Received NULL pointer for handle\n");
+ kfree(buf);
+ return -EINVAL;
+ }
mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+ rcu_read_unlock();
MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
@@ -253,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
void *flow, u32 swid, gfp_t flags)
{
- struct mlx5_teardown_stream_context *ctx;
+ struct mlx5_fpga_tls_command_context *ctx;
struct mlx5_fpga_dma_buf *buf;
void *cmd;
@@ -261,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
if (!ctx)
return;
- buf = &ctx->cmd.buf;
+ buf = &ctx->buf;
cmd = (ctx + 1);
MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM);
MLX5_SET(tls_cmd, cmd, swid, swid);
@@ -272,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
buf->sg[0].data = cmd;
buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
- ctx->swid = swid;
- mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd,
+ mlx5_fpga_tls_cmd_send(mdev->fpga, ctx,
mlx5_fpga_tls_teardown_completion);
}
@@ -283,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
struct mlx5_fpga_tls *tls = mdev->fpga->tls;
void *flow;
- rcu_read_lock();
if (direction_sx)
- flow = idr_find(&tls->tx_idr, swid);
+ flow = mlx5_fpga_tls_release_swid(&tls->tx_idr,
+ &tls->tx_idr_spinlock,
+ swid);
else
- flow = idr_find(&tls->rx_idr, swid);
-
- rcu_read_unlock();
+ flow = mlx5_fpga_tls_release_swid(&tls->rx_idr,
+ &tls->rx_idr_spinlock,
+ swid);
if (!flow) {
mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n",
@@ -297,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
return;
}
+ synchronize_rcu(); /* before kfree(flow) */
mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 21b7f05b16a5..361468e0435d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -317,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
i2c_addr = MLX5_I2C_ADDR_LOW;
- if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
- i2c_addr = MLX5_I2C_ADDR_HIGH;
- offset -= MLX5_EEPROM_PAGE_LENGTH;
- }
MLX5_SET(mcia_reg, in, l, 0);
MLX5_SET(mcia_reg, in, module, module_num);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index d23d53c0e284..f26a4ca29363 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
return 0;
- emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0);
+ emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0);
if (!emad_wq)
return -ENOMEM;
mlxsw_core->emad_wq = emad_wq;
@@ -1958,10 +1958,10 @@ static int __init mlxsw_core_module_init(void)
{
int err;
- mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
+ mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
if (!mlxsw_wq)
return -ENOMEM;
- mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM,
+ mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
mlxsw_core_driver_name);
if (!mlxsw_owq) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index ffee38e36ce8..8648ca171254 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
#define MLXSW_PCI_SW_RESET 0xF0010
#define MLXSW_PCI_SW_RESET_RST_BIT BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000
#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100
#define MLXSW_PCI_FW_READY 0xA1844
#define MLXSW_PCI_FW_READY_MASK 0xFFFF
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9eb63300c1d3..6b8aa3761899 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3126,11 +3126,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
if (err)
return err;
+ mlxsw_sp_port->link.autoneg = autoneg;
+
if (!netif_running(dev))
return 0;
- mlxsw_sp_port->link.autoneg = autoneg;
-
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
@@ -3316,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
- false, 0);
+ true, 100);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 9a79b5e11597..d633bef5f105 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -70,6 +70,7 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = {
{MLXSW_REG_SBXX_DIR_EGRESS, 1},
{MLXSW_REG_SBXX_DIR_EGRESS, 2},
{MLXSW_REG_SBXX_DIR_EGRESS, 3},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 15},
};
#define MLXSW_SP_SB_ING_TC_COUNT 8
@@ -428,6 +429,7 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = {
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI),
};
static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
@@ -517,14 +519,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = {
MLXSW_SP_SB_CM(0, 7, 4),
MLXSW_SP_SB_CM(0, 7, 4),
MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
- MLXSW_SP_SB_CM(0, 7, 4),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(1, 0xff, 4),
};
@@ -671,6 +673,7 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = {
MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 0),
MLXSW_SP_SB_PM(0, 0),
+ MLXSW_SP_SB_PM(10000, 90000),
};
static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 52fed8c7bf1e..902e766a8ed3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -6781,7 +6781,7 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
/* A RIF is not created for macvlan netdevs. Their MAC is used to
* populate the FDB
*/
- if (netif_is_macvlan(dev))
+ if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
return 0;
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index f6ce386c3036..50111f228d77 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1630,7 +1630,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
u16 fid_index;
int err = 0;
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_commit(trans))
return 0;
bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index a1d0d6e42533..d715ef4fc92f 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -613,7 +613,7 @@ static int ocelot_mact_mc_add(struct ocelot_port *port,
struct netdev_hw_addr *hw_addr)
{
struct ocelot *ocelot = port->ocelot;
- struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_KERNEL);
+ struct netdev_hw_addr *ha = kzalloc(sizeof(*ha), GFP_ATOMIC);
if (!ha)
return -ENOMEM;
@@ -959,10 +959,8 @@ static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
ETH_GSTRING_LEN);
}
-static void ocelot_check_stats(struct work_struct *work)
+static void ocelot_update_stats(struct ocelot *ocelot)
{
- struct delayed_work *del_work = to_delayed_work(work);
- struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work);
int i, j;
mutex_lock(&ocelot->stats_lock);
@@ -986,11 +984,19 @@ static void ocelot_check_stats(struct work_struct *work)
}
}
- cancel_delayed_work(&ocelot->stats_work);
+ mutex_unlock(&ocelot->stats_lock);
+}
+
+static void ocelot_check_stats_work(struct work_struct *work)
+{
+ struct delayed_work *del_work = to_delayed_work(work);
+ struct ocelot *ocelot = container_of(del_work, struct ocelot,
+ stats_work);
+
+ ocelot_update_stats(ocelot);
+
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
-
- mutex_unlock(&ocelot->stats_lock);
}
static void ocelot_get_ethtool_stats(struct net_device *dev,
@@ -1001,7 +1007,7 @@ static void ocelot_get_ethtool_stats(struct net_device *dev,
int i;
/* check and update now */
- ocelot_check_stats(&ocelot->stats_work.work);
+ ocelot_update_stats(ocelot);
/* Copy all counters */
for (i = 0; i < ocelot->num_stats; i++)
@@ -1809,7 +1815,7 @@ int ocelot_init(struct ocelot *ocelot)
ANA_CPUQ_8021_CFG_CPUQ_BPDU_VAL(6),
ANA_CPUQ_8021_CFG, i);
- INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats);
+ INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
return 0;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c
index 7cde387e5ec6..51cd57ab3d95 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c
@@ -2366,6 +2366,7 @@ static void *__vxge_hw_blockpool_malloc(struct __vxge_hw_device *devh, u32 size,
dma_object->addr))) {
vxge_os_dma_free(devh->pdev, memblock,
&dma_object->acc_handle);
+ memblock = NULL;
goto exit;
}
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c
index 9852080cf454..ff3913085665 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -39,7 +39,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
}
if (knode->sel->off || knode->sel->offshift || knode->sel->offmask ||
knode->sel->offoff || knode->fshift) {
- NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported");
+ NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported");
return false;
}
if (knode->sel->hoff || knode->sel->hmask) {
@@ -78,7 +78,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
k = &knode->sel->keys[0];
if (k->offmask) {
- NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported");
+ NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported");
return false;
}
if (k->off) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 43a57ec296fd..127c89b22ef0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -431,12 +431,16 @@ struct qed_qm_info {
u8 num_pf_rls;
};
+#define QED_OVERFLOW_BIT 1
+
struct qed_db_recovery_info {
struct list_head list;
/* Lock to protect the doorbell recovery mechanism list */
spinlock_t lock;
+ bool dorq_attn;
u32 db_recovery_counter;
+ unsigned long overflow;
};
struct storm_stats {
@@ -920,8 +924,7 @@ u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
-void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
- enum qed_db_rec_exec db_exec);
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
/* Other Linux specific common definitions */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 9df8c4b3b54e..866cdc86a3f2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -102,11 +102,15 @@ static void qed_db_recovery_dp_entry(struct qed_hwfn *p_hwfn,
/* Doorbell address sanity (address within doorbell bar range) */
static bool qed_db_rec_sanity(struct qed_dev *cdev,
- void __iomem *db_addr, void *db_data)
+ void __iomem *db_addr,
+ enum qed_db_rec_width db_width,
+ void *db_data)
{
+ u32 width = (db_width == DB_REC_WIDTH_32B) ? 32 : 64;
+
/* Make sure doorbell address is within the doorbell bar */
if (db_addr < cdev->doorbells ||
- (u8 __iomem *)db_addr >
+ (u8 __iomem *)db_addr + width >
(u8 __iomem *)cdev->doorbells + cdev->db_size) {
WARN(true,
"Illegal doorbell address: %p. Legal range for doorbell addresses is [%p..%p]\n",
@@ -159,7 +163,7 @@ int qed_db_recovery_add(struct qed_dev *cdev,
}
/* Sanitize doorbell address */
- if (!qed_db_rec_sanity(cdev, db_addr, db_data))
+ if (!qed_db_rec_sanity(cdev, db_addr, db_width, db_data))
return -EINVAL;
/* Obtain hwfn from doorbell address */
@@ -205,10 +209,6 @@ int qed_db_recovery_del(struct qed_dev *cdev,
return 0;
}
- /* Sanitize doorbell address */
- if (!qed_db_rec_sanity(cdev, db_addr, db_data))
- return -EINVAL;
-
/* Obtain hwfn from doorbell address */
p_hwfn = qed_db_rec_find_hwfn(cdev, db_addr);
@@ -300,31 +300,24 @@ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn)
/* Ring the doorbell of a single doorbell recovery entry */
static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
- struct qed_db_recovery_entry *db_entry,
- enum qed_db_rec_exec db_exec)
-{
- if (db_exec != DB_REC_ONCE) {
- /* Print according to width */
- if (db_entry->db_width == DB_REC_WIDTH_32B) {
- DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
- "%s doorbell address %p data %x\n",
- db_exec == DB_REC_DRY_RUN ?
- "would have rung" : "ringing",
- db_entry->db_addr,
- *(u32 *)db_entry->db_data);
- } else {
- DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
- "%s doorbell address %p data %llx\n",
- db_exec == DB_REC_DRY_RUN ?
- "would have rung" : "ringing",
- db_entry->db_addr,
- *(u64 *)(db_entry->db_data));
- }
+ struct qed_db_recovery_entry *db_entry)
+{
+ /* Print according to width */
+ if (db_entry->db_width == DB_REC_WIDTH_32B) {
+ DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+ "ringing doorbell address %p data %x\n",
+ db_entry->db_addr,
+ *(u32 *)db_entry->db_data);
+ } else {
+ DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+ "ringing doorbell address %p data %llx\n",
+ db_entry->db_addr,
+ *(u64 *)(db_entry->db_data));
}
/* Sanity */
if (!qed_db_rec_sanity(p_hwfn->cdev, db_entry->db_addr,
- db_entry->db_data))
+ db_entry->db_width, db_entry->db_data))
return;
/* Flush the write combined buffer. Since there are multiple doorbelling
@@ -334,14 +327,12 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
wmb();
/* Ring the doorbell */
- if (db_exec == DB_REC_REAL_DEAL || db_exec == DB_REC_ONCE) {
- if (db_entry->db_width == DB_REC_WIDTH_32B)
- DIRECT_REG_WR(db_entry->db_addr,
- *(u32 *)(db_entry->db_data));
- else
- DIRECT_REG_WR64(db_entry->db_addr,
- *(u64 *)(db_entry->db_data));
- }
+ if (db_entry->db_width == DB_REC_WIDTH_32B)
+ DIRECT_REG_WR(db_entry->db_addr,
+ *(u32 *)(db_entry->db_data));
+ else
+ DIRECT_REG_WR64(db_entry->db_addr,
+ *(u64 *)(db_entry->db_data));
/* Flush the write combined buffer. Next doorbell may come from a
* different entity to the same address...
@@ -350,29 +341,21 @@ static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
}
/* Traverse the doorbell recovery entry list and ring all the doorbells */
-void qed_db_recovery_execute(struct qed_hwfn *p_hwfn,
- enum qed_db_rec_exec db_exec)
+void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
{
struct qed_db_recovery_entry *db_entry = NULL;
- if (db_exec != DB_REC_ONCE) {
- DP_NOTICE(p_hwfn,
- "Executing doorbell recovery. Counter was %d\n",
- p_hwfn->db_recovery_info.db_recovery_counter);
+ DP_NOTICE(p_hwfn, "Executing doorbell recovery. Counter was %d\n",
+ p_hwfn->db_recovery_info.db_recovery_counter);
- /* Track amount of times recovery was executed */
- p_hwfn->db_recovery_info.db_recovery_counter++;
- }
+ /* Track amount of times recovery was executed */
+ p_hwfn->db_recovery_info.db_recovery_counter++;
/* Protect the list */
spin_lock_bh(&p_hwfn->db_recovery_info.lock);
list_for_each_entry(db_entry,
- &p_hwfn->db_recovery_info.list, list_entry) {
- qed_db_recovery_ring(p_hwfn, db_entry, db_exec);
- if (db_exec == DB_REC_ONCE)
- break;
- }
-
+ &p_hwfn->db_recovery_info.list, list_entry)
+ qed_db_recovery_ring(p_hwfn, db_entry);
spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
}
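
With the dry-run/run-once modes removed, qed_db_recovery_execute() reduces to walking the registered entries under the recovery spinlock and re-ringing each doorbell. A stripped-down sketch of that shape, using illustrative types rather than the qed structures:

	#include <linux/types.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/io.h>

	struct db_entry {
		struct list_head list_entry;
		void __iomem *addr;
		u32 data;
	};

	struct db_recovery {
		struct list_head list;
		spinlock_t lock;	/* protects the entry list */
	};

	/* ring every registered doorbell again after an overflow */
	static void db_recovery_execute(struct db_recovery *rec)
	{
		struct db_entry *entry;

		spin_lock_bh(&rec->lock);
		list_for_each_entry(entry, &rec->list, list_entry)
			writel(entry->data, entry->addr);
		spin_unlock_bh(&rec->lock);
	}
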
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index e23980e301b6..8848d5bed6e5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -378,6 +378,9 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
u32 count = QED_DB_REC_COUNT;
u32 usage = 1;
+ /* Flush any pending (e)dpms as they may never arrive */
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
+
/* wait for usage to zero or count to run out. This is necessary since
* EDPM doorbell transactions can take multiple 64b cycles, and as such
* can "split" over the pci. Possibly, the doorbell drop can happen with
@@ -406,51 +409,74 @@ static int qed_db_rec_flush_queue(struct qed_hwfn *p_hwfn,
int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
- u32 overflow;
+ u32 attn_ovfl, cur_ovfl;
int rc;
- overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
- DP_NOTICE(p_hwfn, "PF Overflow sticky 0x%x\n", overflow);
- if (!overflow) {
- qed_db_recovery_execute(p_hwfn, DB_REC_ONCE);
+ attn_ovfl = test_and_clear_bit(QED_OVERFLOW_BIT,
+ &p_hwfn->db_recovery_info.overflow);
+ cur_ovfl = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+ if (!cur_ovfl && !attn_ovfl)
return 0;
- }
- if (qed_edpm_enabled(p_hwfn)) {
+ DP_NOTICE(p_hwfn, "PF Overflow sticky: attn %u current %u\n",
+ attn_ovfl, cur_ovfl);
+
+ if (cur_ovfl && !p_hwfn->db_bar_no_edpm) {
rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
if (rc)
return rc;
}
- /* Flush any pending (e)dpm as they may never arrive */
- qed_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
-
/* Release overflow sticky indication (stop silently dropping everything) */
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
/* Repeat all last doorbells (doorbell drop recovery) */
- qed_db_recovery_execute(p_hwfn, DB_REC_REAL_DEAL);
+ qed_db_recovery_execute(p_hwfn);
return 0;
}
-static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+static void qed_dorq_attn_overflow(struct qed_hwfn *p_hwfn)
{
- u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+ u32 overflow;
int rc;
- int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
- DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
+ overflow = qed_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+ if (!overflow)
+ goto out;
+
+ /* Run PF doorbell recovery in next periodic handler */
+ set_bit(QED_OVERFLOW_BIT, &p_hwfn->db_recovery_info.overflow);
+
+ if (!p_hwfn->db_bar_no_edpm) {
+ rc = qed_db_rec_flush_queue(p_hwfn, p_ptt);
+ if (rc)
+ goto out;
+ }
+
+ qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY, 0x0);
+out:
+ /* Schedule the handler even if overflow was not detected */
+ qed_periodic_db_rec_start(p_hwfn);
+}
+
+static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
+{
+ u32 int_sts, first_drop_reason, details, address, all_drops_reason;
+ struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
+ int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
+ DP_NOTICE(p_hwfn->cdev, "DORQ attention. int_sts was %x\n", int_sts);
+
/* check if db_drop or overflow happened */
if (int_sts & (DORQ_REG_INT_STS_DB_DROP |
DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR)) {
@@ -477,11 +503,6 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
GET_FIELD(details, QED_DORQ_ATTENTION_SIZE) * 4,
first_drop_reason, all_drops_reason);
- rc = qed_db_rec_handler(p_hwfn, p_ptt);
- qed_periodic_db_rec_start(p_hwfn);
- if (rc)
- return rc;
-
/* Clear the doorbell drop details and prepare for next drop */
qed_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
@@ -507,6 +528,25 @@ static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
return -EINVAL;
}
+static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
+{
+ p_hwfn->db_recovery_info.dorq_attn = true;
+ qed_dorq_attn_overflow(p_hwfn);
+
+ return qed_dorq_attn_int_sts(p_hwfn);
+}
+
+static void qed_dorq_attn_handler(struct qed_hwfn *p_hwfn)
+{
+ if (p_hwfn->db_recovery_info.dorq_attn)
+ goto out;
+
+ /* Call DORQ callback if the attention was missed */
+ qed_dorq_attn_cb(p_hwfn);
+out:
+ p_hwfn->db_recovery_info.dorq_attn = false;
+}
+
/* Instead of major changes to the data-structure, we have a some 'special'
* identifiers for sources that changed meaning between adapters.
*/
@@ -1080,6 +1120,9 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn,
}
}
+ /* Handle missed DORQ attention */
+ qed_dorq_attn_handler(p_hwfn);
+
/* Clear IGU indication for the deasserted bits */
DIRECT_REG_WR((u8 __iomem *)p_hwfn->regview +
GTT_BAR0_MAP_REG_IGU_CMD +
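
The qed_int.c changes move the heavy overflow handling out of the attention callback: the callback only latches the event in an atomic bit and schedules the periodic handler, which later consumes it with test_and_clear_bit(). A minimal sketch of that hand-off (names are illustrative):

	#include <linux/types.h>
	#include <linux/bitops.h>

	#define OVFL_BIT	1

	struct recovery_state {
		unsigned long overflow;
	};

	/* attention/interrupt context: record the event and defer the real work */
	static void attn_latch_overflow(struct recovery_state *st)
	{
		set_bit(OVFL_BIT, &st->overflow);
		/* schedule the periodic recovery handler here */
	}

	/* periodic handler: consume the latched event exactly once */
	static bool recovery_consume_overflow(struct recovery_state *st)
	{
		return test_and_clear_bit(OVFL_BIT, &st->overflow);
	}
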
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 1f356ed4f761..d473b522afc5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -192,8 +192,8 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev);
/**
* @brief - Doorbell Recovery handler.
- * Run DB_REAL_DEAL doorbell recovery in case of PF overflow
- * (and flush DORQ if needed), otherwise run DB_REC_ONCE.
+ * Run doorbell recovery in case of PF overflow (and flush DORQ if
+ * needed).
*
* @param p_hwfn
* @param p_ptt
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index f164d4acebcb..6de23b56b294 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -970,7 +970,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
}
}
-#define QED_PERIODIC_DB_REC_COUNT 100
+#define QED_PERIODIC_DB_REC_COUNT 10
#define QED_PERIODIC_DB_REC_INTERVAL_MS 100
#define QED_PERIODIC_DB_REC_INTERVAL \
msecs_to_jiffies(QED_PERIODIC_DB_REC_INTERVAL_MS)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 9faaa6df78ed..2f318aaf2b05 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -1591,7 +1591,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN;
} else {
DP_INFO(p_hwfn,
- "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
+ "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's fastpath HSI %02x.%02x\n",
vf->abs_vf_id,
req->vfdev_info.eth_fp_hsi_major,
req->vfdev_info.eth_fp_hsi_minor,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index 5f3f42a25361..bddb2b5982dc 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -490,18 +490,17 @@ int qede_ptp_enable(struct qede_dev *edev, bool init_tc)
ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev);
if (IS_ERR(ptp->clock)) {
- rc = -EINVAL;
DP_ERR(edev, "PTP clock registration failed\n");
+ qede_ptp_disable(edev);
+ rc = -EINVAL;
goto err2;
}
return 0;
-err2:
- qede_ptp_disable(edev);
- ptp->clock = NULL;
err1:
kfree(ptp);
+err2:
edev->ptp = NULL;
return rc;
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index a18149720aa2..cba5881b2746 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -673,7 +673,8 @@ static void netsec_process_tx(struct netsec_priv *priv)
}
static void *netsec_alloc_rx_data(struct netsec_priv *priv,
- dma_addr_t *dma_handle, u16 *desc_len)
+ dma_addr_t *dma_handle, u16 *desc_len,
+ bool napi)
{
size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
size_t payload_len = NETSEC_RX_BUF_SZ;
@@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
- buf = napi_alloc_frag(total_len);
+ buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
if (!buf)
return NULL;
@@ -765,7 +766,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
/* allocate a fresh buffer and map it to the hardware.
* This will eventually replace the old buffer in the hardware
*/
- buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+ buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
+ true);
if (unlikely(!buf_addr))
break;
@@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
void *buf;
u16 len;
- buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+ buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
+ false);
if (!buf) {
netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
goto err_out;
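
The new 'napi' flag in netsec_alloc_rx_data() exists because napi_alloc_frag() is only safe from softirq (NAPI poll) context, while the RX ring is also filled from process context during setup. The decision boils down to this sketch:

	#include <linux/skbuff.h>

	static void *rx_alloc_frag(unsigned int len, bool napi)
	{
		/* napi_alloc_frag() must only be called from softirq context */
		return napi ? napi_alloc_frag(len) : netdev_alloc_frag(len);
	}
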
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 062a600fa5a7..21428537e231 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -333,6 +333,9 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
*/
dwmac->irq_pwr_wakeup = platform_get_irq_byname(pdev,
"stm32_pwr_wakeup");
+ if (dwmac->irq_pwr_wakeup == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
if (!dwmac->clk_eth_ck && dwmac->irq_pwr_wakeup >= 0) {
err = device_init_wakeup(&pdev->dev, true);
if (err) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index b7dd4e3c760d..6d690678c20e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
p->des0 |= cpu_to_le32(RDES0_OWN);
bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
- p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK);
+ p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);
if (mode == STMMAC_CHAIN_MODE)
ndesc_rx_set_on_chain(p, end);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a26e36dbb5df..48712437d0da 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2616,8 +2616,6 @@ static int stmmac_open(struct net_device *dev)
u32 chan;
int ret;
- stmmac_check_ether_addr(priv);
-
if (priv->hw->pcs != STMMAC_PCS_RGMII &&
priv->hw->pcs != STMMAC_PCS_TBI &&
priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -4303,6 +4301,8 @@ int stmmac_dvr_probe(struct device *device,
if (ret)
goto error_hw_init;
+ stmmac_check_ether_addr(priv);
+
/* Configure real RX and TX queues */
netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index d819e8eaba12..26db6aa002d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = {
},
.driver_data = (void *)&galileo_stmmac_dmi_data,
},
+ /*
+ * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+ * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+ * has only one pci network device while other asset tags are
+ * for IOT2040 which has two.
+ */
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = {
{
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
- DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
- "6ES7647-0AA00-1YA2"),
},
.driver_data = (void *)&iot2040_stmmac_dmi_data,
},
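
The quark_pci_dmi[] edit relies on match ordering: the entry that still carries the IOT2020 asset tag has to come before the bare board-name entry, which now acts as the IOT2040 catch-all. A generic sketch of such a most-specific-first DMI table, with made-up board names and quirk data:

	#include <linux/dmi.h>

	struct board_quirk { int variant; };

	static const struct board_quirk quirk_a = { .variant = 1 };
	static const struct board_quirk quirk_b = { .variant = 2 };

	/* most specific entry first; the bare board-name entry catches the rest */
	static const struct dmi_system_id board_dmi[] = {
		{
			.matches = {
				DMI_EXACT_MATCH(DMI_BOARD_NAME, "EXAMPLE BOARD"),
				DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG, "VARIANT-A-TAG"),
			},
			.driver_data = (void *)&quirk_a,
		},
		{
			.matches = {
				DMI_EXACT_MATCH(DMI_BOARD_NAME, "EXAMPLE BOARD"),
			},
			.driver_data = (void *)&quirk_b,
		},
		{}
	};

	static const struct board_quirk *board_lookup(void)
	{
		const struct dmi_system_id *m = dmi_first_match(board_dmi);

		return m ? m->driver_data : NULL;
	}
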
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index c589f5ae75bb..8bb53ec8d9cf 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -533,6 +533,8 @@ mcr20a_start(struct ieee802154_hw *hw)
dev_dbg(printdev(lp), "no slotted operation\n");
ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
DAR_PHY_CTRL1_SLOTTED, 0x0);
+ if (ret < 0)
+ return ret;
/* enable irq */
enable_irq(lp->spi->irq);
@@ -540,11 +542,15 @@ mcr20a_start(struct ieee802154_hw *hw)
/* Unmask SEQ interrupt */
ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2,
DAR_PHY_CTRL2_SEQMSK, 0x0);
+ if (ret < 0)
+ return ret;
/* Start the RX sequence */
dev_dbg(printdev(lp), "start the RX sequence\n");
ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1,
DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX);
+ if (ret < 0)
+ return ret;
return 0;
}
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 3ccba37bd6dd..f76c4048b978 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1489,9 +1489,10 @@ static int marvell_get_sset_count(struct phy_device *phydev)
static void marvell_get_strings(struct phy_device *phydev, u8 *data)
{
+ int count = marvell_get_sset_count(phydev);
int i;
- for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) {
+ for (i = 0; i < count; i++) {
strlcpy(data + i * ETH_GSTRING_LEN,
marvell_hw_stats[i].string, ETH_GSTRING_LEN);
}
@@ -1519,9 +1520,10 @@ static u64 marvell_get_stat(struct phy_device *phydev, int i)
static void marvell_get_stats(struct phy_device *phydev,
struct ethtool_stats *stats, u64 *data)
{
+ int count = marvell_get_sset_count(phydev);
int i;
- for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++)
+ for (i = 0; i < count; i++)
data[i] = marvell_get_stat(phydev, i);
}
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index 92b64e254b44..7475cef17cf7 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -159,6 +159,14 @@ static const struct spi_device_id ks8995_id[] = {
};
MODULE_DEVICE_TABLE(spi, ks8995_id);
+static const struct of_device_id ks8895_spi_of_match[] = {
+ { .compatible = "micrel,ks8995" },
+ { .compatible = "micrel,ksz8864" },
+ { .compatible = "micrel,ksz8795" },
+ { },
+ };
+MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
+
static inline u8 get_chip_id(u8 val)
{
return (val >> ID1_CHIPID_S) & ID1_CHIPID_M;
@@ -526,6 +534,7 @@ static int ks8995_remove(struct spi_device *spi)
static struct spi_driver ks8995_driver = {
.driver = {
.name = "spi-ks8995",
+ .of_match_table = of_match_ptr(ks8895_spi_of_match),
},
.probe = ks8995_probe,
.remove = ks8995_remove,
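
The ks8995 hunk wires an of_device_id table into the spi_driver so the switch can be matched from device tree. A self-contained sketch of the same wiring for a hypothetical device (compatible strings and names are made up):

	#include <linux/module.h>
	#include <linux/mod_devicetable.h>
	#include <linux/of.h>
	#include <linux/spi/spi.h>

	static int example_sw_probe(struct spi_device *spi)
	{
		return 0;	/* set up the device here */
	}

	static int example_sw_remove(struct spi_device *spi)
	{
		return 0;
	}

	static const struct of_device_id example_sw_of_match[] = {
		{ .compatible = "vendor,example-switch" },
		{ }
	};
	MODULE_DEVICE_TABLE(of, example_sw_of_match);

	static struct spi_driver example_sw_driver = {
		.driver = {
			.name		= "example-switch",
			.of_match_table	= of_match_ptr(example_sw_of_match),
		},
		.probe = example_sw_probe,
		.remove = example_sw_remove,
	};
	module_spi_driver(example_sw_driver);

	MODULE_LICENSE("GPL");
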
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index f4e93f5fc204..ea90db3c7705 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -153,7 +153,7 @@ out_fail:
void
slhc_free(struct slcompress *comp)
{
- if ( comp == NULLSLCOMPR )
+ if ( IS_ERR_OR_NULL(comp) )
return;
if ( comp->tstate != NULLSLSTATE )
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 6ed96fdfd96d..16963f7a88f7 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1156,6 +1156,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
return -EINVAL;
}
+ if (netdev_has_upper_dev(dev, port_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface");
+ netdev_err(dev, "Device %s is already an upper device of the team interface\n",
+ portname);
+ return -EBUSY;
+ }
+
if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
vlan_uses_dev(dev)) {
NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
@@ -1246,6 +1253,23 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
goto err_option_port_add;
}
+ /* set promiscuity level to new slave */
+ if (dev->flags & IFF_PROMISC) {
+ err = dev_set_promiscuity(port_dev, 1);
+ if (err)
+ goto err_set_slave_promisc;
+ }
+
+ /* set allmulti level to new slave */
+ if (dev->flags & IFF_ALLMULTI) {
+ err = dev_set_allmulti(port_dev, 1);
+ if (err) {
+ if (dev->flags & IFF_PROMISC)
+ dev_set_promiscuity(port_dev, -1);
+ goto err_set_slave_promisc;
+ }
+ }
+
netif_addr_lock_bh(dev);
dev_uc_sync_multiple(port_dev, dev);
dev_mc_sync_multiple(port_dev, dev);
@@ -1262,6 +1286,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
return 0;
+err_set_slave_promisc:
+ __team_option_inst_del_port(team, port);
+
err_option_port_add:
team_upper_dev_unlink(team, port);
@@ -1307,6 +1334,12 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
team_port_disable(team, port);
list_del_rcu(&port->list);
+
+ if (dev->flags & IFF_PROMISC)
+ dev_set_promiscuity(port_dev, -1);
+ if (dev->flags & IFF_ALLMULTI)
+ dev_set_allmulti(port_dev, -1);
+
team_upper_dev_unlink(team, port);
netdev_rx_handler_unregister(port_dev);
team_port_disable_netpoll(port);
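
The team hunks keep a new slave's promiscuity and allmulti counts in step with the master, and unwind the promisc bump if the allmulti bump fails. The add-side logic, reduced to a sketch with an illustrative helper rather than the team code itself:

	#include <linux/netdevice.h>

	static int port_sync_rx_flags(struct net_device *master,
				      struct net_device *slave)
	{
		int err;

		if (master->flags & IFF_PROMISC) {
			err = dev_set_promiscuity(slave, 1);
			if (err)
				return err;
		}

		if (master->flags & IFF_ALLMULTI) {
			err = dev_set_allmulti(slave, 1);
			if (err) {
				/* roll back the promisc bump on failure */
				if (master->flags & IFF_PROMISC)
					dev_set_promiscuity(slave, -1);
				return err;
			}
		}

		return 0;
	}

On removal the counts are simply decremented again, which is what the team_port_del hunk above does.
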
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 9195f3476b1d..679e404a5224 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1122,9 +1122,16 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */
{QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
+ {QMI_FIXED_INTF(0x1435, 0x0918, 3)}, /* Wistron NeWeb D16Q1 */
+ {QMI_FIXED_INTF(0x1435, 0x0918, 4)}, /* Wistron NeWeb D16Q1 */
+ {QMI_FIXED_INTF(0x1435, 0x0918, 5)}, /* Wistron NeWeb D16Q1 */
+ {QMI_FIXED_INTF(0x1435, 0x3185, 4)}, /* Wistron NeWeb M18Q5 */
+ {QMI_FIXED_INTF(0x1435, 0xd111, 4)}, /* M9615A DM11-1 D51QC */
{QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */
{QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */
{QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */
+ {QMI_FIXED_INTF(0x1435, 0xd182, 4)}, /* Wistron NeWeb D18 */
+ {QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */
{QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */
{QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */
{QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */
@@ -1180,6 +1187,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */
{QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */
{QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */
+ {QMI_FIXED_INTF(0x19d2, 0x0396, 3)}, /* ZTE ZM8620 */
{QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */
{QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */
{QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */
@@ -1200,7 +1208,9 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x19d2, 0x1425, 2)},
{QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */
{QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */
+ {QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */
{QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */
+ {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */
{QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */
{QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */
{QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index cd15c32b2e43..9ee4d7402ca2 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -875,6 +875,7 @@ static const struct net_device_ops vrf_netdev_ops = {
.ndo_init = vrf_dev_init,
.ndo_uninit = vrf_dev_uninit,
.ndo_start_xmit = vrf_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
.ndo_get_stats64 = vrf_get_stats64,
.ndo_add_slave = vrf_add_slave,
.ndo_del_slave = vrf_del_slave,
@@ -1274,6 +1275,7 @@ static void vrf_setup(struct net_device *dev)
/* default to no qdisc; user can add if desired */
dev->priv_flags |= IFF_NO_QUEUE;
dev->priv_flags |= IFF_NO_RX_HANDLER;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
/* VRF devices do not care about MTU, but if the MTU is set
* too low then the ipv4 and ipv6 protocols are disabled
diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index 24b983edb357..eca87f7c5b6c 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c
@@ -1855,7 +1855,7 @@ void ath10k_ce_dump_registers(struct ath10k *ar,
struct ath10k_ce_crash_data ce_data;
u32 addr, id;
- lockdep_assert_held(&ar->data_lock);
+ lockdep_assert_held(&ar->dump_mutex);
ath10k_err(ar, "Copy Engine register dump:\n");
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 835b8de92d55..aff585658fc0 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -3119,6 +3119,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
goto err_free_wq;
mutex_init(&ar->conf_mutex);
+ mutex_init(&ar->dump_mutex);
spin_lock_init(&ar->data_lock);
INIT_LIST_HEAD(&ar->peers);
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index e08a17b01e03..e35aae5146f1 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -1063,6 +1063,9 @@ struct ath10k {
/* prevents concurrent FW reconfiguration */
struct mutex conf_mutex;
+ /* protects coredump data */
+ struct mutex dump_mutex;
+
/* protects shared structure data */
spinlock_t data_lock;
diff --git a/drivers/net/wireless/ath/ath10k/coredump.c b/drivers/net/wireless/ath/ath10k/coredump.c
index 33838d9c1cb6..45a355fb62b9 100644
--- a/drivers/net/wireless/ath/ath10k/coredump.c
+++ b/drivers/net/wireless/ath/ath10k/coredump.c
@@ -1102,7 +1102,7 @@ struct ath10k_fw_crash_data *ath10k_coredump_new(struct ath10k *ar)
{
struct ath10k_fw_crash_data *crash_data = ar->coredump.fw_crash_data;
- lockdep_assert_held(&ar->data_lock);
+ lockdep_assert_held(&ar->dump_mutex);
if (ath10k_coredump_mask == 0)
/* coredump disabled */
@@ -1146,7 +1146,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar)
if (!buf)
return NULL;
- spin_lock_bh(&ar->data_lock);
+ mutex_lock(&ar->dump_mutex);
dump_data = (struct ath10k_dump_file_data *)(buf);
strlcpy(dump_data->df_magic, "ATH10K-FW-DUMP",
@@ -1213,7 +1213,7 @@ static struct ath10k_dump_file_data *ath10k_coredump_build(struct ath10k *ar)
sofar += sizeof(*dump_tlv) + crash_data->ramdump_buf_len;
}
- spin_unlock_bh(&ar->data_lock);
+ mutex_unlock(&ar->dump_mutex);
return dump_data;
}
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index a20ea270d519..1acc622d2183 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2728,7 +2728,7 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
num_msdus++;
num_bytes += ret;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ieee80211_txq_schedule_end(hw, txq->ac);
record->num_msdus = cpu_to_le16(num_msdus);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index b73c23d4ce86..9c703d287333 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4089,7 +4089,7 @@ static int ath10k_mac_schedule_txq(struct ieee80211_hw *hw, u32 ac)
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ath10k_htt_tx_txq_update(hw, txq);
if (ret == -EBUSY)
break;
@@ -4374,7 +4374,7 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw,
if (ret < 0)
break;
}
- ieee80211_return_txq(hw, txq);
+ ieee80211_return_txq(hw, txq, false);
ath10k_htt_tx_txq_update(hw, txq);
out:
ieee80211_txq_schedule_end(hw, ac);
@@ -5774,7 +5774,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw,
}
if (changed & BSS_CHANGED_MCAST_RATE &&
- !WARN_ON(ath10k_mac_vif_chan(arvif->vif, &def))) {
+ !ath10k_mac_vif_chan(arvif->vif, &def)) {
band = def.chan->band;
rateidx = vif->bss_conf.mcast_rate[band] - 1;
@@ -5812,7 +5812,7 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw,
}
if (changed & BSS_CHANGED_BASIC_RATES) {
- if (WARN_ON(ath10k_mac_vif_chan(vif, &def))) {
+ if (ath10k_mac_vif_chan(vif, &def)) {
mutex_unlock(&ar->conf_mutex);
return;
}
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 271f92c24d44..2c27f407a851 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1441,7 +1441,7 @@ static void ath10k_pci_dump_registers(struct ath10k *ar,
__le32 reg_dump_values[REG_DUMP_COUNT_QCA988X] = {};
int i, ret;
- lockdep_assert_held(&ar->data_lock);
+ lockdep_assert_held(&ar->dump_mutex);
ret = ath10k_pci_diag_read_hi(ar, &reg_dump_values[0],
hi_failure_state,
@@ -1656,7 +1656,7 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
int ret, i;
u8 *buf;
- lockdep_assert_held(&ar->data_lock);
+ lockdep_assert_held(&ar->dump_mutex);
if (!crash_data)
return;
@@ -1734,14 +1734,19 @@ static void ath10k_pci_dump_memory(struct ath10k *ar,
}
}
-static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
+static void ath10k_pci_fw_dump_work(struct work_struct *work)
{
+ struct ath10k_pci *ar_pci = container_of(work, struct ath10k_pci,
+ dump_work);
struct ath10k_fw_crash_data *crash_data;
+ struct ath10k *ar = ar_pci->ar;
char guid[UUID_STRING_LEN + 1];
- spin_lock_bh(&ar->data_lock);
+ mutex_lock(&ar->dump_mutex);
+ spin_lock_bh(&ar->data_lock);
ar->stats.fw_crash_counter++;
+ spin_unlock_bh(&ar->data_lock);
crash_data = ath10k_coredump_new(ar);
@@ -1756,11 +1761,18 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
ath10k_ce_dump_registers(ar, crash_data);
ath10k_pci_dump_memory(ar, crash_data);
- spin_unlock_bh(&ar->data_lock);
+ mutex_unlock(&ar->dump_mutex);
queue_work(ar->workqueue, &ar->restart_work);
}
+static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+ queue_work(ar->workqueue, &ar_pci->dump_work);
+}
+
void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
int force)
{
@@ -3442,6 +3454,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar)
spin_lock_init(&ar_pci->ps_lock);
mutex_init(&ar_pci->ce_diag_mutex);
+ INIT_WORK(&ar_pci->dump_work, ath10k_pci_fw_dump_work);
+
timer_setup(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry, 0);
if (QCA_REV_6174(ar) || QCA_REV_9377(ar))
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 3773c79f322f..4455ed6c5275 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -121,6 +121,8 @@ struct ath10k_pci {
/* For protecting ce_diag */
struct mutex ce_diag_mutex;
+ struct work_struct dump_work;
+
struct ath10k_ce ce;
struct timer_list rx_post_retry;
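
The ath10k changes stop building the coredump under data_lock (a BH spinlock) and instead queue a work item that serializes on the new dump_mutex, so the lengthy register and memory dump can sleep. In outline, with an illustrative context struct:

	#include <linux/workqueue.h>
	#include <linux/mutex.h>
	#include <linux/spinlock.h>

	struct dump_ctx {
		struct work_struct dump_work;
		struct mutex dump_mutex;	/* serializes coredump builders */
		spinlock_t data_lock;		/* fast-path stats only */
		unsigned int crash_counter;
	};

	static void fw_dump_work(struct work_struct *work)
	{
		struct dump_ctx *ctx = container_of(work, struct dump_ctx, dump_work);

		mutex_lock(&ctx->dump_mutex);

		spin_lock_bh(&ctx->data_lock);
		ctx->crash_counter++;
		spin_unlock_bh(&ctx->data_lock);

		/* long-running register/memory dump, may sleep */

		mutex_unlock(&ctx->dump_mutex);
	}

	/* called from the (atomic) crash path: only queue the work */
	static void fw_crashed(struct dump_ctx *ctx, struct workqueue_struct *wq)
	{
		queue_work(wq, &ctx->dump_work);
	}
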
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 773d428ff1b0..b17e1ca40995 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1938,12 +1938,15 @@ void ath_txq_schedule(struct ath_softc *sc, struct ath_txq *txq)
goto out;
while ((queue = ieee80211_next_txq(hw, txq->mac80211_qnum))) {
+ bool force;
+
tid = (struct ath_atx_tid *)queue->drv_priv;
ret = ath_tx_sched_aggr(sc, txq, tid);
ath_dbg(common, QUEUE, "ath_tx_sched_aggr returned %d\n", ret);
- ieee80211_return_txq(hw, queue);
+ force = !skb_queue_empty(&tid->retry_q);
+ ieee80211_return_txq(hw, queue, force);
}
out:
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index fdc56f821b5a..0a87d87fbb4f 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -82,6 +82,7 @@
#define IWL_22000_HR_A0_FW_PRE "iwlwifi-QuQnj-a0-hr-a0-"
#define IWL_22000_SU_Z0_FW_PRE "iwlwifi-su-z0-"
#define IWL_QU_B_JF_B_FW_PRE "iwlwifi-Qu-b0-jf-b0-"
+#define IWL_QUZ_A_HR_B_FW_PRE "iwlwifi-QuZ-a0-hr-b0-"
#define IWL_QNJ_B_JF_B_FW_PRE "iwlwifi-QuQnj-b0-jf-b0-"
#define IWL_CC_A_FW_PRE "iwlwifi-cc-a0-"
#define IWL_22000_SO_A_JF_B_FW_PRE "iwlwifi-so-a0-jf-b0-"
@@ -105,8 +106,8 @@
IWL_22000_HR_A0_FW_PRE __stringify(api) ".ucode"
#define IWL_22000_SU_Z0_MODULE_FIRMWARE(api) \
IWL_22000_SU_Z0_FW_PRE __stringify(api) ".ucode"
-#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \
- IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \
+ IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode"
#define IWL_QU_B_JF_B_MODULE_FIRMWARE(api) \
IWL_QU_B_JF_B_FW_PRE __stringify(api) ".ucode"
#define IWL_QNJ_B_JF_B_MODULE_FIRMWARE(api) \
@@ -200,7 +201,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
#define IWL_DEVICE_AX210 \
IWL_DEVICE_AX200_COMMON, \
.device_family = IWL_DEVICE_FAMILY_AX210, \
- .base_params = &iwl_22000_base_params, \
+ .base_params = &iwl_22560_base_params, \
.csr = &iwl_csr_v1, \
.min_txq_size = 128
@@ -235,8 +236,20 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = {
.max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
};
-const struct iwl_cfg iwl22260_2ax_cfg = {
- .name = "Intel(R) Wireless-AX 22260",
+const struct iwl_cfg iwl_ax101_cfg_quz_hr = {
+ .name = "Intel(R) Wi-Fi 6 AX101",
+ .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE,
+ IWL_DEVICE_22500,
+ /*
+ * This device doesn't support receiving BlockAck with a large bitmap
+ * so we need to restrict the size of transmitted aggregation to the
+ * HT size; mac80211 would otherwise pick the HE max (256) by default.
+ */
+ .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT,
+};
+
+const struct iwl_cfg iwl_ax200_cfg_cc = {
+ .name = "Intel(R) Wi-Fi 6 AX200 160MHz",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
@@ -249,7 +262,7 @@ const struct iwl_cfg iwl22260_2ax_cfg = {
};
const struct iwl_cfg killer1650x_2ax_cfg = {
- .name = "Killer(R) Wireless-AX 1650x Wireless Network Adapter (200NGW)",
+ .name = "Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
@@ -262,7 +275,7 @@ const struct iwl_cfg killer1650x_2ax_cfg = {
};
const struct iwl_cfg killer1650w_2ax_cfg = {
- .name = "Killer(R) Wireless-AX 1650w Wireless Network Adapter (200D2W)",
+ .name = "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)",
.fw_name_pre = IWL_CC_A_FW_PRE,
IWL_DEVICE_22500,
/*
@@ -328,7 +341,7 @@ const struct iwl_cfg killer1550s_2ac_cfg_qu_b0_jf_b0 = {
};
const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = {
- .name = "Killer(R) Wireless-AX 1650i Wireless Network Adapter (22560NGW)",
+ .name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)",
.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
IWL_DEVICE_22500,
/*
@@ -340,7 +353,7 @@ const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = {
};
const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = {
- .name = "Killer(R) Wireless-AX 1650s Wireless Network Adapter (22560D2W)",
+ .name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)",
.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
IWL_DEVICE_22500,
/*
@@ -444,6 +457,7 @@ MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_22000_HR_A0_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_22000_SU_Z0_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c
index 575a7022d045..3846064d51a5 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/5000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/5000.c
@@ -1,7 +1,7 @@
/******************************************************************************
*
* Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
@@ -136,6 +136,7 @@ const struct iwl_cfg iwl5350_agn_cfg = {
.ht_params = &iwl5000_ht_params,
.led_mode = IWL_LED_BLINK,
.internal_wimax_coex = true,
+ .csr = &iwl_csr_v1,
};
#define IWL_DEVICE_5150 \
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index f119c49cd39c..d7380016f1c0 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1614,6 +1614,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt,
if (!range) {
IWL_ERR(fwrt, "Failed to fill region header: id=%d, type=%d\n",
le32_to_cpu(reg->region_id), type);
+ memset(*data, 0, le32_to_cpu((*data)->len));
return;
}
@@ -1623,6 +1624,7 @@ iwl_dump_ini_mem(struct iwl_fw_runtime *fwrt,
if (range_size < 0) {
IWL_ERR(fwrt, "Failed to dump region: id=%d, type=%d\n",
le32_to_cpu(reg->region_id), type);
+ memset(*data, 0, le32_to_cpu((*data)->len));
return;
}
range = range + range_size;
@@ -1807,12 +1809,12 @@ _iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt,
trigger = fwrt->dump.active_trigs[id].trig;
- size = sizeof(*dump_file);
- size += iwl_fw_ini_get_trigger_len(fwrt, trigger);
-
+ size = iwl_fw_ini_get_trigger_len(fwrt, trigger);
if (!size)
return NULL;
+ size += sizeof(*dump_file);
+
dump_file = vzalloc(size);
if (!dump_file)
return NULL;
@@ -1942,14 +1944,10 @@ int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt,
iwl_dump_error_desc->len = 0;
ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0);
- if (ret) {
+ if (ret)
kfree(iwl_dump_error_desc);
- } else {
- set_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status);
-
- /* trigger nmi to halt the fw */
- iwl_force_nmi(fwrt->trans);
- }
+ else
+ iwl_trans_sync_nmi(fwrt->trans);
return ret;
}
@@ -2489,22 +2487,6 @@ IWL_EXPORT_SYMBOL(iwl_fw_dbg_apply_point);
void iwl_fwrt_stop_device(struct iwl_fw_runtime *fwrt)
{
- /* if the wait event timeout elapses instead of wake up then
- * the driver did not receive NMI interrupt and can not assume the FW
- * is halted
- */
- int ret = wait_event_timeout(fwrt->trans->fw_halt_waitq,
- !test_bit(STATUS_FW_WAIT_DUMP,
- &fwrt->trans->status),
- msecs_to_jiffies(2000));
- if (!ret) {
- /* failed to receive NMI interrupt, assuming the FW is stuck */
- set_bit(STATUS_FW_ERROR, &fwrt->trans->status);
-
- clear_bit(STATUS_FW_WAIT_DUMP, &fwrt->trans->status);
- }
-
- /* Assuming the op mode mutex is held at this point */
iwl_fw_dbg_collect_sync(fwrt);
iwl_trans_stop_device(fwrt->trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 641c95d03b15..e06407dc088b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -93,7 +93,7 @@ struct iwl_ucode_header {
} u;
};
-#define IWL_UCODE_INI_TLV_GROUP BIT(24)
+#define IWL_UCODE_INI_TLV_GROUP 0x1000000
/*
* new TLV uCode file layout
@@ -148,11 +148,14 @@ enum iwl_ucode_tlv_type {
IWL_UCODE_TLV_UMAC_DEBUG_ADDRS = 54,
IWL_UCODE_TLV_LMAC_DEBUG_ADDRS = 55,
IWL_UCODE_TLV_FW_RECOVERY_INFO = 57,
- IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP | 0x1,
- IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP | 0x2,
- IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP | 0x3,
- IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP | 0x4,
- IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP | 0x5,
+
+ IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP + 0x1,
+ IWL_UCODE_TLV_DEBUG_BASE = IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION,
+ IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP + 0x2,
+ IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP + 0x3,
+ IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP + 0x4,
+ IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP + 0x5,
+ IWL_UCODE_TLV_DEBUG_MAX = IWL_UCODE_TLV_TYPE_DEBUG_FLOW,
/* TLVs 0x1000-0x2000 are for internal driver usage */
IWL_UCODE_TLV_FW_DBG_DUMP_LST = 0x1000,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c
index 7adf4e4e841a..12310e3d2fc5 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/init.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c
@@ -76,7 +76,6 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
fwrt->ops_ctx = ops_ctx;
INIT_DELAYED_WORK(&fwrt->dump.wk, iwl_fw_error_dump_wk);
iwl_fwrt_dbgfs_register(fwrt, dbgfs_dir);
- init_waitqueue_head(&fwrt->trans->fw_halt_waitq);
}
IWL_EXPORT_SYMBOL(iwl_fw_runtime_init);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index f5f87773667b..93070848280a 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -549,8 +549,9 @@ extern const struct iwl_cfg iwl22000_2ac_cfg_hr;
extern const struct iwl_cfg iwl22000_2ac_cfg_hr_cdb;
extern const struct iwl_cfg iwl22000_2ac_cfg_jf;
extern const struct iwl_cfg iwl_ax101_cfg_qu_hr;
+extern const struct iwl_cfg iwl_ax101_cfg_quz_hr;
extern const struct iwl_cfg iwl22000_2ax_cfg_hr;
-extern const struct iwl_cfg iwl22260_2ax_cfg;
+extern const struct iwl_cfg iwl_ax200_cfg_cc;
extern const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0;
extern const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0;
extern const struct iwl_cfg killer1650x_2ax_cfg;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index aea6d03e545a..e539bc94eff7 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -327,6 +327,7 @@ enum {
#define CSR_HW_REV_TYPE_NONE (0x00001F0)
#define CSR_HW_REV_TYPE_QNJ (0x0000360)
#define CSR_HW_REV_TYPE_QNJ_B0 (0x0000364)
+#define CSR_HW_REV_TYPE_QUZ (0x0000354)
#define CSR_HW_REV_TYPE_HR_CDB (0x0000340)
#define CSR_HW_REV_TYPE_SO (0x0000370)
#define CSR_HW_REV_TYPE_TY (0x0000420)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 5798f434f68f..c7070760a10a 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -126,7 +126,8 @@ void iwl_alloc_dbg_tlv(struct iwl_trans *trans, size_t len, const u8 *data,
len -= ALIGN(tlv_len, 4);
data += sizeof(*tlv) + ALIGN(tlv_len, 4);
- if (!(tlv_type & IWL_UCODE_INI_TLV_GROUP))
+ if (tlv_type < IWL_UCODE_TLV_DEBUG_BASE ||
+ tlv_type > IWL_UCODE_TLV_DEBUG_MAX)
continue;
hdr = (void *)&tlv->data[0];
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index bbebbf3efd57..d8690acee40c 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -338,7 +338,6 @@ enum iwl_d3_status {
* are sent
* @STATUS_TRANS_IDLE: the trans is idle - general commands are not to be sent
* @STATUS_TRANS_DEAD: trans is dead - avoid any read/write operation
- * @STATUS_FW_WAIT_DUMP: if set, wait until cleared before collecting dump
*/
enum iwl_trans_status {
STATUS_SYNC_HCMD_ACTIVE,
@@ -351,7 +350,6 @@ enum iwl_trans_status {
STATUS_TRANS_GOING_IDLE,
STATUS_TRANS_IDLE,
STATUS_TRANS_DEAD,
- STATUS_FW_WAIT_DUMP,
};
static inline int
@@ -618,6 +616,7 @@ struct iwl_trans_ops {
struct iwl_trans_dump_data *(*dump_data)(struct iwl_trans *trans,
u32 dump_mask);
void (*debugfs_cleanup)(struct iwl_trans *trans);
+ void (*sync_nmi)(struct iwl_trans *trans);
};
/**
@@ -831,7 +830,6 @@ struct iwl_trans {
u32 lmac_error_event_table[2];
u32 umac_error_event_table;
unsigned int error_event_table_tlv_status;
- wait_queue_head_t fw_halt_waitq;
/* pointer to trans specific struct */
/*Ensure that this pointer will always be aligned to sizeof pointer */
@@ -1239,10 +1237,12 @@ static inline void iwl_trans_fw_error(struct iwl_trans *trans)
/* prevent double restarts due to the same erroneous FW */
if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status))
iwl_op_mode_nic_error(trans->op_mode);
+}
- if (test_and_clear_bit(STATUS_FW_WAIT_DUMP, &trans->status))
- wake_up(&trans->fw_halt_waitq);
-
+static inline void iwl_trans_sync_nmi(struct iwl_trans *trans)
+{
+ if (trans->ops->sync_nmi)
+ trans->ops->sync_nmi(trans);
}
/*****************************************************
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 2453ceabf00d..6925527d8457 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -774,8 +774,7 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
return;
mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
-
- if (!mvmvif->dbgfs_dir) {
+ if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) {
IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n",
dbgfs_dir);
return;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 00a47f6f1d81..ab68b5d53ec9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1121,7 +1121,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
ret = iwl_mvm_load_rt_fw(mvm);
if (ret) {
IWL_ERR(mvm, "Failed to start RT ucode: %d\n", ret);
- iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER);
+ if (ret != -ERFKILL)
+ iwl_fw_dbg_error_collect(&mvm->fwrt,
+ FW_DBG_TRIGGER_DRIVER);
goto error;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 3a92c09d4692..6a3b11dd2edf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2714,9 +2714,6 @@ static void iwl_mvm_stop_ap_ibss(struct ieee80211_hw *hw,
iwl_mvm_mac_ctxt_remove(mvm, vif);
- kfree(mvmvif->ap_wep_key);
- mvmvif->ap_wep_key = NULL;
-
mutex_unlock(&mvm->mutex);
}
@@ -3183,24 +3180,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
ret = iwl_mvm_update_sta(mvm, vif, sta);
} else if (old_state == IEEE80211_STA_ASSOC &&
new_state == IEEE80211_STA_AUTHORIZED) {
- /* if wep is used, need to set the key for the station now */
- if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) {
- mvm_sta->wep_key =
- kmemdup(mvmvif->ap_wep_key,
- sizeof(*mvmvif->ap_wep_key) +
- mvmvif->ap_wep_key->keylen,
- GFP_KERNEL);
- if (!mvm_sta->wep_key) {
- ret = -ENOMEM;
- goto out_unlock;
- }
-
- ret = iwl_mvm_set_sta_key(mvm, vif, sta,
- mvm_sta->wep_key,
- STA_KEY_IDX_INVALID);
- } else {
- ret = 0;
- }
+ ret = 0;
/* we don't support TDLS during DCM */
if (iwl_mvm_phy_ctx_count(mvm) > 1)
@@ -3242,17 +3222,6 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw,
NL80211_TDLS_DISABLE_LINK);
}
- /* Remove STA key if this is an AP using WEP */
- if (vif->type == NL80211_IFTYPE_AP && mvmvif->ap_wep_key) {
- int rm_ret = iwl_mvm_remove_sta_key(mvm, vif, sta,
- mvm_sta->wep_key);
-
- if (!ret)
- ret = rm_ret;
- kfree(mvm_sta->wep_key);
- mvm_sta->wep_key = NULL;
- }
-
if (unlikely(ret &&
test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED,
&mvm->status)))
@@ -3289,6 +3258,13 @@ static void iwl_mvm_sta_rc_update(struct ieee80211_hw *hw,
struct ieee80211_sta *sta, u32 changed)
{
struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+ if (changed & (IEEE80211_RC_BW_CHANGED |
+ IEEE80211_RC_SUPP_RATES_CHANGED |
+ IEEE80211_RC_NSS_CHANGED))
+ iwl_mvm_rs_rate_init(mvm, sta, mvmvif->phy_ctxt->channel->band,
+ true);
if (vif->type == NL80211_IFTYPE_STATION &&
changed & IEEE80211_RC_NSS_CHANGED)
@@ -3439,20 +3415,12 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
break;
case WLAN_CIPHER_SUITE_WEP40:
case WLAN_CIPHER_SUITE_WEP104:
- if (vif->type == NL80211_IFTYPE_AP) {
- struct iwl_mvm_vif *mvmvif =
- iwl_mvm_vif_from_mac80211(vif);
-
- mvmvif->ap_wep_key = kmemdup(key,
- sizeof(*key) + key->keylen,
- GFP_KERNEL);
- if (!mvmvif->ap_wep_key)
- return -ENOMEM;
- }
-
- if (vif->type != NL80211_IFTYPE_STATION)
- return 0;
- break;
+ if (vif->type == NL80211_IFTYPE_STATION)
+ break;
+ if (iwl_mvm_has_new_tx_api(mvm))
+ return -EOPNOTSUPP;
+ /* support HW crypto on TX */
+ return 0;
default:
/* currently FW supports only one optional cipher scheme */
if (hw->n_cipher_schemes &&
@@ -3540,12 +3508,17 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset);
if (ret) {
IWL_WARN(mvm, "set key failed\n");
+ key->hw_key_idx = STA_KEY_IDX_INVALID;
/*
* can't add key for RX, but we don't need it
- * in the device for TX so still return 0
+ * in the device for TX so still return 0,
+ * unless we have new TX API where we cannot
+ * put key material into the TX_CMD
*/
- key->hw_key_idx = STA_KEY_IDX_INVALID;
- ret = 0;
+ if (iwl_mvm_has_new_tx_api(mvm))
+ ret = -EOPNOTSUPP;
+ else
+ ret = 0;
}
break;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index bca6f6b536d9..a50dc53df086 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -498,7 +498,6 @@ struct iwl_mvm_vif {
netdev_features_t features;
struct iwl_probe_resp_data __rcu *probe_resp_data;
- struct ieee80211_key_conf *ap_wep_key;
};
static inline struct iwl_mvm_vif *
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index ba27dce4c2bb..13681b03c10e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -834,7 +834,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
mutex_lock(&mvm->mutex);
iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE);
err = iwl_run_init_mvm_ucode(mvm, true);
- if (err)
+ if (err && err != -ERFKILL)
iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER);
if (!iwlmvm_mod_params.init_dbg || !err)
iwl_mvm_stop_device(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 1e03acf30762..b516fd1867ec 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -169,9 +169,9 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
}
/* iwl_mvm_create_skb Adds the rxb to a new skb */
-static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
- u16 len, u8 crypt_len,
- struct iwl_rx_cmd_buffer *rxb)
+static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
+ struct ieee80211_hdr *hdr, u16 len, u8 crypt_len,
+ struct iwl_rx_cmd_buffer *rxb)
{
struct iwl_rx_packet *pkt = rxb_addr(rxb);
struct iwl_rx_mpdu_desc *desc = (void *)pkt->data;
@@ -204,6 +204,20 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
* present before copying packet data.
*/
hdrlen += crypt_len;
+
+ if (WARN_ONCE(headlen < hdrlen,
+ "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n",
+ hdrlen, len, crypt_len)) {
+ /*
+ * We warn and trace because we want to be able to see
+ * it in trace-cmd as well.
+ */
+ IWL_DEBUG_RX(mvm,
+ "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n",
+ hdrlen, len, crypt_len);
+ return -EINVAL;
+ }
+
skb_put_data(skb, hdr, hdrlen);
skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen);
@@ -216,6 +230,8 @@ static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
skb_add_rx_frag(skb, 0, rxb_steal_page(rxb), offset,
fraglen, rxb->truesize);
}
+
+ return 0;
}
static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
@@ -1671,7 +1687,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
rx_status->boottime_ns = ktime_get_boot_ns();
}
- iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb);
+ if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) {
+ kfree_skb(skb);
+ goto out;
+ }
+
if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc))
iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue,
sta, csi);
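
iwl_mvm_create_skb() now rejects frames whose advertised header-plus-crypto length exceeds the data actually received instead of copying past the end of it. A simplified sketch of that guard; the real function also deals with padding and paged fragments, and the names here are illustrative:

	#include <linux/errno.h>
	#include <linux/skbuff.h>

	/* assumes skb was allocated with room for at least len bytes */
	static int rx_build_skb(struct sk_buff *skb, const u8 *data,
				u16 len, u8 hdrlen, u8 crypt_len)
	{
		u16 copylen = hdrlen + crypt_len;

		if (copylen > len)
			return -EINVAL;	/* malformed descriptor, drop the frame */

		skb_put_data(skb, data, copylen);
		skb_put_data(skb, data + copylen, len - copylen);
		return 0;
	}

The caller frees the skb and bails out when the check fails, which is exactly what the rx_mpdu_mq hunk above adds.
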
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 498c315291cf..98d123dd7177 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -8,7 +8,7 @@
* Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
* Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1399,7 +1399,9 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk)
iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid);
list_del_init(&mvmtxq->list);
+ local_bh_disable();
iwl_mvm_mac_itxq_xmit(mvm->hw, txq);
+ local_bh_enable();
}
mutex_unlock(&mvm->mutex);
@@ -2333,21 +2335,6 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
iwl_mvm_enable_txq(mvm, NULL, mvmvif->cab_queue, 0, &cfg,
timeout);
- if (mvmvif->ap_wep_key) {
- u8 key_offset = iwl_mvm_set_fw_key_idx(mvm);
-
- __set_bit(key_offset, mvm->fw_key_table);
-
- if (key_offset == STA_KEY_IDX_INVALID)
- return -ENOSPC;
-
- ret = iwl_mvm_send_sta_key(mvm, mvmvif->mcast_sta.sta_id,
- mvmvif->ap_wep_key, true, 0, NULL, 0,
- key_offset, 0);
- if (ret)
- return ret;
- }
-
return 0;
}
@@ -2419,28 +2406,6 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0);
- if (mvmvif->ap_wep_key) {
- int i;
-
- if (!__test_and_clear_bit(mvmvif->ap_wep_key->hw_key_idx,
- mvm->fw_key_table)) {
- IWL_ERR(mvm, "offset %d not used in fw key table.\n",
- mvmvif->ap_wep_key->hw_key_idx);
- return -ENOENT;
- }
-
- /* track which key was deleted last */
- for (i = 0; i < STA_KEY_MAX_NUM; i++) {
- if (mvm->fw_key_deleted[i] < U8_MAX)
- mvm->fw_key_deleted[i]++;
- }
- mvm->fw_key_deleted[mvmvif->ap_wep_key->hw_key_idx] = 0;
- ret = __iwl_mvm_remove_sta_key(mvm, mvmvif->mcast_sta.sta_id,
- mvmvif->ap_wep_key, true);
- if (ret)
- return ret;
- }
-
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
if (ret)
IWL_WARN(mvm, "Failed sending remove station\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 79700c7310a1..b4d4071b865d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -8,7 +8,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -394,7 +394,6 @@ struct iwl_mvm_rxq_dup_data {
* the BA window. To be used for UAPSD only.
* @ptk_pn: per-queue PTK PN data structures
* @dup_data: per queue duplicate packet detection data
- * @wep_key: used in AP mode. Is a duplicate of the WEP key.
* @deferred_traffic_tid_map: indication bitmap of deferred traffic per-TID
* @tx_ant: the index of the antenna to use for data tx to this station. Only
* used during connection establishment (e.g. for the 4 way handshake
@@ -426,8 +425,6 @@ struct iwl_mvm_sta {
struct iwl_mvm_key_pn __rcu *ptk_pn[4];
struct iwl_mvm_rxq_dup_data *dup_data;
- struct ieee80211_key_conf *wep_key;
-
u8 reserved_queue;
/* Temporary, until the new TLC will control the Tx protection */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 2b94e4cef56c..9f1af8da9dc1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -953,14 +953,15 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0)},
{IWL_PCI_DEVICE(0xA0F0, 0x4070, iwl_ax101_cfg_qu_hr)},
- {IWL_PCI_DEVICE(0x2723, 0x0080, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x0084, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x0088, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x008C, iwl22260_2ax_cfg)},
+ {IWL_PCI_DEVICE(0x2723, 0x0080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x0084, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x0088, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x008C, iwl_ax200_cfg_cc)},
{IWL_PCI_DEVICE(0x2723, 0x1653, killer1650w_2ax_cfg)},
{IWL_PCI_DEVICE(0x2723, 0x1654, killer1650x_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x4080, iwl22260_2ax_cfg)},
- {IWL_PCI_DEVICE(0x2723, 0x4088, iwl22260_2ax_cfg)},
+ {IWL_PCI_DEVICE(0x2723, 0x2080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x4080, iwl_ax200_cfg_cc)},
+ {IWL_PCI_DEVICE(0x2723, 0x4088, iwl_ax200_cfg_cc)},
{IWL_PCI_DEVICE(0x1a56, 0x1653, killer1650w_2ax_cfg)},
{IWL_PCI_DEVICE(0x1a56, 0x1654, killer1650x_2ax_cfg)},
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index bf8b61a476c5..59213164f35e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -1043,7 +1043,7 @@ static inline bool iwl_pcie_dbg_on(struct iwl_trans *trans)
void iwl_trans_pcie_rf_kill(struct iwl_trans *trans, bool state);
void iwl_trans_pcie_dump_regs(struct iwl_trans *trans);
-void iwl_trans_sync_nmi(struct iwl_trans *trans);
+void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans);
#ifdef CONFIG_IWLWIFI_DEBUGFS
int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index fe8269d023de..c4375b868901 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -3318,7 +3318,8 @@ static void iwl_trans_pcie_resume(struct iwl_trans *trans)
.unref = iwl_trans_pcie_unref, \
.dump_data = iwl_trans_pcie_dump_data, \
.d3_suspend = iwl_trans_pcie_d3_suspend, \
- .d3_resume = iwl_trans_pcie_d3_resume
+ .d3_resume = iwl_trans_pcie_d3_resume, \
+ .sync_nmi = iwl_trans_pcie_sync_nmi
#ifdef CONFIG_PM_SLEEP
#define IWL_TRANS_PM_OPS \
@@ -3542,6 +3543,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
}
} else if (cfg == &iwl_ax101_cfg_qu_hr) {
if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
+ CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
+ trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) {
+ trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0;
+ } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) {
trans->cfg = &iwl_ax101_cfg_qu_hr;
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
@@ -3560,7 +3565,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
}
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
- (trans->cfg != &iwl22260_2ax_cfg ||
+ (trans->cfg != &iwl_ax200_cfg_cc ||
trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) {
u32 hw_status;
@@ -3637,22 +3642,29 @@ out_no_pci:
return ERR_PTR(ret);
}
-void iwl_trans_sync_nmi(struct iwl_trans *trans)
+void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans)
{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT;
+ u32 inta_addr, sw_err_bit;
+
+ if (trans_pcie->msix_enabled) {
+ inta_addr = CSR_MSIX_HW_INT_CAUSES_AD;
+ sw_err_bit = MSIX_HW_INT_CAUSES_REG_SW_ERR;
+ } else {
+ inta_addr = CSR_INT;
+ sw_err_bit = CSR_INT_BIT_SW_ERR;
+ }
iwl_disable_interrupts(trans);
iwl_force_nmi(trans);
while (time_after(timeout, jiffies)) {
- u32 inta_hw = iwl_read32(trans,
- CSR_MSIX_HW_INT_CAUSES_AD);
+ u32 inta_hw = iwl_read32(trans, inta_addr);
/* Error detected by uCode */
- if (inta_hw & MSIX_HW_INT_CAUSES_REG_SW_ERR) {
+ if (inta_hw & sw_err_bit) {
/* Clear causes register */
- iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD,
- inta_hw &
- MSIX_HW_INT_CAUSES_REG_SW_ERR);
+ iwl_write32(trans, inta_addr, inta_hw & sw_err_bit);
break;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 88530d9f4a54..38d110338987 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -965,7 +965,7 @@ static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans,
cmd_str);
ret = -ETIMEDOUT;
- iwl_trans_sync_nmi(trans);
+ iwl_trans_pcie_sync_nmi(trans);
goto cancel;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 9fbd37d23e85..7be73e2c4681 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -1960,7 +1960,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
iwl_get_cmd_string(trans, cmd->id));
ret = -ETIMEDOUT;
- iwl_trans_sync_nmi(trans);
+ iwl_trans_pcie_sync_nmi(trans);
goto cancel;
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 0838af04d681..524eb5805995 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2644,7 +2644,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
enum nl80211_band band;
const struct ieee80211_ops *ops = &mac80211_hwsim_ops;
struct net *net;
- int idx;
+ int idx, i;
int n_limits = 0;
if (WARN_ON(param->channels > 1 && !param->use_chanctx))
@@ -2768,12 +2768,23 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
goto failed_hw;
}
+ data->if_combination.max_interfaces = 0;
+ for (i = 0; i < n_limits; i++)
+ data->if_combination.max_interfaces +=
+ data->if_limits[i].max;
+
data->if_combination.n_limits = n_limits;
- data->if_combination.max_interfaces = 2048;
data->if_combination.limits = data->if_limits;
- hw->wiphy->iface_combinations = &data->if_combination;
- hw->wiphy->n_iface_combinations = 1;
+ /*
+ * If we actually were asked to support combinations,
+ * advertise them - if there's only a single thing like
+ * only IBSS then don't advertise it as combinations.
+ */
+ if (data->if_combination.max_interfaces > 1) {
+ hw->wiphy->iface_combinations = &data->if_combination;
+ hw->wiphy->n_iface_combinations = 1;
+ }
if (param->ciphers) {
memcpy(data->ciphers, param->ciphers,
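
(For context on the hwsim hunk above: it replaces the hard-coded max_interfaces of 2048 with the sum of the per-type interface limits and only advertises a combination when more than one interface is actually possible. Below is a minimal userspace sketch of that summation; the struct and limit values are illustrative stand-ins, not the mac80211 definitions.)

#include <stdio.h>

struct if_limit {
	int max;	/* how many interfaces of this type are allowed */
};

int main(void)
{
	/* hypothetical limits, e.g. one AP-like entry and two station-like entries */
	struct if_limit limits[] = { { .max = 1 }, { .max = 2 } };
	int n_limits = sizeof(limits) / sizeof(limits[0]);
	int max_interfaces = 0;

	for (int i = 0; i < n_limits; i++)
		max_interfaces += limits[i].max;

	/* only advertise a combination when more than one interface fits */
	if (max_interfaces > 1)
		printf("advertise combination, max_interfaces=%d\n", max_interfaces);
	else
		printf("single-interface only, no combination advertised\n");
	return 0;
}
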
diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
index a85648342d15..d5a70340a945 100644
--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
+++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
@@ -181,7 +181,7 @@ static int mwifiex_sdio_resume(struct device *dev)
adapter = card->adapter;
- if (test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) {
+ if (!test_bit(MWIFIEX_IS_SUSPENDED, &adapter->work_flags)) {
mwifiex_dbg(adapter, WARN,
"device already resumed\n");
return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
index d54dda67d036..3af45949e868 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
@@ -510,6 +510,8 @@ int mt7603_register_device(struct mt7603_dev *dev)
bus_ops->rmw = mt7603_rmw;
dev->mt76.bus = bus_ops;
+ spin_lock_init(&dev->ps_lock);
+
INIT_DELAYED_WORK(&dev->mac_work, mt7603_mac_work);
tasklet_init(&dev->pre_tbtt_tasklet, mt7603_pre_tbtt_tasklet,
(unsigned long)dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 5e31d7da96fc..5abc02b57818 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -343,7 +343,7 @@ void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid)
MT_BA_CONTROL_1_RESET));
}
-void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn,
+void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid,
int ba_size)
{
u32 addr = mt7603_wtbl2_addr(wcid);
@@ -358,43 +358,6 @@ void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn,
mt76_clear(dev, addr + (15 * 4), tid_mask);
return;
}
- mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);
-
- mt7603_mac_stop(dev);
- switch (tid) {
- case 0:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID0_SN, ssn);
- break;
- case 1:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID1_SN, ssn);
- break;
- case 2:
- mt76_rmw_field(dev, addr + (2 * 4), MT_WTBL2_W2_TID2_SN_LO,
- ssn);
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID2_SN_HI,
- ssn >> 8);
- break;
- case 3:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID3_SN, ssn);
- break;
- case 4:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID4_SN, ssn);
- break;
- case 5:
- mt76_rmw_field(dev, addr + (3 * 4), MT_WTBL2_W3_TID5_SN_LO,
- ssn);
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID5_SN_HI,
- ssn >> 4);
- break;
- case 6:
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID6_SN, ssn);
- break;
- case 7:
- mt76_rmw_field(dev, addr + (4 * 4), MT_WTBL2_W4_TID7_SN, ssn);
- break;
- }
- mt7603_wtbl_update(dev, wcid, MT_WTBL_UPDATE_WTBL2);
- mt7603_mac_start(dev);
for (i = 7; i > 0; i--) {
if (ba_size >= MT_AGG_SIZE_LIMIT(i))
@@ -827,6 +790,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_tx_rate *rate = &info->control.rates[0];
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data;
struct ieee80211_vif *vif = info->control.vif;
struct mt7603_vif *mvif;
int wlan_idx;
@@ -834,6 +798,7 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
int tx_count = 8;
u8 frame_type, frame_subtype;
u16 fc = le16_to_cpu(hdr->frame_control);
+ u16 seqno = 0;
u8 vif_idx = 0;
u32 val;
u8 bw;
@@ -919,7 +884,17 @@ mt7603_mac_write_txwi(struct mt7603_dev *dev, __le32 *txwi,
tx_count = 0x1f;
val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count) |
- FIELD_PREP(MT_TXD3_SEQ, le16_to_cpu(hdr->seq_ctrl));
+ MT_TXD3_SN_VALID;
+
+ if (ieee80211_is_data_qos(hdr->frame_control))
+ seqno = le16_to_cpu(hdr->seq_ctrl);
+ else if (ieee80211_is_back_req(hdr->frame_control))
+ seqno = le16_to_cpu(bar->start_seq_num);
+ else
+ val &= ~MT_TXD3_SN_VALID;
+
+ val |= FIELD_PREP(MT_TXD3_SEQ, seqno >> 4);
+
txwi[3] = cpu_to_le32(val);
if (key) {
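
(The txwi change above writes the sequence number field explicitly: for QoS data it comes from the MAC header's seq_ctrl, for a BlockAck request from the BAR's start_seq_num, and otherwise the SN-valid bit is cleared. In 802.11 the 16-bit sequence control word carries a 4-bit fragment number in its low bits and the 12-bit sequence number above it, hence the >> 4. A small standalone sketch of that extraction; the frame value is made up.)

#include <stdio.h>
#include <stdint.h>

/* 802.11 sequence control: bits 0-3 fragment number, bits 4-15 sequence number */
static uint16_t seq_ctrl_to_seqno(uint16_t seq_ctrl)
{
	return seq_ctrl >> 4;
}

int main(void)
{
	uint16_t seq_ctrl = 0x1234;	/* made-up host-order value */

	printf("fragment=%u seqno=%u\n",
	       seq_ctrl & 0xf, seq_ctrl_to_seqno(seq_ctrl));
	return 0;
}
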
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index cc0fe0933b2d..a3c4ef198bfe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -372,7 +372,7 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps)
struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv;
struct sk_buff_head list;
- mt76_stop_tx_queues(&dev->mt76, sta, false);
+ mt76_stop_tx_queues(&dev->mt76, sta, true);
mt7603_wtbl_set_ps(dev, msta, ps);
if (ps)
return;
@@ -584,13 +584,13 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
case IEEE80211_AMPDU_TX_OPERATIONAL:
mtxq->aggr = true;
mtxq->send_bar = false;
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, ba_size);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, ba_size);
break;
case IEEE80211_AMPDU_TX_STOP_FLUSH:
case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
mtxq->aggr = false;
ieee80211_send_bar(vif, sta->addr, tid, mtxq->agg_ssn);
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1);
break;
case IEEE80211_AMPDU_TX_START:
mtxq->agg_ssn = *ssn << 4;
@@ -598,7 +598,7 @@ mt7603_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
break;
case IEEE80211_AMPDU_TX_STOP_CONT:
mtxq->aggr = false;
- mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, *ssn, -1);
+ mt7603_mac_tx_ba_reset(dev, msta->wcid.idx, tid, -1);
ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
break;
}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
index 79f332429432..6049f3b7c8fe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
@@ -200,7 +200,7 @@ void mt7603_beacon_set_timer(struct mt7603_dev *dev, int idx, int intval);
int mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb);
void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data);
void mt7603_mac_rx_ba_reset(struct mt7603_dev *dev, void *addr, u8 tid);
-void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid, int ssn,
+void mt7603_mac_tx_ba_reset(struct mt7603_dev *dev, int wcid, int tid,
int ba_size);
void mt7603_pse_client_reset(struct mt7603_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 9ed231abe916..4fe5a83ca5a4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -466,7 +466,6 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
return;
rcu_read_lock();
- mt76_tx_status_lock(mdev, &list);
if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid))
wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]);
@@ -479,6 +478,8 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
drv_priv);
}
+ mt76_tx_status_lock(mdev, &list);
+
if (wcid) {
if (stat->pktid >= MT_PACKET_ID_FIRST)
status.skb = mt76_tx_status_skb_get(mdev, wcid,
@@ -498,7 +499,9 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
if (*update == 0 && stat_val == stat_cache &&
stat->wcid == msta->status.wcid && msta->n_frames < 32) {
msta->n_frames++;
- goto out;
+ mt76_tx_status_unlock(mdev, &list);
+ rcu_read_unlock();
+ return;
}
mt76x02_mac_fill_tx_status(dev, status.info, &msta->status,
@@ -514,11 +517,10 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
if (status.skb)
mt76_tx_status_skb_done(mdev, status.skb, &list);
- else
- ieee80211_tx_status_ext(mt76_hw(dev), &status);
-
-out:
mt76_tx_status_unlock(mdev, &list);
+
+ if (!status.skb)
+ ieee80211_tx_status_ext(mt76_hw(dev), &status);
rcu_read_unlock();
}
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
index 4b1744e9fb78..50b92ca92bd7 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
@@ -673,7 +673,6 @@ enum rt2x00_state_flags {
CONFIG_CHANNEL_HT40,
CONFIG_POWERSAVING,
CONFIG_HT_DISABLED,
- CONFIG_QOS_DISABLED,
CONFIG_MONITORING,
/*
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
index 2825560e2424..e8462f25d252 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
@@ -642,19 +642,9 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw,
rt2x00dev->intf_associated--;
rt2x00leds_led_assoc(rt2x00dev, !!rt2x00dev->intf_associated);
-
- clear_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags);
}
/*
- * Check for access point which do not support 802.11e . We have to
- * generate data frames sequence number in S/W for such AP, because
- * of H/W bug.
- */
- if (changes & BSS_CHANGED_QOS && !bss_conf->qos)
- set_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags);
-
- /*
* When the erp information has changed, we should perform
* additional configuration steps. For all other changes we are done.
*/
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
index 92ddc19e7bf7..4834b4eb0206 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00queue.c
@@ -201,15 +201,18 @@ static void rt2x00queue_create_tx_descriptor_seq(struct rt2x00_dev *rt2x00dev,
if (!rt2x00_has_cap_flag(rt2x00dev, REQUIRE_SW_SEQNO)) {
/*
* rt2800 has a H/W (or F/W) bug, device incorrectly increase
- * seqno on retransmited data (non-QOS) frames. To workaround
- * the problem let's generate seqno in software if QOS is
- * disabled.
+ * seqno on retransmitted data (non-QOS) and management frames.
+ * To workaround the problem let's generate seqno in software.
+ * Except for beacons which are transmitted periodically by H/W
+ * hence hardware has to assign seqno for them.
*/
- if (test_bit(CONFIG_QOS_DISABLED, &rt2x00dev->flags))
- __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
- else
+ if (ieee80211_is_beacon(hdr->frame_control)) {
+ __set_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
/* H/W will generate sequence number */
return;
+ }
+
+ __clear_bit(ENTRY_TXD_GENERATE_SEQ, &txdesc->flags);
}
/*
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c
index 2b26f762fbc3..01acb6e53365 100644
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = {
};
MODULE_DEVICE_TABLE(spi, st95hf_id);
+static const struct of_device_id st95hf_spi_of_match[] = {
+ { .compatible = "st,st95hf" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
+
static int st95hf_probe(struct spi_device *nfc_spi_dev)
{
int ret;
@@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = {
.driver = {
.name = "st95hf",
.owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(st95hf_spi_of_match),
},
.id_table = st95hf_id,
.probe = st95hf_probe,
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index b72a303176c7..9486acc08402 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -198,14 +198,15 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
return NULL;
nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
- if (nd_btt->id < 0) {
- kfree(nd_btt);
- return NULL;
- }
+ if (nd_btt->id < 0)
+ goto out_nd_btt;
nd_btt->lbasize = lbasize;
- if (uuid)
+ if (uuid) {
uuid = kmemdup(uuid, 16, GFP_KERNEL);
+ if (!uuid)
+ goto out_put_id;
+ }
nd_btt->uuid = uuid;
dev = &nd_btt->dev;
dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
@@ -220,6 +221,13 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
return NULL;
}
return dev;
+
+out_put_id:
+ ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
+
+out_nd_btt:
+ kfree(nd_btt);
+ return NULL;
}
struct device *nd_btt_create(struct nd_region *nd_region)
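
(The __nd_btt_create() change above converts the open-coded failure paths into the usual goto-based unwinding, so a failed kmemdup() releases the ida id before freeing the structure. A minimal userspace sketch of the same pattern; the allocations and names are stand-ins, not the libnvdimm API.)

#include <stdlib.h>
#include <string.h>

struct thing {
	int id;
	char *name;
};

static struct thing *thing_create(const char *name)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t)
		return NULL;

	t->id = 42;		/* stand-in for ida_simple_get() */
	if (t->id < 0)
		goto out_free;

	t->name = strdup(name);	/* stand-in for kmemdup() of the uuid */
	if (!t->name)
		goto out_put_id;

	return t;

out_put_id:
	t->id = -1;		/* stand-in for ida_simple_remove() */
out_free:
	free(t);
	return NULL;
}

int main(void)
{
	struct thing *t = thing_create("btt0.0");

	if (t) {
		free(t->name);
		free(t);
	}
	return 0;
}
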
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 7849bf1812c4..f293556cbbf6 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2249,9 +2249,12 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
if (!nsblk->uuid)
goto blk_err;
memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0])
+ if (name[0]) {
nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
GFP_KERNEL);
+ if (!nsblk->alt_name)
+ goto blk_err;
+ }
res = nsblk_add_resource(nd_region, ndd, nsblk,
__le64_to_cpu(nd_label->dpa));
if (!res)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index bc2f700feef8..0279eb1da3ef 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -113,13 +113,13 @@ static void write_pmem(void *pmem_addr, struct page *page,
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
memcpy_flushcache(pmem_addr, mem + off, chunk);
kunmap_atomic(mem);
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
}
@@ -132,7 +132,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
- chunk = min_t(unsigned int, len, PAGE_SIZE);
+ chunk = min_t(unsigned int, len, PAGE_SIZE - off);
rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
@@ -140,7 +140,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
len -= chunk;
off = 0;
page++;
- pmem_addr += PAGE_SIZE;
+ pmem_addr += chunk;
}
return BLK_STS_OK;
}
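
(The pmem fix above matters when the first page is copied from a non-zero offset: the chunk must be PAGE_SIZE - off rather than PAGE_SIZE, and the device address has to advance by the chunk actually copied, otherwise the loop runs past the intended range. A userspace sketch of the corrected loop; plain memcpy stands in for memcpy_flushcache/memcpy_mcsafe.)

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u

/* Copy len bytes from an array of pages, starting at offset off in the first page. */
static void copy_chunked(char *dst, char (*pages)[PAGE_SIZE],
			 unsigned int off, unsigned int len)
{
	while (len) {
		/* never cross the end of the current page */
		unsigned int chunk = len < PAGE_SIZE - off ? len : PAGE_SIZE - off;

		memcpy(dst, *pages + off, chunk);
		len -= chunk;
		off = 0;	/* only the first page starts mid-way */
		pages++;
		dst += chunk;	/* advance by what was copied, not PAGE_SIZE */
	}
}

int main(void)
{
	static char src[2][PAGE_SIZE];
	static char dst[PAGE_SIZE];

	memset(src, 'a', sizeof(src));
	copy_chunked(dst, src, 4000, 600);	/* crosses the page boundary */
	printf("%c\n", dst[0]);
	return 0;
}
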
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index f8bb746a549f..a570f2263a42 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -22,6 +22,8 @@ static bool key_revalidate = true;
module_param(key_revalidate, bool, 0444);
MODULE_PARM_DESC(key_revalidate, "Require key validation at init.");
+static const char zero_key[NVDIMM_PASSPHRASE_LEN];
+
static void *key_data(struct key *key)
{
struct encrypted_key_payload *epayload = dereference_key_locked(key);
@@ -75,6 +77,16 @@ static struct key *nvdimm_request_key(struct nvdimm *nvdimm)
return key;
}
+static const void *nvdimm_get_key_payload(struct nvdimm *nvdimm,
+ struct key **key)
+{
+ *key = nvdimm_request_key(nvdimm);
+ if (!*key)
+ return zero_key;
+
+ return key_data(*key);
+}
+
static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
key_serial_t id, int subclass)
{
@@ -105,36 +117,57 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
return key;
}
-static struct key *nvdimm_key_revalidate(struct nvdimm *nvdimm)
+static const void *nvdimm_get_user_key_payload(struct nvdimm *nvdimm,
+ key_serial_t id, int subclass, struct key **key)
+{
+ *key = NULL;
+ if (id == 0) {
+ if (subclass == NVDIMM_BASE_KEY)
+ return zero_key;
+ else
+ return NULL;
+ }
+
+ *key = nvdimm_lookup_user_key(nvdimm, id, subclass);
+ if (!*key)
+ return NULL;
+
+ return key_data(*key);
+}
+
+
+static int nvdimm_key_revalidate(struct nvdimm *nvdimm)
{
struct key *key;
int rc;
+ const void *data;
if (!nvdimm->sec.ops->change_key)
- return NULL;
+ return -EOPNOTSUPP;
- key = nvdimm_request_key(nvdimm);
- if (!key)
- return NULL;
+ data = nvdimm_get_key_payload(nvdimm, &key);
/*
* Send the same key to the hardware as new and old key to
* verify that the key is good.
*/
- rc = nvdimm->sec.ops->change_key(nvdimm, key_data(key),
- key_data(key), NVDIMM_USER);
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, data, NVDIMM_USER);
if (rc < 0) {
nvdimm_put_key(key);
- key = NULL;
+ return rc;
}
- return key;
+
+ nvdimm_put_key(key);
+ nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+ return 0;
}
static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key = NULL;
+ struct key *key;
+ const void *data;
int rc;
/* The bus lock should be held at the top level of the call stack */
@@ -160,16 +193,11 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
if (!key_revalidate)
return 0;
- key = nvdimm_key_revalidate(nvdimm);
- if (!key)
- return nvdimm_security_freeze(nvdimm);
+ return nvdimm_key_revalidate(nvdimm);
} else
- key = nvdimm_request_key(nvdimm);
+ data = nvdimm_get_key_payload(nvdimm, &key);
- if (!key)
- return -ENOKEY;
-
- rc = nvdimm->sec.ops->unlock(nvdimm, key_data(key));
+ rc = nvdimm->sec.ops->unlock(nvdimm, data);
dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
@@ -195,6 +223,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct key *key;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
@@ -214,11 +243,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
return -EBUSY;
}
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
return -ENOKEY;
- rc = nvdimm->sec.ops->disable(nvdimm, key_data(key));
+ rc = nvdimm->sec.ops->disable(nvdimm, data);
dev_dbg(dev, "key: %d disable: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
@@ -235,6 +265,7 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct key *key, *newkey;
int rc;
+ const void *data, *newdata;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
@@ -249,22 +280,19 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
return -EIO;
}
- if (keyid == 0)
- key = NULL;
- else {
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
- return -ENOKEY;
- }
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
- newkey = nvdimm_lookup_user_key(nvdimm, new_keyid, NVDIMM_NEW_KEY);
- if (!newkey) {
+ newdata = nvdimm_get_user_key_payload(nvdimm, new_keyid,
+ NVDIMM_NEW_KEY, &newkey);
+ if (!newdata) {
nvdimm_put_key(key);
return -ENOKEY;
}
- rc = nvdimm->sec.ops->change_key(nvdimm, key ? key_data(key) : NULL,
- key_data(newkey), pass_type);
+ rc = nvdimm->sec.ops->change_key(nvdimm, data, newdata, pass_type);
dev_dbg(dev, "key: %d %d update%s: %s\n",
key_serial(key), key_serial(newkey),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
@@ -286,8 +314,9 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key;
+ struct key *key = NULL;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
@@ -319,11 +348,12 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
return -EOPNOTSUPP;
}
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
return -ENOKEY;
- rc = nvdimm->sec.ops->erase(nvdimm, key_data(key), pass_type);
+ rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type);
dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key),
pass_type == NVDIMM_MASTER ? "(master)" : "(user)",
rc == 0 ? "success" : "fail");
@@ -337,8 +367,9 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
{
struct device *dev = &nvdimm->dev;
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- struct key *key;
+ struct key *key = NULL;
int rc;
+ const void *data;
/* The bus lock should be held at the top level of the call stack */
lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
@@ -368,15 +399,12 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
return -EBUSY;
}
- if (keyid == 0)
- key = NULL;
- else {
- key = nvdimm_lookup_user_key(nvdimm, keyid, NVDIMM_BASE_KEY);
- if (!key)
- return -ENOKEY;
- }
+ data = nvdimm_get_user_key_payload(nvdimm, keyid,
+ NVDIMM_BASE_KEY, &key);
+ if (!data)
+ return -ENOKEY;
- rc = nvdimm->sec.ops->overwrite(nvdimm, key ? key_data(key) : NULL);
+ rc = nvdimm->sec.ops->overwrite(nvdimm, data);
dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key),
rc == 0 ? "success" : "fail");
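
(The security.c rework above replaces the scattered "keyid == 0 means no key" special cases with helpers that return a payload pointer: a real key's data when one is found, a static all-zero passphrase when key id 0 selects the default, and NULL on error. A rough userspace sketch of that shape; the length constant and the lookup are placeholders, not the nvdimm API.)

#include <stdio.h>

#define PASSPHRASE_LEN 32	/* stand-in for NVDIMM_PASSPHRASE_LEN */

static const char zero_key[PASSPHRASE_LEN];	/* default all-zero passphrase */

/* Pretend key store: id 7 exists, everything else does not. */
static const char *lookup_key(int id)
{
	static const char payload[PASSPHRASE_LEN] = "secret";

	return id == 7 ? payload : NULL;
}

/* Return a usable payload: zero key for id 0, looked-up data, or NULL. */
static const void *get_user_key_payload(int id)
{
	if (id == 0)
		return zero_key;
	return lookup_key(id);
}

int main(void)
{
	printf("id 0 -> %s\n", get_user_key_payload(0) ? "zero key" : "none");
	printf("id 7 -> %s\n", get_user_key_payload(7) ? "found" : "none");
	printf("id 9 -> %s\n", get_user_key_payload(9) ? "found" : "none");
	return 0;
}
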
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2c43e12b70af..6265d9225ec8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -388,7 +388,7 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
- cleanup_srcu_struct_quiesced(&head->srcu);
+ cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
}
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index 810ab0fbcccb..d820f3edd431 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -7,7 +7,6 @@
*/
#include <linux/etherdevice.h>
#include <linux/kernel.h>
-#include <linux/nvmem-consumer.h>
#include <linux/of_net.h>
#include <linux/phy.h>
#include <linux/export.h>
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7c1b362f599a..766f5779db92 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6262,8 +6262,7 @@ static int __init pci_setup(char *str)
} else if (!strncmp(str, "pcie_scan_all", 13)) {
pci_add_flags(PCI_SCAN_ALL_PCIE_DEVS);
} else if (!strncmp(str, "disable_acs_redir=", 18)) {
- disable_acs_redir_param =
- kstrdup(str + 18, GFP_KERNEL);
+ disable_acs_redir_param = str + 18;
} else {
printk(KERN_ERR "PCI: Unknown option `%s'\n",
str);
@@ -6274,3 +6273,19 @@ static int __init pci_setup(char *str)
return 0;
}
early_param("pci", pci_setup);
+
+/*
+ * 'disable_acs_redir_param' is initialized in pci_setup(), above, to point
+ * to data in the __initdata section which will be freed after the init
+ * sequence is complete. We can't allocate memory in pci_setup() because some
+ * architectures do not have any memory allocation service available during
+ * an early_param() call. So we allocate memory and copy the variable here
+ * before the init section is freed.
+ */
+static int __init pci_realloc_setup_params(void)
+{
+ disable_acs_redir_param = kstrdup(disable_acs_redir_param, GFP_KERNEL);
+
+ return 0;
+}
+pure_initcall(pci_realloc_setup_params);
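
(The comment added above explains the pattern: early_param() runs too early to allocate memory on some architectures, so pci_setup() only keeps a pointer into __initdata and a later initcall kstrdup()s it before the init sections are freed. A loose userspace analogue of "copy the value out before its backing storage goes away"; the names here are invented for illustration.)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *param_value;	/* analogue of disable_acs_redir_param */

/* analogue of the early_param() handler: just remember where the value lives */
static void early_setup(char *cmdline_piece)
{
	param_value = cmdline_piece;
}

/* analogue of the later initcall: duplicate before the original is freed */
static void late_setup(void)
{
	param_value = strdup(param_value);
}

int main(void)
{
	char *initdata = strdup("00:1f.0");	/* pretend __initdata storage */

	early_setup(initdata);
	late_setup();
	free(initdata);				/* init sections released */

	printf("still valid: %s\n", param_value);
	free(param_value);
	return 0;
}
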
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 5cbdbca904ac..362eb8cfa53b 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -142,3 +142,11 @@ config PCIE_PTM
This is only useful if you have devices that support PTM, but it
is safe to enable even if you don't.
+
+config PCIE_BW
+ bool "PCI Express Bandwidth Change Notification"
+ depends on PCIEPORTBUS
+ help
+ This enables PCI Express Bandwidth Change Notification. If
+ you know link width or rate changes occur only to correct
+ unreliable links, you may answer Y.
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index f1d7bc1e5efa..efb9d2e71e9e 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -3,7 +3,6 @@
# Makefile for PCI Express features and port driver
pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o
-pcieportdrv-y += bw_notification.o
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
@@ -13,3 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
obj-$(CONFIG_PCIE_PME) += pme.o
obj-$(CONFIG_PCIE_DPC) += dpc.o
obj-$(CONFIG_PCIE_PTM) += ptm.o
+obj-$(CONFIG_PCIE_BW) += bw_notification.o
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 1d50dc58ac40..944827a8c7d3 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -49,7 +49,11 @@ int pcie_dpc_init(void);
static inline int pcie_dpc_init(void) { return 0; }
#endif
+#ifdef CONFIG_PCIE_BW
int pcie_bandwidth_notification_init(void);
+#else
+static inline int pcie_bandwidth_notification_init(void) { return 0; }
+#endif
/* Port Type */
#define PCIE_ANY_PORT (~0)
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 7d04f9d087a6..1b330129089f 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -55,7 +55,8 @@ static int pcie_message_numbers(struct pci_dev *dev, int mask,
* 7.8.2, 7.10.10, 7.31.2.
*/
- if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP)) {
+ if (mask & (PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP |
+ PCIE_PORT_SERVICE_BWNOTIF)) {
pcie_capability_read_word(dev, PCI_EXP_FLAGS, &reg16);
*pme = (reg16 & PCI_EXP_FLAGS_IRQ) >> 9;
nvec = *pme + 1;
diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c
index 08d5037fd052..6887870ba32c 100644
--- a/drivers/power/supply/cpcap-battery.c
+++ b/drivers/power/supply/cpcap-battery.c
@@ -221,6 +221,9 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata,
int avg_current;
u32 cc_lsb;
+ if (!divider)
+ return 0;
+
sample &= 0xffffff; /* 24-bits, unsigned */
offset &= 0x7ff; /* 10-bits, signed */
diff --git a/drivers/power/supply/goldfish_battery.c b/drivers/power/supply/goldfish_battery.c
index ad969d9fc981..c2644a9fe80f 100644
--- a/drivers/power/supply/goldfish_battery.c
+++ b/drivers/power/supply/goldfish_battery.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL
+// SPDX-License-Identifier: GPL-2.0
/*
* Power supply driver for the goldfish emulator
*
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index dce24f596160..5358a80d854f 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -383,15 +383,11 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env)
char *prop_buf;
char *attrname;
- dev_dbg(dev, "uevent\n");
-
if (!psy || !psy->desc) {
dev_dbg(dev, "No power supply yet\n");
return ret;
}
- dev_dbg(dev, "POWER_SUPPLY_NAME=%s\n", psy->desc->name);
-
ret = add_uevent_var(env, "POWER_SUPPLY_NAME=%s", psy->desc->name);
if (ret)
return ret;
@@ -427,8 +423,6 @@ int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env)
goto out;
}
- dev_dbg(dev, "prop %s=%s\n", attrname, prop_buf);
-
ret = add_uevent_var(env, "POWER_SUPPLY_%s=%s", attrname, prop_buf);
kfree(attrname);
if (ret)
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 6e294b4d3635..f89f9d02e788 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2004,14 +2004,14 @@ static int dasd_eckd_end_analysis(struct dasd_block *block)
blk_per_trk = recs_per_track(&private->rdc_data, 0, block->bp_block);
raw:
- block->blocks = (private->real_cyl *
+ block->blocks = ((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk);
dev_info(&device->cdev->dev,
- "DASD with %d KB/block, %d KB total size, %d KB/track, "
+ "DASD with %u KB/block, %lu KB total size, %u KB/track, "
"%s\n", (block->bp_block >> 10),
- ((private->real_cyl *
+ (((unsigned long) private->real_cyl *
private->rdc_data.trk_per_cyl *
blk_per_trk * (block->bp_block >> 9)) >> 1),
((blk_per_trk * block->bp_block) >> 10),
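
(The dasd_eckd change above widens one operand to unsigned long before multiplying, because cylinders * tracks-per-cylinder * blocks-per-track can exceed 32 bits even though each factor fits. A tiny demonstration of why the cast has to come before the multiplication; the values are made up to force the wrap on hosts where unsigned int is 32 bits.)

#include <stdio.h>

int main(void)
{
	unsigned int real_cyl = 500000, trk_per_cyl = 15, blk_per_trk = 720;

	/* all-32-bit product wraps around ... */
	unsigned long wrong = real_cyl * trk_per_cyl * blk_per_trk;
	/* ... widening the first factor keeps the arithmetic in 64 bits */
	unsigned long right = (unsigned long)real_cyl * trk_per_cyl * blk_per_trk;

	printf("wrapped: %lu correct: %lu\n", wrong, right);
	return 0;
}
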
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index fd2146bcc0ad..e17364e13d2f 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -629,7 +629,7 @@ con3270_init(void)
(void (*)(unsigned long)) con3270_read_tasklet,
(unsigned long) condev->read);
- raw3270_add_view(&condev->view, &con3270_fn, 1);
+ raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
INIT_LIST_HEAD(&condev->freemem);
for (i = 0; i < CON3270_STRING_PAGES; i++) {
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 8f3a2eeb28dc..8b48ba9c598e 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -463,7 +463,8 @@ fs3270_open(struct inode *inode, struct file *filp)
init_waitqueue_head(&fp->wait);
fp->fs_pid = get_pid(task_pid(current));
- rc = raw3270_add_view(&fp->view, &fs3270_fn, minor);
+ rc = raw3270_add_view(&fp->view, &fs3270_fn, minor,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
fs3270_free_view(&fp->view);
goto out;
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index f8cd2935fbfd..63a41b168761 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -920,7 +920,7 @@ raw3270_deactivate_view(struct raw3270_view *view)
* Add view to device with minor "minor".
*/
int
-raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor)
+raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor, int subclass)
{
unsigned long flags;
struct raw3270 *rp;
@@ -942,6 +942,7 @@ raw3270_add_view(struct raw3270_view *view, struct raw3270_fn *fn, int minor)
view->cols = rp->cols;
view->ascebc = rp->ascebc;
spin_lock_init(&view->lock);
+ lockdep_set_subclass(&view->lock, subclass);
list_add(&view->list, &rp->view_list);
rc = 0;
spin_unlock_irqrestore(get_ccwdev_lock(rp->cdev), flags);
diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h
index 114ca7cbf889..3afaa35f7351 100644
--- a/drivers/s390/char/raw3270.h
+++ b/drivers/s390/char/raw3270.h
@@ -150,6 +150,8 @@ struct raw3270_fn {
struct raw3270_view {
struct list_head list;
spinlock_t lock;
+#define RAW3270_VIEW_LOCK_IRQ 0
+#define RAW3270_VIEW_LOCK_BH 1
atomic_t ref_count;
struct raw3270 *dev;
struct raw3270_fn *fn;
@@ -158,7 +160,7 @@ struct raw3270_view {
unsigned char *ascebc; /* ascii -> ebcdic table */
};
-int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int);
+int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int);
int raw3270_activate_view(struct raw3270_view *);
void raw3270_del_view(struct raw3270_view *);
void raw3270_deactivate_view(struct raw3270_view *);
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 2b0c36c2c568..98d7fc152e32 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -980,7 +980,8 @@ static int tty3270_install(struct tty_driver *driver, struct tty_struct *tty)
return PTR_ERR(tp);
rc = raw3270_add_view(&tp->view, &tty3270_fn,
- tty->index + RAW3270_FIRSTMINOR);
+ tty->index + RAW3270_FIRSTMINOR,
+ RAW3270_VIEW_LOCK_BH);
if (rc) {
tty3270_free_view(tp);
return rc;
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 6a340f2c3556..5ea83dc4f1d7 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -751,8 +751,8 @@ void ap_queue_prepare_remove(struct ap_queue *aq)
__ap_flush_queue(aq);
/* set REMOVE state to prevent new messages are queued in */
aq->state = AP_STATE_REMOVE;
- del_timer_sync(&aq->timeout);
spin_unlock_bh(&aq->lock);
+ del_timer_sync(&aq->timeout);
}
void ap_queue_remove(struct ap_queue *aq)
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 3e85d665c572..45eb0c14b880 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -51,7 +51,8 @@ static debug_info_t *debug_info;
static void __init pkey_debug_init(void)
{
- debug_info = debug_register("pkey", 1, 1, 4 * sizeof(long));
+ /* 5 arguments per dbf entry (including the format string ptr) */
+ debug_info = debug_register("pkey", 1, 1, 5 * sizeof(long));
debug_register_view(debug_info, &debug_sprintf_view);
debug_set_level(debug_info, 3);
}
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 7617d21cb296..f63c5c871d3d 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev)
if (priv->channel[direction] == NULL) {
if (direction == CTCM_WRITE)
channel_free(priv->channel[CTCM_READ]);
+ result = -ENODEV;
goto out_dev;
}
priv->channel[direction]->netdev = dev;
diff --git a/drivers/scsi/aic7xxx/aic7770_osm.c b/drivers/scsi/aic7xxx/aic7770_osm.c
index 3d401d02c019..bdd177e3d762 100644
--- a/drivers/scsi/aic7xxx/aic7770_osm.c
+++ b/drivers/scsi/aic7xxx/aic7770_osm.c
@@ -91,6 +91,7 @@ aic7770_probe(struct device *dev)
ahc = ahc_alloc(&aic7xxx_driver_template, name);
if (ahc == NULL)
return (ENOMEM);
+ ahc->dev = dev;
error = aic7770_config(ahc, aic7770_ident_table + edev->id.driver_data,
eisaBase);
if (error != 0) {
diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h
index 5614921b4041..88b90f9806c9 100644
--- a/drivers/scsi/aic7xxx/aic7xxx.h
+++ b/drivers/scsi/aic7xxx/aic7xxx.h
@@ -943,6 +943,7 @@ struct ahc_softc {
* Platform specific device information.
*/
ahc_dev_softc_t dev_softc;
+ struct device *dev;
/*
* Bus specific device information.
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index 3c9c17450bb3..d5c4a0d23706 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -860,8 +860,8 @@ int
ahc_dmamem_alloc(struct ahc_softc *ahc, bus_dma_tag_t dmat, void** vaddr,
int flags, bus_dmamap_t *mapp)
{
- *vaddr = pci_alloc_consistent(ahc->dev_softc,
- dmat->maxsize, mapp);
+ /* XXX: check if we really need the GFP_ATOMIC and unwind this mess! */
+ *vaddr = dma_alloc_coherent(ahc->dev, dmat->maxsize, mapp, GFP_ATOMIC);
if (*vaddr == NULL)
return ENOMEM;
return 0;
@@ -871,8 +871,7 @@ void
ahc_dmamem_free(struct ahc_softc *ahc, bus_dma_tag_t dmat,
void* vaddr, bus_dmamap_t map)
{
- pci_free_consistent(ahc->dev_softc, dmat->maxsize,
- vaddr, map);
+ dma_free_coherent(ahc->dev, dmat->maxsize, vaddr, map);
}
int
@@ -1123,8 +1122,7 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa
host->transportt = ahc_linux_transport_template;
- retval = scsi_add_host(host,
- (ahc->dev_softc ? &ahc->dev_softc->dev : NULL));
+ retval = scsi_add_host(host, ahc->dev);
if (retval) {
printk(KERN_WARNING "aic7xxx: scsi_add_host failed\n");
scsi_host_put(host);
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
index 0fc14dac7070..717d8d1082ce 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
@@ -250,6 +250,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
}
ahc->dev_softc = pci;
+ ahc->dev = &pci->dev;
error = ahc_pci_config(ahc, entry);
if (error != 0) {
ahc_free(ahc);
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index dfba4921b265..5bf61431434b 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -2162,7 +2162,6 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport, struct fc_frame *fp)
FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
fc_rport_state(rdata));
- rdata->flags &= ~FC_RP_STARTED;
fc_rport_enter_delete(rdata, RPORT_EV_STOP);
mutex_unlock(&rdata->rp_mutex);
kref_put(&rdata->kref, fc_rport_destroy);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 601b9f1de267..07dfc17d4824 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1706,8 +1706,12 @@ out_put_budget:
ret = BLK_STS_DEV_RESOURCE;
break;
default:
+ if (unlikely(!scsi_device_online(sdev)))
+ scsi_req(req)->result = DID_NO_CONNECT << 16;
+ else
+ scsi_req(req)->result = DID_ERROR << 16;
/*
- * Make sure to release all allocated ressources when
+ * Make sure to release all allocated resources when
* we hit an error, as we will never see this command
* again.
*/
diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c
index 808ed92ed66f..1bb1cb651349 100644
--- a/drivers/staging/comedi/drivers/ni_usb6501.c
+++ b/drivers/staging/comedi/drivers/ni_usb6501.c
@@ -463,10 +463,8 @@ static int ni6501_alloc_usb_buffers(struct comedi_device *dev)
size = usb_endpoint_maxp(devpriv->ep_tx);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
- if (!devpriv->usb_tx_buf) {
- kfree(devpriv->usb_rx_buf);
+ if (!devpriv->usb_tx_buf)
return -ENOMEM;
- }
return 0;
}
@@ -518,6 +516,9 @@ static int ni6501_auto_attach(struct comedi_device *dev,
if (!devpriv)
return -ENOMEM;
+ mutex_init(&devpriv->mut);
+ usb_set_intfdata(intf, devpriv);
+
ret = ni6501_find_endpoints(dev);
if (ret)
return ret;
@@ -526,9 +527,6 @@ static int ni6501_auto_attach(struct comedi_device *dev,
if (ret)
return ret;
- mutex_init(&devpriv->mut);
- usb_set_intfdata(intf, devpriv);
-
ret = comedi_alloc_subdevices(dev, 2);
if (ret)
return ret;
diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c
index 6234b649d887..65dc6c51037e 100644
--- a/drivers/staging/comedi/drivers/vmk80xx.c
+++ b/drivers/staging/comedi/drivers/vmk80xx.c
@@ -682,10 +682,8 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev)
size = usb_endpoint_maxp(devpriv->ep_tx);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
- if (!devpriv->usb_tx_buf) {
- kfree(devpriv->usb_rx_buf);
+ if (!devpriv->usb_tx_buf)
return -ENOMEM;
- }
return 0;
}
@@ -800,6 +798,8 @@ static int vmk80xx_auto_attach(struct comedi_device *dev,
devpriv->model = board->model;
+ sema_init(&devpriv->limit_sem, 8);
+
ret = vmk80xx_find_usb_endpoints(dev);
if (ret)
return ret;
@@ -808,8 +808,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev,
if (ret)
return ret;
- sema_init(&devpriv->limit_sem, 8);
-
usb_set_intfdata(intf, devpriv);
if (devpriv->model == VMK8055_MODEL)
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
index 526e0dbea5b5..81af768e7248 100644
--- a/drivers/staging/erofs/data.c
+++ b/drivers/staging/erofs/data.c
@@ -298,7 +298,7 @@ submit_bio_retry:
*last_block = current_block;
/* shift in advance in case of it followed by too many gaps */
- if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) {
+ if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
/* err should reassign to 0 after submitting */
err = 0;
goto submit_bio_out;
diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index acdbc07fd259..2fc8bc22b57b 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -109,10 +109,10 @@
#define AD7192_CH_AIN3 BIT(6) /* AIN3 - AINCOM */
#define AD7192_CH_AIN4 BIT(7) /* AIN4 - AINCOM */
-#define AD7193_CH_AIN1P_AIN2M 0x000 /* AIN1(+) - AIN2(-) */
-#define AD7193_CH_AIN3P_AIN4M 0x001 /* AIN3(+) - AIN4(-) */
-#define AD7193_CH_AIN5P_AIN6M 0x002 /* AIN5(+) - AIN6(-) */
-#define AD7193_CH_AIN7P_AIN8M 0x004 /* AIN7(+) - AIN8(-) */
+#define AD7193_CH_AIN1P_AIN2M 0x001 /* AIN1(+) - AIN2(-) */
+#define AD7193_CH_AIN3P_AIN4M 0x002 /* AIN3(+) - AIN4(-) */
+#define AD7193_CH_AIN5P_AIN6M 0x004 /* AIN5(+) - AIN6(-) */
+#define AD7193_CH_AIN7P_AIN8M 0x008 /* AIN7(+) - AIN8(-) */
#define AD7193_CH_TEMP 0x100 /* Temp sensor */
#define AD7193_CH_AIN2P_AIN2M 0x200 /* AIN2(+) - AIN2(-) */
#define AD7193_CH_AIN1 0x401 /* AIN1 - AINCOM */
diff --git a/drivers/staging/iio/meter/ade7854.c b/drivers/staging/iio/meter/ade7854.c
index 029c3bf42d4d..07774c000c5a 100644
--- a/drivers/staging/iio/meter/ade7854.c
+++ b/drivers/staging/iio/meter/ade7854.c
@@ -269,7 +269,7 @@ static IIO_DEV_ATTR_VPEAK(0644,
static IIO_DEV_ATTR_IPEAK(0644,
ade7854_read_32bit,
ade7854_write_32bit,
- ADE7854_VPEAK);
+ ADE7854_IPEAK);
static IIO_DEV_ATTR_APHCAL(0644,
ade7854_read_16bit,
ade7854_write_16bit,
diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c
index 18936cdb1083..956daf8c3bd2 100644
--- a/drivers/staging/most/core.c
+++ b/drivers/staging/most/core.c
@@ -1431,7 +1431,7 @@ int most_register_interface(struct most_interface *iface)
INIT_LIST_HEAD(&iface->p->channel_list);
iface->p->dev_id = id;
- snprintf(iface->p->name, STRING_SIZE, "mdev%d", id);
+ strcpy(iface->p->name, iface->description);
iface->dev.init_name = iface->p->name;
iface->dev.bus = &mc.bus;
iface->dev.parent = &mc.dev;
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index b121d8f8f3d7..27aeca30eeae 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -266,7 +266,7 @@ MODULE_PARM_DESC(pc104_3, "set interface types for ISA(PC104) board #3 (e.g. pc1
module_param_array(pc104_4, ulong, NULL, 0);
MODULE_PARM_DESC(pc104_4, "set interface types for ISA(PC104) board #4 (e.g. pc104_4=232,232,485,485,...");
-static int rp_init(void);
+static int __init rp_init(void);
static void rp_cleanup_module(void);
module_init(rp_init);
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 09a183dfc526..a31db15cd7c0 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1520,11 +1520,13 @@ static int __init sc16is7xx_init(void)
#endif
return ret;
+#ifdef CONFIG_SERIAL_SC16IS7XX_SPI
err_spi:
+#endif
#ifdef CONFIG_SERIAL_SC16IS7XX_I2C
i2c_del_driver(&sc16is7xx_i2c_uart_driver);
-#endif
err_i2c:
+#endif
uart_unregister_driver(&sc16is7xx_uart);
return ret;
}
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 2d1c626312cd..3cd139752d3f 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2512,14 +2512,16 @@ done:
* center of the last stop bit in sampling clocks.
*/
int last_stop = bits * 2 - 1;
- int deviation = min_err * srr * last_stop / 2 / baud;
+ int deviation = DIV_ROUND_CLOSEST(min_err * last_stop *
+ (int)(srr + 1),
+ 2 * (int)baud);
if (abs(deviation) >= 2) {
/* At least two sampling clocks off at the
* last stop bit; we can increase the error
* margin by shifting the sampling point.
*/
- int shift = min(-8, max(7, deviation / 2));
+ int shift = clamp(deviation / 2, -8, 7);
hssrr |= (shift << HSCIF_SRHP_SHIFT) &
HSCIF_SRHP_MASK;
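
(The sh-sci change above is a classic clamping bug: min(-8, max(7, x)) can never be anything but -8, because max(7, x) is at least 7 and min(-8, ...) then always picks -8; the intended behaviour is clamp(x, -8, 7). A small check in plain C.)

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/* clamp val into [lo, hi] */
#define CLAMP(val, lo, hi) MIN(MAX(val, lo), hi)

int main(void)
{
	for (int dev = -20; dev <= 20; dev += 10) {
		int buggy = MIN(-8, MAX(7, dev / 2));	/* always -8 */
		int fixed = CLAMP(dev / 2, -8, 7);

		printf("deviation/2=%3d buggy=%3d fixed=%3d\n",
		       dev / 2, buggy, fixed);
	}
	return 0;
}
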
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index d34984aa646d..650c66886c80 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1520,7 +1520,8 @@ static void csi_J(struct vc_data *vc, int vpar)
return;
}
scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
- update_region(vc, (unsigned long) start, count);
+ if (con_should_update(vc))
+ do_update_region(vc, (unsigned long) start, count);
vc->vc_need_wrap = 0;
}
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 8987cec9549d..ebcadaad89d1 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -473,11 +473,6 @@ static int usb_unbind_interface(struct device *dev)
pm_runtime_disable(dev);
pm_runtime_set_suspended(dev);
- /* Undo any residual pm_autopm_get_interface_* calls */
- for (r = atomic_read(&intf->pm_usage_cnt); r > 0; --r)
- usb_autopm_put_interface_no_suspend(intf);
- atomic_set(&intf->pm_usage_cnt, 0);
-
if (!error)
usb_autosuspend_device(udev);
@@ -1633,7 +1628,6 @@ void usb_autopm_put_interface(struct usb_interface *intf)
int status;
usb_mark_last_busy(udev);
- atomic_dec(&intf->pm_usage_cnt);
status = pm_runtime_put_sync(&intf->dev);
dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
__func__, atomic_read(&intf->dev.power.usage_count),
@@ -1662,7 +1656,6 @@ void usb_autopm_put_interface_async(struct usb_interface *intf)
int status;
usb_mark_last_busy(udev);
- atomic_dec(&intf->pm_usage_cnt);
status = pm_runtime_put(&intf->dev);
dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
__func__, atomic_read(&intf->dev.power.usage_count),
@@ -1684,7 +1677,6 @@ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf)
struct usb_device *udev = interface_to_usbdev(intf);
usb_mark_last_busy(udev);
- atomic_dec(&intf->pm_usage_cnt);
pm_runtime_put_noidle(&intf->dev);
}
EXPORT_SYMBOL_GPL(usb_autopm_put_interface_no_suspend);
@@ -1715,8 +1707,6 @@ int usb_autopm_get_interface(struct usb_interface *intf)
status = pm_runtime_get_sync(&intf->dev);
if (status < 0)
pm_runtime_put_sync(&intf->dev);
- else
- atomic_inc(&intf->pm_usage_cnt);
dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
__func__, atomic_read(&intf->dev.power.usage_count),
status);
@@ -1750,8 +1740,6 @@ int usb_autopm_get_interface_async(struct usb_interface *intf)
status = pm_runtime_get(&intf->dev);
if (status < 0 && status != -EINPROGRESS)
pm_runtime_put_noidle(&intf->dev);
- else
- atomic_inc(&intf->pm_usage_cnt);
dev_vdbg(&intf->dev, "%s: cnt %d -> %d\n",
__func__, atomic_read(&intf->dev.power.usage_count),
status);
@@ -1775,7 +1763,6 @@ void usb_autopm_get_interface_no_resume(struct usb_interface *intf)
struct usb_device *udev = interface_to_usbdev(intf);
usb_mark_last_busy(udev);
- atomic_inc(&intf->pm_usage_cnt);
pm_runtime_get_noresume(&intf->dev);
}
EXPORT_SYMBOL_GPL(usb_autopm_get_interface_no_resume);
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 82239f27c4cc..e844bb7b5676 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -820,9 +820,11 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
if (dev->state == USB_STATE_SUSPENDED)
return -EHOSTUNREACH;
- if (size <= 0 || !buf || !index)
+ if (size <= 0 || !buf)
return -EINVAL;
buf[0] = 0;
+ if (index <= 0 || index >= 256)
+ return -EINVAL;
tbuf = kmalloc(256, GFP_NOIO);
if (!tbuf)
return -ENOMEM;
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index baf72f95f0f1..213b52508621 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -979,8 +979,18 @@ static int dummy_udc_start(struct usb_gadget *g,
struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g);
struct dummy *dum = dum_hcd->dum;
- if (driver->max_speed == USB_SPEED_UNKNOWN)
+ switch (g->speed) {
+ /* All the speeds we support */
+ case USB_SPEED_LOW:
+ case USB_SPEED_FULL:
+ case USB_SPEED_HIGH:
+ case USB_SPEED_SUPER:
+ break;
+ default:
+ dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n",
+ driver->max_speed);
return -EINVAL;
+ }
/*
* SLAVE side init ... the layer above hardware, which
@@ -1784,9 +1794,10 @@ static void dummy_timer(struct timer_list *t)
/* Bus speed is 500000 bytes/ms, so use a little less */
total = 490000;
break;
- default:
+ default: /* Can't happen */
dev_err(dummy_dev(dum_hcd), "bogus device speed\n");
- return;
+ total = 0;
+ break;
}
/* FIXME if HZ != 1000 this will probably misbehave ... */
@@ -1828,7 +1839,7 @@ restart:
/* Used up this frame's bandwidth? */
if (total <= 0)
- break;
+ continue;
/* find the gadget's ep for this request (if configured) */
address = usb_pipeendpoint (urb->pipe);
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 6d9fd5f64903..7b306aa22d25 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -314,6 +314,7 @@ static void yurex_disconnect(struct usb_interface *interface)
usb_deregister_dev(interface, &yurex_class);
/* prevent more I/O from starting */
+ usb_poison_urb(dev->urb);
mutex_lock(&dev->io_mutex);
dev->interface = NULL;
mutex_unlock(&dev->io_mutex);
diff --git a/drivers/usb/storage/realtek_cr.c b/drivers/usb/storage/realtek_cr.c
index 31b024441938..cc794e25a0b6 100644
--- a/drivers/usb/storage/realtek_cr.c
+++ b/drivers/usb/storage/realtek_cr.c
@@ -763,18 +763,16 @@ static void rts51x_suspend_timer_fn(struct timer_list *t)
break;
case RTS51X_STAT_IDLE:
case RTS51X_STAT_SS:
- usb_stor_dbg(us, "RTS51X_STAT_SS, intf->pm_usage_cnt:%d, power.usage:%d\n",
- atomic_read(&us->pusb_intf->pm_usage_cnt),
+ usb_stor_dbg(us, "RTS51X_STAT_SS, power.usage:%d\n",
atomic_read(&us->pusb_intf->dev.power.usage_count));
- if (atomic_read(&us->pusb_intf->pm_usage_cnt) > 0) {
+ if (atomic_read(&us->pusb_intf->dev.power.usage_count) > 0) {
usb_stor_dbg(us, "Ready to enter SS state\n");
rts51x_set_stat(chip, RTS51X_STAT_SS);
/* ignore mass storage interface's children */
pm_suspend_ignore_children(&us->pusb_intf->dev, true);
usb_autopm_put_interface_async(us->pusb_intf);
- usb_stor_dbg(us, "RTS51X_STAT_SS 01, intf->pm_usage_cnt:%d, power.usage:%d\n",
- atomic_read(&us->pusb_intf->pm_usage_cnt),
+ usb_stor_dbg(us, "RTS51X_STAT_SS 01, power.usage:%d\n",
atomic_read(&us->pusb_intf->dev.power.usage_count));
}
break;
@@ -807,11 +805,10 @@ static void rts51x_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
int ret;
if (working_scsi(srb)) {
- usb_stor_dbg(us, "working scsi, intf->pm_usage_cnt:%d, power.usage:%d\n",
- atomic_read(&us->pusb_intf->pm_usage_cnt),
+ usb_stor_dbg(us, "working scsi, power.usage:%d\n",
atomic_read(&us->pusb_intf->dev.power.usage_count));
- if (atomic_read(&us->pusb_intf->pm_usage_cnt) <= 0) {
+ if (atomic_read(&us->pusb_intf->dev.power.usage_count) <= 0) {
ret = usb_autopm_get_interface(us->pusb_intf);
usb_stor_dbg(us, "working scsi, ret=%d\n", ret);
}
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 97b09a42a10c..dbfb2f24d71e 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -361,16 +361,10 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
}
if (usb_endpoint_xfer_isoc(epd)) {
- /* validate packet size and number of packets */
- unsigned int maxp, packets, bytes;
-
- maxp = usb_endpoint_maxp(epd);
- maxp *= usb_endpoint_maxp_mult(epd);
- bytes = pdu->u.cmd_submit.transfer_buffer_length;
- packets = DIV_ROUND_UP(bytes, maxp);
-
+ /* validate number of packets */
if (pdu->u.cmd_submit.number_of_packets < 0 ||
- pdu->u.cmd_submit.number_of_packets > packets) {
+ pdu->u.cmd_submit.number_of_packets >
+ USBIP_MAX_ISO_PACKETS) {
dev_err(&sdev->udev->dev,
"CMD_SUBMIT: isoc invalid num packets %d\n",
pdu->u.cmd_submit.number_of_packets);
diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
index bf8afe9b5883..8be857a4fa13 100644
--- a/drivers/usb/usbip/usbip_common.h
+++ b/drivers/usb/usbip/usbip_common.h
@@ -121,6 +121,13 @@ extern struct device_attribute dev_attr_usbip_debug;
#define USBIP_DIR_OUT 0x00
#define USBIP_DIR_IN 0x01
+/*
+ * Arbitrary limit for the maximum number of isochronous packets in a URB;
+ * compare, for example, the uhci_submit_isochronous() function in
+ * drivers/usb/host/uhci-q.c
+ */
+#define USBIP_MAX_ISO_PACKETS 1024
+
/**
* struct usbip_header_basic - data pertinent to every request
* @command: the usbip request type
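
The stub_rx change above swaps a per-transfer bound (derived from maxp and the buffer length) for this fixed cap, because number_of_packets arrives from the network and is used to size allocations before the transfer is validated further. A minimal sketch of the same kind of bound check, with an illustrative limit and helper name rather than the ones in this patch:

    #include <errno.h>

    #define MAX_ISO_PACKETS 1024    /* arbitrary cap, as in the patch above */

    /* Reject a peer-supplied packet count before it sizes any allocation. */
    static int check_iso_packets(int number_of_packets)
    {
            if (number_of_packets < 0 || number_of_packets > MAX_ISO_PACKETS)
                    return -EINVAL;
            return 0;
    }
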
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 5ace833de746..351af88231ad 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -911,8 +911,12 @@ static int vhost_new_umem_range(struct vhost_umem *umem,
u64 start, u64 size, u64 end,
u64 userspace_addr, int perm)
{
- struct vhost_umem_node *tmp, *node = kmalloc(sizeof(*node), GFP_ATOMIC);
+ struct vhost_umem_node *tmp, *node;
+ if (!size)
+ return -EFAULT;
+
+ node = kmalloc(sizeof(*node), GFP_ATOMIC);
if (!node)
return -ENOMEM;
diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c
index 0f4ecfcdb549..a9fb77585272 100644
--- a/drivers/w1/masters/ds2490.c
+++ b/drivers/w1/masters/ds2490.c
@@ -1016,15 +1016,15 @@ static int ds_probe(struct usb_interface *intf,
/* alternative 3, 1ms interrupt (greatly speeds search), 64 byte bulk */
alt = 3;
err = usb_set_interface(dev->udev,
- intf->altsetting[alt].desc.bInterfaceNumber, alt);
+ intf->cur_altsetting->desc.bInterfaceNumber, alt);
if (err) {
dev_err(&dev->udev->dev, "Failed to set alternative setting %d "
"for %d interface: err=%d.\n", alt,
- intf->altsetting[alt].desc.bInterfaceNumber, err);
+ intf->cur_altsetting->desc.bInterfaceNumber, err);
goto err_out_clear;
}
- iface_desc = &intf->altsetting[alt];
+ iface_desc = intf->cur_altsetting;
if (iface_desc->desc.bNumEndpoints != NUM_EP-1) {
pr_info("Num endpoints=%d. It is not DS9490R.\n",
iface_desc->desc.bNumEndpoints);
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 1c7955f5cdaf..128f2dbe256a 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -203,8 +203,7 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
*/
void afs_init_callback_state(struct afs_server *server)
{
- if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
- server->cb_s_break++;
+ server->cb_s_break++;
}
/*
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 8ee5972893ed..2f8acb4c556d 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -34,7 +34,7 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
static int afs_deliver_yfs_cb_callback(struct afs_call *);
#define CM_NAME(name) \
- const char afs_SRXCB##name##_name[] __tracepoint_string = \
+ char afs_SRXCB##name##_name[] __tracepoint_string = \
"CB." #name
/*
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 1a4ce07fb406..9cedc3fc1b77 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -216,9 +216,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
set_nlink(inode, 2);
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
- inode->i_ctime.tv_sec = get_seconds();
- inode->i_ctime.tv_nsec = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime;
+ inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
inode->i_blocks = 0;
inode_set_iversion_raw(inode, 0);
inode->i_generation = 0;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bb1f244b2b3a..3904ab0b9563 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -474,7 +474,6 @@ struct afs_server {
time64_t put_time; /* Time at which last put */
time64_t update_at; /* Time at which to next update the record */
unsigned long flags;
-#define AFS_SERVER_FL_NEW 0 /* New server, don't inc cb_s_break */
#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */
#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */
#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */
@@ -827,7 +826,7 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest
static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
{
- return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
+ return vnode->cb_break + vnode->cb_v_break;
}
static inline bool afs_cb_is_broken(unsigned int cb_break,
@@ -835,7 +834,6 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
const struct afs_cb_interest *cbi)
{
return !cbi || cb_break != (vnode->cb_break +
- cbi->server->cb_s_break +
vnode->volume->cb_v_break);
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 2c588f9bbbda..15c7e82d80cb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -572,13 +572,17 @@ static void afs_deliver_to_call(struct afs_call *call)
case -ENODATA:
case -EBADMSG:
case -EMSGSIZE:
- default:
abort_code = RXGEN_CC_UNMARSHAL;
if (state != AFS_CALL_CL_AWAIT_REPLY)
abort_code = RXGEN_SS_UNMARSHAL;
rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
abort_code, ret, "KUM");
goto local_abort;
+ default:
+ abort_code = RX_USER_ABORT;
+ rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ abort_code, ret, "KER");
+ goto local_abort;
}
}
@@ -610,6 +614,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
bool stalled = false;
u64 rtt;
u32 life, last_life;
+ bool rxrpc_complete = false;
DECLARE_WAITQUEUE(myself, current);
@@ -621,7 +626,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
rtt2 = 2;
timeout = rtt2;
- last_life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life);
add_wait_queue(&call->waitq, &myself);
for (;;) {
@@ -639,7 +644,12 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
if (afs_check_call_state(call, AFS_CALL_COMPLETE))
break;
- life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
+ if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) {
+ /* rxrpc terminated the call. */
+ rxrpc_complete = true;
+ break;
+ }
+
if (timeout == 0 &&
life == last_life && signal_pending(current)) {
if (stalled)
@@ -663,12 +673,16 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
- /* Kill off the call if it's still live. */
if (!afs_check_call_state(call, AFS_CALL_COMPLETE)) {
- _debug("call interrupted");
- if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
- RX_USER_ABORT, -EINTR, "KWI"))
- afs_set_call_complete(call, -EINTR, 0);
+ if (rxrpc_complete) {
+ afs_set_call_complete(call, call->error, call->abort_code);
+ } else {
+ /* Kill off the call if it's still live. */
+ _debug("call interrupted");
+ if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+ RX_USER_ABORT, -EINTR, "KWI"))
+ afs_set_call_complete(call, -EINTR, 0);
+ }
}
spin_lock_bh(&call->state_lock);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 642afa2e9783..65b33b6da48b 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -226,7 +226,6 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
RCU_INIT_POINTER(server->addresses, alist);
server->addr_version = alist->version;
server->uuid = *uuid;
- server->flags = (1UL << AFS_SERVER_FL_NEW);
server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
rwlock_init(&server->fs_lock);
INIT_HLIST_HEAD(&server->cb_volumes);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 72efcfcf9f95..0122d7445fba 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -264,6 +264,7 @@ static void afs_kill_pages(struct address_space *mapping,
first = page->index + 1;
lock_page(page);
generic_error_remove_page(mapping, page);
+ unlock_page(page);
}
__pagevec_release(&pv);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 24615c76c1d0..bb28e2ead679 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -264,7 +264,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_for_each_segment_all(bvec, &bio, i, iter_all) {
if (should_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
+ if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
+ put_page(bvec->bv_page);
}
if (unlikely(bio.bi_status))
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 920bf3b4b0ef..cccc75d15970 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
unsigned long this_sum_bytes = 0;
int i;
u64 offset;
+ unsigned nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
+ GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
- GFP_NOFS);
if (!sums)
return BLK_STS_RESOURCE;
@@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
bytes_left = bio->bi_iter.bi_size - total_bytes;
- sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
- GFP_NOFS);
+ nofs_flag = memalloc_nofs_save();
+ sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+ bytes_left), GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
BUG_ON(!sums); /* -ENOMEM */
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 82fdda8ff5ab..2973608824ec 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6783,7 +6783,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
u64 extent_start = 0;
u64 extent_end = 0;
u64 objectid = btrfs_ino(inode);
- u8 extent_type;
+ int extent_type = -1;
struct btrfs_path *path = NULL;
struct btrfs_root *root = inode->root;
struct btrfs_file_extent_item *item;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6fde2b2741ef..45e3cfd1198b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
@@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
list_del(&sum->list);
- kfree(sum);
+ kvfree(sum);
}
kmem_cache_free(btrfs_ordered_extent_cache, entry);
}
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a8f429882249..0637149fb9f9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
struct ceph_inode_info *dci = ceph_inode(dir);
+ unsigned hash;
switch (dci->i_dir_layout.dl_dir_hash) {
case 0: /* for backward compat */
@@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
return dn->d_name.hash;
default:
- return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+ spin_lock(&dn->d_lock);
+ hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
dn->d_name.name, dn->d_name.len);
+ spin_unlock(&dn->d_lock);
+ return hash;
}
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 2d61ddda9bf5..c2feb310ac1e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
return 0;
}
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+ int ret;
+
+ /* take d_lock to ensure dentry->d_name stability */
+ spin_lock(&dentry->d_lock);
+ ret = dentry->d_name.len - len;
+ if (!ret)
+ ret = memcmp(dentry->d_name.name, name, len);
+ spin_unlock(&dentry->d_lock);
+ return ret;
+}
+
/*
* Incorporate results into the local cache. This is either just
* one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1412,7 +1425,8 @@ retry_lookup:
err = splice_dentry(&req->r_dentry, in);
if (err < 0)
goto done;
- } else if (rinfo->head->is_dentry) {
+ } else if (rinfo->head->is_dentry &&
+ !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
struct ceph_vino *ptvino = NULL;
if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 21c33ed048ed..9049c2a3e972 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
ci->i_prealloc_cap_flush = NULL;
}
+
+ if (drop &&
+ ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ceph_put_snap_context(ci->i_head_snapc);
+ ci->i_head_snapc = NULL;
+ }
}
spin_unlock(&ci->i_ceph_lock);
while (!list_empty(&to_remove)) {
@@ -2161,10 +2170,39 @@ retry:
return path;
}
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+ int *ppathlen)
+{
+ u32 len;
+ char *name;
+
+retry:
+ len = READ_ONCE(dentry->d_name.len);
+ name = kmalloc(len + 1, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_name.len != len) {
+ spin_unlock(&dentry->d_lock);
+ kfree(name);
+ goto retry;
+ }
+ memcpy(name, dentry->d_name.name, len);
+ spin_unlock(&dentry->d_lock);
+
+ name[len] = '\0';
+ *ppath = name;
+ *ppathlen = len;
+ return 0;
+}
+
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath, bool parent_locked)
{
+ int ret;
char *path;
rcu_read_lock();
@@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ if (parent_locked) {
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
+ } else {
+ ret = clone_dentry_name(dentry, ppath, ppathlen);
+ if (ret)
+ return ret;
+ *pfreepath = true;
+ }
return 0;
}
rcu_read_unlock();
@@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
static int build_inode_path(struct inode *inode,
const char **ppath, int *ppathlen, u64 *pino,
- int *pfreepath)
+ bool *pfreepath)
{
struct dentry *dentry;
char *path;
@@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode,
if (IS_ERR(path))
return PTR_ERR(path);
*ppath = path;
- *pfreepath = 1;
+ *pfreepath = true;
return 0;
}
@@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode,
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
struct inode *rdiri, const char *rpath,
u64 rino, const char **ppath, int *pathlen,
- u64 *ino, int *freepath)
+ u64 *ino, bool *freepath, bool parent_locked)
{
int r = 0;
@@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
ceph_snap(rinode));
} else if (rdentry) {
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
- freepath);
+ freepath, parent_locked);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
- int freepath1 = 0, freepath2 = 0;
+ bool freepath1 = false, freepath2 = false;
int len;
u16 releases;
void *p, *end;
@@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1);
+ &path1, &pathlen1, &ino1, &freepath1,
+ test_bit(CEPH_MDS_R_PARENT_LOCKED,
+ &req->r_req_flags));
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /* If r_old_dentry is set, then assume that its parent is locked */
ret = set_request_path_attr(NULL, req->r_old_dentry,
req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2);
+ &path2, &pathlen2, &ino2, &freepath2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 89aa37fa0f84..b26e12cd8ec3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
old_snapc = NULL;
update_snapc:
- if (ci->i_head_snapc) {
+ if (ci->i_wrbuffer_ref_head == 0 &&
+ ci->i_wr_ref == 0 &&
+ ci->i_dirty_caps == 0 &&
+ ci->i_flushing_caps == 0) {
+ ci->i_head_snapc = NULL;
+ } else {
ci->i_head_snapc = ceph_get_snap_context(new_snapc);
dout(" new snapc is %p\n", new_snapc);
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5b18d4585740..585ad3207cb1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1333,6 +1333,7 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
}
struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr);
void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
#define CIFS_CACHE_READ_FLG 1
@@ -1855,6 +1856,7 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
#endif /* CONFIG_CIFS_ACL */
void cifs_oplock_break(struct work_struct *work);
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 89006e044973..7037a137fa53 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -360,13 +360,31 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
return cifs_file;
}
-/*
- * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server. Must be called without holding
- * tcon->open_file_lock and cifs_file->file_info_lock.
+/**
+ * cifsFileInfo_put - release a reference to file private data
+ *
+ * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
+ _cifsFileInfo_put(cifs_file, true);
+}
+
+/**
+ * _cifsFileInfo_put - release a reference to file private data
+ *
+ * This may involve closing the filehandle @cifs_file out on the
+ * server. Must be called without holding tcon->open_file_lock and
+ * cifs_file->file_info_lock.
+ *
+ * If @wait_oplock_handler is true and we are releasing the last
+ * reference, wait for any running oplock break handler of the file
+ * and cancel any pending one. If calling this function from the
+ * oplock break handler, you need to pass false.
+ *
+ */
+void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
+{
struct inode *inode = d_inode(cifs_file->dentry);
struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -414,7 +432,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
spin_unlock(&tcon->open_file_lock);
- oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
+ oplock_break_cancelled = wait_oplock_handler ?
+ cancel_work_sync(&cifs_file->oplock_break) : false;
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
@@ -2858,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
struct dentry *dentry = ctx->cfile->dentry;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2922,10 +2940,6 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (!ctx->direct_io)
- for (i = 0; i < ctx->npages; i++)
- put_page(ctx->bv[i].bv_page);
-
cifs_stats_bytes_written(tcon, ctx->total_len);
set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -3563,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
struct iov_iter *to = &ctx->iter;
struct cifs_sb_info *cifs_sb;
struct cifs_tcon *tcon;
- unsigned int i;
int rc;
tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3647,15 +3660,8 @@ again:
kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
- if (!ctx->direct_io) {
- for (i = 0; i < ctx->npages; i++) {
- if (ctx->should_dirty)
- set_page_dirty(ctx->bv[i].bv_page);
- put_page(ctx->bv[i].bv_page);
- }
-
+ if (!ctx->direct_io)
ctx->total_len = ctx->len - iov_iter_count(to);
- }
/* mask nodata case */
if (rc == -ENODATA)
@@ -4603,6 +4609,7 @@ void cifs_oplock_break(struct work_struct *work)
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+	_cifsFileInfo_put(cfile, false /* do not wait for ourselves */);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 53fdb5df0d2e..538fd7d807e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
if (rc == 0 || rc != -EBUSY)
goto do_rename_exit;
+ /* Don't fall back to using SMB on SMB 2+ mount */
+ if (server->vals->protocol_id != 0)
+ goto do_rename_exit;
+
/* open-file renames don't work across directories */
if (to_dentry->d_parent != from_dentry->d_parent)
goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index bee203055b30..0dc6f08020ac 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -501,8 +501,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&pCifsInode->flags);
- queue_work(cifsoplockd_wq,
- &netfile->oplock_break);
+ cifs_queue_oplock_break(netfile);
netfile->oplock_break_cancelled = false;
spin_unlock(&tcon->open_file_lock);
@@ -607,6 +606,28 @@ void cifs_put_writer(struct cifsInodeInfo *cinode)
spin_unlock(&cinode->writers_lock);
}
+/**
+ * cifs_queue_oplock_break - queue the oplock break handler for cfile
+ *
+ * This function is called from the demultiplex thread when it
+ * receives an oplock break for @cfile.
+ *
+ * Assumes the tcon->open_file_lock is held.
+ * Assumes cfile->file_info_lock is NOT held.
+ */
+void cifs_queue_oplock_break(struct cifsFileInfo *cfile)
+{
+ /*
+ * Bump the handle refcount now while we hold the
+ * open_file_lock to enforce the validity of it for the oplock
+ * break handler. The matching put is done at the end of the
+ * handler.
+ */
+ cifsFileInfo_get(cfile);
+
+ queue_work(cifsoplockd_wq, &cfile->oplock_break);
+}
+
void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
{
clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
@@ -768,6 +789,11 @@ cifs_aio_ctx_alloc(void)
{
struct cifs_aio_ctx *ctx;
+ /*
+ * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+ * to false so that we know when we have to unreference pages within
+ * cifs_aio_ctx_release()
+ */
ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
if (!ctx)
return NULL;
@@ -786,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
struct cifs_aio_ctx, refcount);
cifsFileInfo_put(ctx->cfile);
- kvfree(ctx->bv);
+
+ /*
+	 * ctx->bv is only set if setup_aio_ctx_iter() was called successfully,
+	 * which means that iov_iter_get_pages() succeeded and thus that we
+	 * have taken references on the pages.
+ */
+ if (ctx->bv) {
+ unsigned i;
+
+ for (i = 0; i < ctx->npages; i++) {
+ if (ctx->should_dirty)
+ set_page_dirty(ctx->bv[i].bv_page);
+ put_page(ctx->bv[i].bv_page);
+ }
+ kvfree(ctx->bv);
+ }
+
kfree(ctx);
}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 0e3570e40ff8..e311f58dc1c8 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -555,7 +555,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
- queue_work(cifsoplockd_wq, &cfile->oplock_break);
+ cifs_queue_oplock_break(cfile);
kfree(lw);
return true;
}
@@ -712,8 +712,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
spin_unlock(&cfile->file_info_lock);
- queue_work(cifsoplockd_wq,
- &cfile->oplock_break);
+
+ cifs_queue_oplock_break(cfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 00225e699d03..c36ff0d1fe2a 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2389,6 +2389,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_iov,
&resp_buftype);
+ if (!rc)
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
if (!rc || !err_iov.iov_base) {
rc = -ENOENT;
goto free_path;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 21ad01d55ab2..a37774a55f3a 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -832,8 +832,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
} else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
/* ops set to 3.0 by default for default so update */
ses->server->ops = &smb21_operations;
- } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID))
+ ses->server->vals = &smb21_values;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
ses->server->ops = &smb311_operations;
+ ses->server->vals = &smb311_values;
+ }
} else if (le16_to_cpu(rsp->DialectRevision) !=
ses->server->vals->protocol_id) {
/* if requested single dialect ensure returned dialect matched */
@@ -3448,8 +3451,6 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
rqst.rq_nvec = 1;
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
-
rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3465,12 +3466,15 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, 0);
free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+ cifs_small_buf_release(req);
return rc == -ENODATA ? 0 : rc;
} else
trace_smb3_read_done(xid, req->PersistentFileId,
io_parms->tcon->tid, ses->Suid,
io_parms->offset, io_parms->length);
+ cifs_small_buf_release(req);
+
*nbytes = le32_to_cpu(rsp->DataLength);
if ((*nbytes > CIFS_MAX_MSGSIZE) ||
(*nbytes > io_parms->length)) {
@@ -3769,7 +3773,6 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
rc = cifs_send_recv(xid, io_parms->tcon->ses, &rqst,
&resp_buftype, flags, &rsp_iov);
- cifs_small_buf_release(req);
rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
if (rc) {
@@ -3787,6 +3790,7 @@ SMB2_write(const unsigned int xid, struct cifs_io_parms *io_parms,
io_parms->offset, *nbytes);
}
+ cifs_small_buf_release(req);
free_rsp_buf(resp_buftype, rsp);
return rc;
}
diff --git a/fs/dax.c b/fs/dax.c
index ca0671d55aa6..e5e54da1715f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -33,6 +33,7 @@
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
+#include <asm/pgalloc.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -1407,7 +1408,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
+ pgtable_t pgtable = NULL;
struct page *zero_page;
spinlock_t *ptl;
pmd_t pmd_entry;
@@ -1422,12 +1425,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE, false);
+ if (arch_needs_pgtable_deposit()) {
+ pgtable = pte_alloc_one(vma->vm_mm);
+ if (!pgtable)
+ return VM_FAULT_OOM;
+ }
+
ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
}
+ if (pgtable) {
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ mm_inc_nr_ptes(vma->vm_mm);
+ }
pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
@@ -1436,6 +1449,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
return VM_FAULT_NOPAGE;
fallback:
+ if (pgtable)
+ pte_free(vma->vm_mm, pgtable);
trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
return VM_FAULT_FALLBACK;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8a63e52785e9..9971a35cf1ef 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2056,10 +2056,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
ret = -EINVAL;
- if (rem < len) {
- pipe_unlock(pipe);
- goto out;
- }
+ if (rem < len)
+ goto out_free;
rem = len;
while (rem) {
@@ -2077,7 +2075,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
} else {
- pipe_buf_get(pipe, ibuf);
+ if (!pipe_buf_get(pipe, ibuf))
+ goto out_free;
+
*obuf = *ibuf;
obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
obuf->len = rem;
@@ -2100,11 +2100,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
ret = fuse_dev_do_write(fud, &cs, len);
pipe_lock(pipe);
+out_free:
for (idx = 0; idx < nbuf; idx++)
pipe_buf_release(pipe, &bufs[idx]);
pipe_unlock(pipe);
-out:
kvfree(bufs);
return ret;
}
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..9a453f3637f8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1817,8 +1817,13 @@ int file_remove_privs(struct file *file)
int kill;
int error = 0;
- /* Fast path for nothing security related */
- if (IS_NOSEC(inode))
+ /*
+ * Fast path for nothing security related.
+ * As well for non-regular files, e.g. blkdev inodes.
+ * For example, blkdev_write_iter() might get here
+ * trying to remove privs which it is not allowed to.
+ */
+ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 89aa8412b5f5..84efb8956734 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4,15 +4,28 @@
* supporting fast/efficient IO.
*
* A note on the read/write ordering memory barriers that are matched between
- * the application and kernel side. When the application reads the CQ ring
- * tail, it must use an appropriate smp_rmb() to order with the smp_wmb()
- * the kernel uses after writing the tail. Failure to do so could cause a
- * delay in when the application notices that completion events available.
- * This isn't a fatal condition. Likewise, the application must use an
- * appropriate smp_wmb() both before writing the SQ tail, and after writing
- * the SQ tail. The first one orders the sqe writes with the tail write, and
- * the latter is paired with the smp_rmb() the kernel will issue before
- * reading the SQ tail on submission.
+ * the application and kernel side.
+ *
+ * After the application reads the CQ ring tail, it must use an
+ * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
+ * before writing the tail (using smp_load_acquire to read the tail will
+ * do). It also needs a smp_mb() before updating CQ head (ordering the
+ * entry load(s) with the head store), pairing with an implicit barrier
+ * through a control-dependency in io_get_cqring (smp_store_release to
+ * store head will do). Failure to do so could lead to reading invalid
+ * CQ entries.
+ *
+ * Likewise, the application must use an appropriate smp_wmb() before
+ * writing the SQ tail (ordering SQ entry stores with the tail store),
+ * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
+ * to store the tail will do). And it needs a barrier ordering the SQ
+ * head load before writing new SQ entries (smp_load_acquire to read
+ * head will do).
+ *
+ * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
+ * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
+ * updating the SQ tail; a full memory barrier smp_mb() is needed
+ * between.
*
* Also see the examples in the liburing library:
*
@@ -70,20 +83,108 @@ struct io_uring {
u32 tail ____cacheline_aligned_in_smp;
};
+/*
+ * This data is shared with the application through the mmap at offset
+ * IORING_OFF_SQ_RING.
+ *
+ * The offsets to the member fields are published through struct
+ * io_sqring_offsets when calling io_uring_setup.
+ */
struct io_sq_ring {
+ /*
+ * Head and tail offsets into the ring; the offsets need to be
+ * masked to get valid indices.
+ *
+ * The kernel controls head and the application controls tail.
+ */
struct io_uring r;
+ /*
+ * Bitmask to apply to head and tail offsets (constant, equals
+ * ring_entries - 1)
+ */
u32 ring_mask;
+ /* Ring size (constant, power of 2) */
u32 ring_entries;
+ /*
+ * Number of invalid entries dropped by the kernel due to
+ * invalid index stored in array
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application (i.e. get number of "new events" by comparing to
+ * cached value).
+ *
+ * After a new SQ head value was read by the application this
+	 * Once a new SQ head value has been read by the application, this
+	 * counter includes all submissions that were dropped up to the
+	 * new SQ head (and possibly more).
u32 dropped;
+ /*
+ * Runtime flags
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application.
+ *
+ * The application needs a full memory barrier before checking
+ * for IORING_SQ_NEED_WAKEUP after updating the sq tail.
+ */
u32 flags;
+ /*
+ * Ring buffer of indices into array of io_uring_sqe, which is
+ * mmapped by the application using the IORING_OFF_SQES offset.
+ *
+ * This indirection could e.g. be used to assign fixed
+ * io_uring_sqe entries to operations and only submit them to
+ * the queue when needed.
+ *
+ * The kernel modifies neither the indices array nor the entries
+ * array.
+ */
u32 array[];
};
+/*
+ * This data is shared with the application through the mmap at offset
+ * IORING_OFF_CQ_RING.
+ *
+ * The offsets to the member fields are published through struct
+ * io_cqring_offsets when calling io_uring_setup.
+ */
struct io_cq_ring {
+ /*
+ * Head and tail offsets into the ring; the offsets need to be
+ * masked to get valid indices.
+ *
+ * The application controls head and the kernel tail.
+ */
struct io_uring r;
+ /*
+ * Bitmask to apply to head and tail offsets (constant, equals
+ * ring_entries - 1)
+ */
u32 ring_mask;
+ /* Ring size (constant, power of 2) */
u32 ring_entries;
+ /*
+ * Number of completion events lost because the queue was full;
+ * this should be avoided by the application by making sure
+	 * there are not more requests pending than there is space in
+ * the completion queue.
+ *
+ * Written by the kernel, shouldn't be modified by the
+ * application (i.e. get number of "new events" by comparing to
+ * cached value).
+ *
+ * As completion events come in out of order this counter is not
+ * ordered with any other data.
+ */
u32 overflow;
+ /*
+ * Ring buffer of completion events.
+ *
+ * The kernel writes completion events fresh every time they are
+ * produced, so the application is allowed to modify pending
+ * entries.
+ */
struct io_uring_cqe cqes[];
};
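
The submission side mirrors this: the application fills array[] (and the sqe it points at), publishes the new tail with a release store, and, when IORING_SETUP_SQPOLL is used, issues a full barrier before checking the flags field for IORING_SQ_NEED_WAKEUP. A hedged sketch with C11 atomics standing in for the kernel primitives; queue_sqe() and its parameters are illustrative:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <linux/io_uring.h>

    /* sq_array, sq_tail, sq_mask and sq_flags are the mmap()ed fields
     * described by struct io_sq_ring above. */
    static int queue_sqe(uint32_t *sq_array, _Atomic uint32_t *sq_tail,
                         uint32_t sq_mask, _Atomic uint32_t *sq_flags,
                         uint32_t sqe_index)
    {
            uint32_t t = atomic_load_explicit(sq_tail, memory_order_relaxed);

            sq_array[t & sq_mask] = sqe_index;      /* fill the entry first */
            /* release: orders the array/sqe stores before the tail store */
            atomic_store_explicit(sq_tail, t + 1, memory_order_release);

            /* full barrier before reading flags, per the comment on flags */
            atomic_thread_fence(memory_order_seq_cst);
            return atomic_load_explicit(sq_flags, memory_order_relaxed) &
                   IORING_SQ_NEED_WAKEUP;   /* caller enters the kernel if set */
    }
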
@@ -221,7 +322,7 @@ struct io_kiocb {
struct list_head list;
unsigned int flags;
refcount_t refs;
-#define REQ_F_FORCE_NONBLOCK 1 /* inline submission attempt */
+#define REQ_F_NOWAIT 1 /* must not punt to workers */
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
@@ -317,12 +418,6 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
/* order cqe stores with ring update */
smp_store_release(&ring->r.tail, ctx->cached_cq_tail);
- /*
- * Write sider barrier of tail update, app has read side. See
- * comment at the top of this file.
- */
- smp_wmb();
-
if (wq_has_sleeper(&ctx->cq_wait)) {
wake_up_interruptible(&ctx->cq_wait);
kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
@@ -336,9 +431,12 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
unsigned tail;
tail = ctx->cached_cq_tail;
- /* See comment at the top of the file */
- smp_rmb();
- if (tail + 1 == READ_ONCE(ring->r.head))
+ /*
+ * writes to the cq entry need to come after reading head; the
+ * control dependency is enough as we're using WRITE_ONCE to
+ * fill the cq entry
+ */
+ if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
return NULL;
ctx->cached_cq_tail++;
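
This check works because head and tail are free-running 32-bit counters that are only masked when used as indices, so the unsigned difference tail - head is the number of pending entries even when the counters wrap. A tiny standalone illustration, with values chosen to force wraparound:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t ring_entries = 8;              /* power of two */
            uint32_t head = 0xfffffffcU;            /* about to wrap */
            uint32_t tail = head + ring_entries;    /* wraps past zero */

            /* unsigned subtraction is modulo 2^32, so the distance is exact */
            assert(tail - head == ring_entries);       /* ring is full */
            assert((head & (ring_entries - 1)) == 4);  /* masked index still valid */
            return 0;
    }
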
@@ -682,11 +780,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_add_tail(&req->list, &ctx->poll_list);
}
-static void io_file_put(struct io_submit_state *state, struct file *file)
+static void io_file_put(struct io_submit_state *state)
{
- if (!state) {
- fput(file);
- } else if (state->file) {
+ if (state->file) {
int diff = state->has_refs - state->used_refs;
if (diff)
@@ -711,7 +807,7 @@ static struct file *io_file_get(struct io_submit_state *state, int fd)
state->ios_left--;
return state->file;
}
- io_file_put(state, NULL);
+ io_file_put(state);
}
state->file = fget_many(fd, state->ios_left);
if (!state->file)
@@ -742,7 +838,7 @@ static bool io_file_supports_async(struct file *file)
}
static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
const struct io_uring_sqe *sqe = s->sqe;
struct io_ring_ctx *ctx = req->ctx;
@@ -776,10 +872,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
if (unlikely(ret))
return ret;
- if (force_nonblock) {
+
+ /* don't allow async punt if RWF_NOWAIT was requested */
+ if (kiocb->ki_flags & IOCB_NOWAIT)
+ req->flags |= REQ_F_NOWAIT;
+
+ if (force_nonblock)
kiocb->ki_flags |= IOCB_NOWAIT;
- req->flags |= REQ_F_FORCE_NONBLOCK;
- }
+
if (ctx->flags & IORING_SETUP_IOPOLL) {
if (!(kiocb->ki_flags & IOCB_DIRECT) ||
!kiocb->ki_filp->f_op->iopoll)
@@ -940,7 +1040,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -949,7 +1049,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
file = kiocb->ki_filp;
@@ -987,7 +1087,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
}
static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock, struct io_submit_state *state)
+ bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw;
@@ -996,7 +1096,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
int ret;
- ret = io_prep_rw(req, s, force_nonblock, state);
+ ret = io_prep_rw(req, s, force_nonblock);
if (ret)
return ret;
@@ -1338,8 +1438,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct sqe_submit *s, bool force_nonblock,
- struct io_submit_state *state)
+ const struct sqe_submit *s, bool force_nonblock)
{
int ret, opcode;
@@ -1355,18 +1454,18 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_READV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_READ_FIXED:
- ret = io_read(req, s, force_nonblock, state);
+ ret = io_read(req, s, force_nonblock);
break;
case IORING_OP_WRITE_FIXED:
- ret = io_write(req, s, force_nonblock, state);
+ ret = io_write(req, s, force_nonblock);
break;
case IORING_OP_FSYNC:
ret = io_fsync(req, s->sqe, force_nonblock);
@@ -1439,8 +1538,7 @@ restart:
struct sqe_submit *s = &req->submit;
const struct io_uring_sqe *sqe = s->sqe;
- /* Ensure we clear previously set forced non-block flag */
- req->flags &= ~REQ_F_FORCE_NONBLOCK;
+ /* Ensure we clear previously set non-block flag */
req->rw.ki_flags &= ~IOCB_NOWAIT;
ret = 0;
@@ -1459,7 +1557,7 @@ restart:
s->has_user = cur_mm != NULL;
s->needs_lock = true;
do {
- ret = __io_submit_sqe(ctx, req, s, false, NULL);
+ ret = __io_submit_sqe(ctx, req, s, false);
/*
* We can get EAGAIN for polled IO even though
* we're forcing a sync submission from here,
@@ -1470,10 +1568,11 @@ restart:
break;
cond_resched();
} while (1);
-
- /* drop submission reference */
- io_put_req(req);
}
+
+ /* drop submission reference */
+ io_put_req(req);
+
if (ret) {
io_cqring_add_event(ctx, sqe->user_data, ret, 0);
io_put_req(req);
@@ -1625,8 +1724,8 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
if (unlikely(ret))
goto out;
- ret = __io_submit_sqe(ctx, req, s, true, state);
- if (ret == -EAGAIN) {
+ ret = __io_submit_sqe(ctx, req, s, true);
+ if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
@@ -1671,7 +1770,7 @@ out:
static void io_submit_state_end(struct io_submit_state *state)
{
blk_finish_plug(&state->plug);
- io_file_put(state, NULL);
+ io_file_put(state);
if (state->free_reqs)
kmem_cache_free_bulk(req_cachep, state->free_reqs,
&state->reqs[state->cur_req]);
@@ -1700,24 +1799,10 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
* write new data to them.
*/
smp_store_release(&ring->r.head, ctx->cached_sq_head);
-
- /*
- * write side barrier of head update, app has read side. See
- * comment at the top of this file
- */
- smp_wmb();
}
}
/*
- * Undo last io_get_sqring()
- */
-static void io_drop_sqring(struct io_ring_ctx *ctx)
-{
- ctx->cached_sq_head--;
-}
-
-/*
* Fetch an sqe, if one is available. Note that s->sqe will point to memory
* that is mapped by userspace. This means that care needs to be taken to
* ensure that reads are stable, as we cannot rely on userspace always
@@ -1739,9 +1824,8 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
* though the application is the one updating it.
*/
head = ctx->cached_sq_head;
- /* See comment at the top of this file */
- smp_rmb();
- if (head == READ_ONCE(ring->r.tail))
+ /* make sure SQ entry isn't read before tail */
+ if (head == smp_load_acquire(&ring->r.tail))
return false;
head = READ_ONCE(ring->array[head & ctx->sq_mask]);
@@ -1755,8 +1839,6 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
/* drop invalid entries */
ctx->cached_sq_head++;
ring->dropped++;
- /* See comment at the top of this file */
- smp_wmb();
return false;
}
@@ -1866,7 +1948,8 @@ static int io_sq_thread(void *data)
/* Tell userspace we may need a wakeup call */
ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
- smp_wmb();
+ /* make sure to read SQ tail after writing flags */
+ smp_mb();
if (!io_get_sqring(ctx, &sqes[0])) {
if (kthread_should_stop()) {
@@ -1879,13 +1962,11 @@ static int io_sq_thread(void *data)
finish_wait(&ctx->sqo_wait, &wait);
ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
- smp_wmb();
continue;
}
finish_wait(&ctx->sqo_wait, &wait);
ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
- smp_wmb();
}
i = 0;
@@ -1920,13 +2001,17 @@ static int io_sq_thread(void *data)
unuse_mm(cur_mm);
mmput(cur_mm);
}
+
+ if (kthread_should_park())
+ kthread_parkme();
+
return 0;
}
static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
{
struct io_submit_state state, *statep = NULL;
- int i, ret = 0, submit = 0;
+ int i, submit = 0;
if (to_submit > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, ctx, to_submit);
@@ -1935,6 +2020,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
for (i = 0; i < to_submit; i++) {
struct sqe_submit s;
+ int ret;
if (!io_get_sqring(ctx, &s))
break;
@@ -1942,21 +2028,18 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
+ submit++;
ret = io_submit_sqe(ctx, &s, statep);
- if (ret) {
- io_drop_sqring(ctx);
- break;
- }
-
- submit++;
+ if (ret)
+ io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
}
io_commit_sqring(ctx);
if (statep)
io_submit_state_end(statep);
- return submit ? submit : ret;
+ return submit;
}
static unsigned io_cqring_events(struct io_cq_ring *ring)
@@ -2054,6 +2137,7 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
if (ctx->sqo_thread) {
ctx->sqo_stop = 1;
mb();
+ kthread_park(ctx->sqo_thread);
kthread_stop(ctx->sqo_thread);
ctx->sqo_thread = NULL;
}
@@ -2236,23 +2320,23 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
mmgrab(current->mm);
ctx->sqo_mm = current->mm;
- ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
- if (!ctx->sq_thread_idle)
- ctx->sq_thread_idle = HZ;
-
- ret = -EINVAL;
- if (!cpu_possible(p->sq_thread_cpu))
- goto err;
-
if (ctx->flags & IORING_SETUP_SQPOLL) {
ret = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto err;
+ ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+ if (!ctx->sq_thread_idle)
+ ctx->sq_thread_idle = HZ;
+
if (p->flags & IORING_SETUP_SQ_AFF) {
- int cpu;
+ int cpu = array_index_nospec(p->sq_thread_cpu,
+ nr_cpu_ids);
+
+ ret = -EINVAL;
+ if (!cpu_possible(cpu))
+ goto err;
- cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);
ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
ctx, cpu,
"io_uring-sq");
@@ -2313,8 +2397,12 @@ static int io_account_mem(struct user_struct *user, unsigned long nr_pages)
static void io_mem_free(void *ptr)
{
- struct page *page = virt_to_head_page(ptr);
+ struct page *page;
+
+ if (!ptr)
+ return;
+ page = virt_to_head_page(ptr);
if (put_page_testzero(page))
free_compound_page(page);
}
@@ -2355,7 +2443,7 @@ static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
- kfree(imu->bvec);
+ kvfree(imu->bvec);
imu->nr_bvecs = 0;
}
@@ -2447,9 +2535,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
if (!pages || nr_pages > got_pages) {
kfree(vmas);
kfree(pages);
- pages = kmalloc_array(nr_pages, sizeof(struct page *),
+ pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL);
- vmas = kmalloc_array(nr_pages,
+ vmas = kvmalloc_array(nr_pages,
sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!pages || !vmas) {
@@ -2461,7 +2549,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
got_pages = nr_pages;
}
- imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+ imu->bvec = kvmalloc_array(nr_pages, sizeof(struct bio_vec),
GFP_KERNEL);
ret = -ENOMEM;
if (!imu->bvec) {
@@ -2500,6 +2588,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
}
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
+ kvfree(imu->bvec);
goto err;
}
@@ -2522,12 +2611,12 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ctx->nr_user_bufs++;
}
- kfree(pages);
- kfree(vmas);
+ kvfree(pages);
+ kvfree(vmas);
return 0;
err:
- kfree(pages);
- kfree(vmas);
+ kvfree(pages);
+ kvfree(vmas);
io_sqe_buffer_unregister(ctx);
return ret;
}
@@ -2565,9 +2654,13 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
__poll_t mask = 0;
poll_wait(file, &ctx->cq_wait, wait);
- /* See comment at the top of this file */
+ /*
+ * synchronizes with barrier from wq_has_sleeper call in
+ * io_commit_cqring
+ */
smp_rmb();
- if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+ if (READ_ONCE(ctx->sq_ring->r.tail) - ctx->cached_sq_head !=
+ ctx->sq_ring->ring_entries)
mask |= EPOLLOUT | EPOLLWRNORM;
if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -2678,24 +2771,12 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
mutex_lock(&ctx->uring_lock);
submitted = io_ring_submit(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
-
- if (submitted < 0)
- goto out_ctx;
}
if (flags & IORING_ENTER_GETEVENTS) {
unsigned nr_events = 0;
min_complete = min(min_complete, ctx->cq_entries);
- /*
- * The application could have included the 'to_submit' count
- * in how many events it wanted to wait for. If we failed to
- * submit the desired count, we may need to adjust the number
- * of events to poll/wait for.
- */
- if (submitted < to_submit)
- min_complete = min_t(unsigned, submitted, min_complete);
-
if (ctx->flags & IORING_SETUP_IOPOLL) {
mutex_lock(&ctx->uring_lock);
ret = io_iopoll_check(ctx, &nr_events, min_complete);
@@ -2741,17 +2822,12 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
return -EOVERFLOW;
ctx->sq_sqes = io_mem_alloc(size);
- if (!ctx->sq_sqes) {
- io_mem_free(ctx->sq_ring);
+ if (!ctx->sq_sqes)
return -ENOMEM;
- }
cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries));
- if (!cq_ring) {
- io_mem_free(ctx->sq_ring);
- io_mem_free(ctx->sq_sqes);
+ if (!cq_ring)
return -ENOMEM;
- }
ctx->cq_ring = cq_ring;
cq_ring->ring_mask = p->cq_entries - 1;
@@ -2922,11 +2998,31 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
+ __releases(ctx->uring_lock)
+ __acquires(ctx->uring_lock)
{
int ret;
+ /*
+ * We're inside the ring mutex, if the ref is already dying, then
+ * someone else killed the ctx or is already going through
+ * io_uring_register().
+ */
+ if (percpu_ref_is_dying(&ctx->refs))
+ return -ENXIO;
+
percpu_ref_kill(&ctx->refs);
+
+ /*
+ * Drop uring mutex before waiting for references to exit. If another
+ * thread is currently inside io_uring_enter() it might need to grab
+ * the uring_lock to make progress. If we hold it here across the drain
+ * wait, then we can deadlock. It's safe to drop the mutex here, since
+ * no new references will come in after we've killed the percpu ref.
+ */
+ mutex_unlock(&ctx->uring_lock);
wait_for_completion(&ctx->ctx_done);
+ mutex_lock(&ctx->uring_lock);
switch (opcode) {
case IORING_REGISTER_BUFFERS:
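
The deadlock the comment describes has a generic shape: one thread holds a mutex while waiting for references to drain, while the last reference holder needs that same mutex to finish. A hedged userspace sketch of the drop-wait-reacquire pattern; the struct and field names are illustrative, not the io_uring ones:

    #include <pthread.h>
    #include <stdbool.h>

    struct ctx {
            pthread_mutex_t big_lock;       /* plays the role of uring_lock */
            pthread_mutex_t done_lock;
            pthread_cond_t  done_cv;
            bool done;                      /* plays the role of ctx_done */
    };

    static void do_register(struct ctx *c)
    {
            pthread_mutex_lock(&c->big_lock);
            /* ...mark the context dying so no new references appear... */

            pthread_mutex_unlock(&c->big_lock);     /* drop before waiting */
            pthread_mutex_lock(&c->done_lock);
            while (!c->done)                        /* wait for refs to drain */
                    pthread_cond_wait(&c->done_cv, &c->done_lock);
            pthread_mutex_unlock(&c->done_lock);
            pthread_mutex_lock(&c->big_lock);       /* re-take for the update */

            /* ...perform the registration while quiesced... */
            pthread_mutex_unlock(&c->big_lock);
    }
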
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 8f933e84cec1..9bc32af4e2da 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
__be32 nfserr;
- int count;
+ int count = 0;
+ struct page **p;
+ caddr_t page_addr = NULL;
dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
@@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
&resp->common, nfs3svc_encode_entry);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - argp->buffer;
+ count = 0;
+ for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+ page_addr = page_address(*p);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ resp->count = count >> 2;
if (resp->offset) {
loff_t offset = argp->cookie;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 93fea246f676..8d789124ed3c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -573,6 +573,7 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ int len;
u32 max_blocksize = svc_max_payload(rqstp);
p = decode_fh(p, &args->fh);
@@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, max_blocksize);
- args->buffer = page_address(*(rqstp->rq_next_page++));
+ len = args->count = min_t(u32, args->count, max_blocksize);
+
+ while (len > 0) {
+ struct page *p = *(rqstp->rq_next_page++);
+ if (!args->buffer)
+ args->buffer = page_address(p);
+ len -= PAGE_SIZE;
+ }
return xdr_argsize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d219159b98af..7caa3801ce72 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
+ if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
return;
+ cb->cb_holds_slot = true;
}
rpc_call_start(task);
}
@@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
return true;
}
+ if (!cb->cb_holds_slot)
+ goto need_restart;
+
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
cb->cb_seq_status);
}
+ cb->cb_holds_slot = false;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_seq_status = 1;
cb->cb_status = 0;
cb->cb_need_restart = false;
+ cb->cb_holds_slot = false;
}
void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a45fb00c5fc..f056b1d3fecd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
+ locks_delete_block(&nbl->nbl_lock);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -293,11 +294,18 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
@@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
};
@@ -4863,7 +4872,6 @@ nfs4_laundromat(struct nfsd_net *nn)
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 396c76755b03..9d6cb246c6c5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
int cb_seq_status;
int cb_status;
bool cb_need_restart;
+ bool cb_holds_slot;
};
struct nfsd4_callback_ops {
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 6b9c27548997..63c6bb1f8c4d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -346,10 +346,16 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
__kernel_fsid_t fsid = {};
fsnotify_foreach_obj_type(type) {
+ struct fsnotify_mark_connector *conn;
+
if (!fsnotify_iter_should_report_type(iter_info, type))
continue;
- fsid = iter_info->marks[type]->connector->fsid;
+ conn = READ_ONCE(iter_info->marks[type]->connector);
+ /* Mark is just getting destroyed or created? */
+ if (!conn)
+ continue;
+ fsid = conn->fsid;
if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
continue;
return fsid;
@@ -408,8 +414,12 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
+ if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
fsid = fanotify_get_fsid(iter_info);
+ /* Racing with mark destruction or creation? */
+ if (!fsid.val[0] && !fsid.val[1])
+ return 0;
+ }
event = fanotify_alloc_event(group, inode, mask, data, data_type,
&fsid);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d593d4269561..22acb0a79b53 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -239,13 +239,13 @@ static void fsnotify_drop_object(unsigned int type, void *objp)
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
- struct fsnotify_mark_connector *conn;
+ struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
void *objp = NULL;
unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
bool free_conn = false;
/* Catch marks that were actually never attached to object */
- if (!mark->connector) {
+ if (!conn) {
if (refcount_dec_and_test(&mark->refcnt))
fsnotify_final_mark_destroy(mark);
return;
@@ -255,10 +255,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
* We have to be careful so that traversals of obj_list under lock can
* safely grab mark reference.
*/
- if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
+ if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
return;
- conn = mark->connector;
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
objp = fsnotify_detach_connector_from_object(conn, &type);
@@ -266,7 +265,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
} else {
__fsnotify_recalc_mask(conn);
}
- mark->connector = NULL;
+ WRITE_ONCE(mark->connector, NULL);
spin_unlock(&conn->lock);
fsnotify_drop_object(type, objp);
@@ -620,7 +619,7 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
- mark->connector = conn;
+ WRITE_ONCE(mark->connector, conn);
out_err:
spin_unlock(&conn->lock);
spin_unlock(&mark->lock);
@@ -808,6 +807,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
refcount_set(&mark->refcnt, 1);
fsnotify_get_group(group);
mark->group = group;
+ WRITE_ONCE(mark->connector, NULL);
}
/*
diff --git a/fs/pipe.c b/fs/pipe.c
index 070aad543382..41065901106b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -188,9 +188,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
* in the tee() system call, when we duplicate the buffers in one
* pipe into another.
*/
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- get_page(buf->page);
+ return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d65390727541..7325baa8f9d4 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1626,9 +1626,11 @@ static void drop_sysctl_table(struct ctl_table_header *header)
if (--header->nreg)
return;
- if (parent)
+ if (parent) {
put_links(header);
- start_unregistering(header);
+ start_unregistering(header);
+ }
+
if (!--header->count)
kfree_rcu(header, rcu);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 92a91e7816d8..95ca1fe7283c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1143,6 +1143,24 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = -EINTR;
goto out_mm;
}
+ /*
+ * Avoid modifying vma->vm_flags
+ * without locked ops while the
+ * coredump reads the vm_flags.
+ */
+ if (!mmget_still_valid(mm)) {
+ /*
+ * Silently return "count"
+ * like if get_task_mm()
+ * failed. FIXME: should this
+ * function have returned
+ * -ESRCH if get_task_mm()
+ * failed like if
+ * get_proc_task() fails?
+ */
+ up_write(&mm->mmap_sem);
+ goto out_mm;
+ }
for (vma = mm->mmap; vma; vma = vma->vm_next) {
vma->vm_flags &= ~VM_SOFTDIRTY;
vma_set_page_prot(vma);
diff --git a/fs/splice.c b/fs/splice.c
index 3ee7e82df48f..25212dcca2df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -330,8 +330,8 @@ const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
return 1;
}
@@ -1593,7 +1593,11 @@ retry:
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
*obuf = *ibuf;
/*
@@ -1667,7 +1671,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* Get a reference to this pipe buffer,
* so we can copy the contents over.
*/
- pipe_buf_get(ipipe, ibuf);
+ if (!pipe_buf_get(ipipe, ibuf)) {
+ if (ret == 0)
+ ret = -EFAULT;
+ break;
+ }
obuf = opipe->bufs + nbuf;
*obuf = *ibuf;
diff --git a/fs/super.c b/fs/super.c
index 583a0124bc39..2739f57515f8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1467,11 +1467,6 @@ int vfs_get_tree(struct fs_context *fc)
struct super_block *sb;
int error;
- if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source) {
- errorf(fc, "Filesystem requires source device");
- return -ENOENT;
- }
-
if (fc->root)
return -EBUSY;
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 1fd3011ea623..7fd4802222b8 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -229,7 +229,7 @@ ufs_get_inode_gid(struct super_block *sb, struct ufs_inode *inode)
case UFS_UID_44BSD:
return fs32_to_cpu(sb, inode->ui_u3.ui_44.ui_gid);
case UFS_UID_EFT:
- if (inode->ui_u1.oldids.ui_suid == 0xFFFF)
+ if (inode->ui_u1.oldids.ui_sgid == 0xFFFF)
return fs32_to_cpu(sb, inode->ui_u3.ui_sun.ui_gid);
/* Fall through */
default:
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..f5de1e726356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -629,6 +629,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
/* the various vma->vm_userfaultfd_ctx still points to it */
down_write(&mm->mmap_sem);
+ /* no task can run (and in turn coredump) yet */
+ VM_WARN_ON(!mmget_still_valid(mm));
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -883,6 +885,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
* taking the mmap_sem for writing.
*/
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto skip_mm;
prev = NULL;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
@@ -905,6 +909,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
+skip_mm:
up_write(&mm->mmap_sem);
mmput(mm);
wakeup:
@@ -1333,6 +1338,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
@@ -1520,6 +1527,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
goto out;
down_write(&mm->mmap_sem);
+ if (!mmget_still_valid(mm))
+ goto out_unlock;
vma = find_vma_prev(mm, start, &prev);
if (!vma)
goto out_unlock;
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 6be86c1c5c58..b9edc7608d90 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -19,9 +19,131 @@
#include <linux/swap.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
#ifdef CONFIG_MMU
+/*
+ * Generic MMU-gather implementation.
+ *
+ * The mmu_gather data structure is used by the mm code to implement the
+ * correct and efficient ordering of freeing pages and TLB invalidations.
+ *
+ * This correct ordering is:
+ *
+ * 1) unhook page
+ * 2) TLB invalidate page
+ * 3) free page
+ *
+ * That is, we must never free a page before we have ensured there are no live
+ * translations left to it. Otherwise it might be possible to observe (or
+ * worse, change) the page content after it has been reused.
+ *
+ * The mmu_gather API consists of:
+ *
+ * - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather
+ *
+ * Finish in particular will issue a (final) TLB invalidate and free
+ * all (remaining) queued pages.
+ *
+ * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA
+ *
+ * Defaults to flushing at tlb_end_vma() to reset the range; helps when
+ * there are large holes between the VMAs.
+ *
+ * - tlb_remove_page() / __tlb_remove_page()
+ * - tlb_remove_page_size() / __tlb_remove_page_size()
+ *
+ * __tlb_remove_page_size() is the basic primitive that queues a page for
+ * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
+ * boolean indicating if the queue is (now) full and a call to
+ * tlb_flush_mmu() is required.
+ *
+ * tlb_remove_page() and tlb_remove_page_size() imply the call to
+ * tlb_flush_mmu() when required and have no return value.
+ *
+ * - tlb_change_page_size()
+ *
+ * call before __tlb_remove_page*() to set the current page-size; implies a
+ * possible tlb_flush_mmu() call.
+ *
+ * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly()
+ *
+ * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets
+ * related state, like the range)
+ *
+ * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees
+ * whatever pages are still batched.
+ *
+ * - mmu_gather::fullmm
+ *
+ * A flag set by tlb_gather_mmu() to indicate we're going to free
+ * the entire mm; this allows a number of optimizations.
+ *
+ * - We can ignore tlb_{start,end}_vma(); because we don't
+ * care about ranges. Everything will be shot down.
+ *
+ * - (RISC) architectures that use ASIDs can cycle to a new ASID
+ * and delay the invalidation until ASID space runs out.
+ *
+ * - mmu_gather::need_flush_all
+ *
+ * A flag that can be set by the arch code if it wants to force
+ * flush the entire TLB irrespective of the range. For instance
+ * x86-PAE needs this when changing top-level entries.
+ *
+ * And allows the architecture to provide and implement tlb_flush():
+ *
+ * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make
+ * use of:
+ *
+ * - mmu_gather::start / mmu_gather::end
+ *
+ * which provides the range that needs to be flushed to cover the pages to
+ * be freed.
+ *
+ * - mmu_gather::freed_tables
+ *
+ * set when we freed page table pages
+ *
+ * - tlb_get_unmap_shift() / tlb_get_unmap_size()
+ *
+ * returns the smallest TLB entry size unmapped in this range.
+ *
+ * If an architecture does not provide tlb_flush() a default implementation
+ * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is
+ * specified, in which case we'll default to flush_tlb_mm().
+ *
+ * Additionally there are a few opt-in features:
+ *
+ * HAVE_MMU_GATHER_PAGE_SIZE
+ *
+ * This ensures we call tlb_flush() every time tlb_change_page_size() actually
+ * changes the size and provides mmu_gather::page_size to tlb_flush().
+ *
+ * HAVE_RCU_TABLE_FREE
+ *
+ * This provides tlb_remove_table(), to be used instead of tlb_remove_page()
+ * for page directories (__p*_free_tlb()). This provides separate freeing of
+ * the page-table pages themselves in a semi-RCU fashion (see comment below).
+ * Useful if your architecture doesn't use IPIs for remote TLB invalidates
+ * and therefore doesn't naturally serialize with software page-table walkers.
+ *
+ * When used, an architecture is expected to provide __tlb_remove_table()
+ * which does the actual freeing of these pages.
+ *
+ * HAVE_RCU_TABLE_NO_INVALIDATE
+ *
+ * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
+ * freeing the page-table pages. This can be avoided if you use
+ * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
+ * page-tables natively.
+ *
+ * MMU_GATHER_NO_RANGE
+ *
+ * Use this if your architecture lacks an efficient flush_tlb_range().
+ */
+
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
* Semi RCU freeing of the page directories.
@@ -60,11 +182,11 @@ struct mmu_table_batch {
#define MAX_TABLE_BATCH \
((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-extern void tlb_table_flush(struct mmu_gather *tlb);
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#endif
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
/*
* If we can't allocate a page to make a big batch of page pointers
* to work on, then just handle a few from the on-stack structure.
@@ -89,14 +211,21 @@ struct mmu_gather_batch {
*/
#define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH)
-/* struct mmu_gather is an opaque type used by the mm code for passing around
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+ int page_size);
+#endif
+
+/*
+ * struct mmu_gather is an opaque type used by the mm code for passing around
* any data needed by arch specific code for tlb_remove_page.
*/
struct mmu_gather {
struct mm_struct *mm;
+
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
struct mmu_table_batch *batch;
#endif
+
unsigned long start;
unsigned long end;
/*
@@ -124,23 +253,30 @@ struct mmu_gather {
unsigned int cleared_puds : 1;
unsigned int cleared_p4ds : 1;
+ /*
+ * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma
+ */
+ unsigned int vma_exec : 1;
+ unsigned int vma_huge : 1;
+
+ unsigned int batch_count;
+
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
struct mmu_gather_batch *active;
struct mmu_gather_batch local;
struct page *__pages[MMU_GATHER_BUNDLE];
- unsigned int batch_count;
- int page_size;
-};
-#define HAVE_GENERIC_MMU_GATHER
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+ unsigned int page_size;
+#endif
+#endif
+};
void arch_tlb_gather_mmu(struct mmu_gather *tlb,
struct mm_struct *mm, unsigned long start, unsigned long end);
void tlb_flush_mmu(struct mmu_gather *tlb);
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end, bool force);
-void tlb_flush_mmu_free(struct mmu_gather *tlb);
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
- int page_size);
static inline void __tlb_adjust_range(struct mmu_gather *tlb,
unsigned long address,
@@ -163,8 +299,94 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
tlb->cleared_pmds = 0;
tlb->cleared_puds = 0;
tlb->cleared_p4ds = 0;
+ /*
+ * Do not reset mmu_gather::vma_* fields here, we do not
+ * call into tlb_start_vma() again to set them if there is an
+ * intermediate flush.
+ */
+}
+
+#ifdef CONFIG_MMU_GATHER_NO_RANGE
+
+#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#endif
+
+/*
+ * When an architecture does not have efficient means of range flushing TLBs
+ * there is no point in doing intermediate flushes on tlb_end_vma() to keep the
+ * range small. We equally don't have to worry about page granularity or other
+ * things.
+ *
+ * All we need to do is issue a full flush for any !0 range.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ if (tlb->end)
+ flush_tlb_mm(tlb->mm);
+}
+
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#define tlb_end_vma tlb_end_vma
+static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#else /* CONFIG_MMU_GATHER_NO_RANGE */
+
+#ifndef tlb_flush
+
+#if defined(tlb_start_vma) || defined(tlb_end_vma)
+#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
+#endif
+
+/*
+ * When an architecture does not provide its own tlb_flush() implementation
+ * but does have a reasonably efficient flush_tlb_range() implementation
+ * use that.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ if (tlb->fullmm || tlb->need_flush_all) {
+ flush_tlb_mm(tlb->mm);
+ } else if (tlb->end) {
+ struct vm_area_struct vma = {
+ .vm_mm = tlb->mm,
+ .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) |
+ (tlb->vma_huge ? VM_HUGETLB : 0),
+ };
+
+ flush_tlb_range(&vma, tlb->start, tlb->end);
+ }
}
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ /*
+ * flush_tlb_range() implementations that look at VM_HUGETLB (tile,
+ * mips-4k) flush only large pages.
+ *
+ * flush_tlb_range() implementations that flush I-TLB also flush D-TLB
+ * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing
+ * range.
+ *
+ * We rely on tlb_end_vma() to issue a flush, such that when we reset
+ * these values the batch is empty.
+ */
+ tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB);
+ tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+}
+
+#else
+
+static inline void
+tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
+
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
+
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
if (!tlb->end)
@@ -196,21 +418,18 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}
-#ifndef tlb_remove_check_page_size_change
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+static inline void tlb_change_page_size(struct mmu_gather *tlb,
unsigned int page_size)
{
- /*
- * We don't care about page size change, just update
- * mmu_gather page size here so that debug checks
- * doesn't throw false warning.
- */
-#ifdef CONFIG_DEBUG_VM
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+ if (tlb->page_size && tlb->page_size != page_size) {
+ if (!tlb->fullmm)
+ tlb_flush_mmu(tlb);
+ }
+
tlb->page_size = page_size;
#endif
}
-#endif
static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
{
@@ -237,17 +456,30 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
* the vmas are adjusted to only cover the region to be torn down.
*/
#ifndef tlb_start_vma
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#endif
+static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ if (tlb->fullmm)
+ return;
-#define __tlb_end_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- tlb_flush_mmu_tlbonly(tlb); \
- } while (0)
+ tlb_update_vma_flags(tlb, vma);
+ flush_cache_range(vma, vma->vm_start, vma->vm_end);
+}
+#endif
#ifndef tlb_end_vma
-#define tlb_end_vma __tlb_end_vma
+static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ if (tlb->fullmm)
+ return;
+
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs,
+ * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
+ * this.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+}
#endif
#ifndef __tlb_remove_tlb_entry
@@ -372,6 +604,4 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
#endif /* CONFIG_MMU */
-#define tlb_migrate_finish(mm) do {} while (0)
-
#endif /* _ASM_GENERIC__TLB_H */
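
The block comment added above documents the unhook / TLB-invalidate / free ordering and the per-VMA hooks that the generic mmu_gather now implements. As a hedged sketch only, this is roughly how an unmap path is expected to drive the API at this point in the tree (tlb_gather_mmu()/tlb_finish_mmu() still take a start/end range here; the helper name and the elided page-table walk are illustrative):

	#include <asm/tlb.h>

	/* Hedged sketch: tear down one VMA's mappings with a mmu_gather. */
	static void example_unmap_vma(struct mm_struct *mm, struct vm_area_struct *vma,
				      unsigned long start, unsigned long end)
	{
		struct mmu_gather tlb;

		tlb_gather_mmu(&tlb, mm, start, end);	/* 1) start the gather */
		tlb_start_vma(&tlb, vma);		/* flush caches, record VMA flags */

		/*
		 * The page-table walk goes here: tlb_remove_tlb_entry() records
		 * the range of each cleared PTE and tlb_remove_page() queues the
		 * page so it is only freed after the TLB invalidate below.
		 */

		tlb_end_vma(&tlb, vma);			/* 2) TLB invalidate for this VMA */
		tlb_finish_mmu(&tlb, start, end);	/* 3) final flush, free batched pages */
	}
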
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cbf3180cb612..668ad971cd7b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -420,7 +420,6 @@ extern struct ttm_bo_global {
/**
* Protected by ttm_global_mutex.
*/
- unsigned int use_count;
struct list_head device_list;
/**
diff --git a/include/dt-bindings/clock/sifive-fu540-prci.h b/include/dt-bindings/clock/sifive-fu540-prci.h
new file mode 100644
index 000000000000..6a0b70a37d78
--- /dev/null
+++ b/include/dt-bindings/clock/sifive-fu540-prci.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018-2019 SiFive, Inc.
+ * Wesley Terpstra
+ * Paul Walmsley
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+#define __DT_BINDINGS_CLOCK_SIFIVE_FU540_PRCI_H
+
+/* Clock indexes for use by Device Tree data and the PRCI driver */
+
+#define PRCI_CLK_COREPLL 0
+#define PRCI_CLK_DDRPLL 1
+#define PRCI_CLK_GEMGXLPLL 2
+#define PRCI_CLK_TLCLK 3
+
+#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c58a3b2bf00..317ab30d2904 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -548,7 +548,6 @@ struct request_queue {
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
struct percpu_ref q_usage_counter;
- struct list_head all_q_node;
struct blk_mq_tag_set *tag_set;
struct list_head tag_set_list;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f02367faa58d..944ccc310201 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -510,7 +510,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
} \
_out: \
rcu_read_unlock(); \
- preempt_enable_no_resched(); \
+ preempt_enable(); \
_ret; \
})
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 3bc91879e1e2..ff13cbc1887d 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -160,8 +160,9 @@ static inline void bvec_advance(const struct bio_vec *bvec,
bv->bv_page = nth_page(bv->bv_page, 1);
bv->bv_offset = 0;
} else {
- bv->bv_page = bvec->bv_page;
- bv->bv_offset = bvec->bv_offset;
+ bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset /
+ PAGE_SIZE);
+ bv->bv_offset = bvec->bv_offset & ~PAGE_MASK;
}
bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
bvec->bv_len - iter_all->done);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index d8bc1a856b39..f689fc58d7be 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -811,6 +811,22 @@ static inline bool clk_has_parent(struct clk *clk, struct clk *parent)
return true;
}
+static inline int clk_set_rate_range(struct clk *clk, unsigned long min,
+ unsigned long max)
+{
+ return 0;
+}
+
+static inline int clk_set_min_rate(struct clk *clk, unsigned long rate)
+{
+ return 0;
+}
+
+static inline int clk_set_max_rate(struct clk *clk, unsigned long rate)
+{
+ return 0;
+}
+
static inline int clk_set_parent(struct clk *clk, struct clk *parent)
{
return 0;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 445348facea9..d58aa0db05f9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -67,7 +67,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
.line = __LINE__, \
}; \
______r = !!(cond); \
- ______f.miss_hit[______r]++; \
+ ______r ? ______f.miss_hit[1]++ : ______f.miss_hit[0]++;\
______r; \
}))
#endif /* CONFIG_PROFILE_ALL_BRANCHES */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 5041357d0297..2d9c6f4b78f5 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -187,4 +187,28 @@ static inline void cpu_smt_disable(bool force) { }
static inline void cpu_smt_check_topology(void) { }
#endif
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+ CPU_MITIGATIONS_OFF,
+ CPU_MITIGATIONS_AUTO,
+ CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+extern enum cpu_mitigations cpu_mitigations;
+
+/* mitigations=off */
+static inline bool cpu_mitigations_off(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+
+/* mitigations=auto,nosmt */
+static inline bool cpu_mitigations_auto_nosmt(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+
#endif /* _LINUX_CPU_H_ */
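
cpu_mitigations_off() and cpu_mitigations_auto_nosmt() are meant to be queried by the per-architecture mitigation selection code so a single mitigations= option can override the individual knobs. A hedged sketch of that consumer pattern follows; example_select_mitigation() and enable_default_mitigation() are made-up names, only the cpu_mitigations_*() and cpu_smt_disable() calls are real:

	#include <linux/cpu.h>

	static void enable_default_mitigation(void)
	{
		/* stand-in for an arch-specific mitigation enable */
	}

	static void __init example_select_mitigation(void)
	{
		/* "mitigations=off": leave the vulnerability unmitigated. */
		if (cpu_mitigations_off())
			return;

		enable_default_mitigation();

		/* "mitigations=auto,nosmt": additionally disable SMT. */
		if (cpu_mitigations_auto_nosmt())
			cpu_smt_disable(false);
	}
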
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 54357a258b35..6ebc2098cfe1 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1611,7 +1611,12 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
struct screen_info *si, efi_guid_t *proto,
unsigned long size);
-bool efi_runtime_disabled(void);
+#ifdef CONFIG_EFI
+extern bool efi_runtime_disabled(void);
+#else
+static inline bool efi_runtime_disabled(void) { return true; }
+#endif
+
extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
extern unsigned long efi_call_virt_save_flags(void);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 2e9e2763bf47..6e8bc53740f0 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -31,6 +31,7 @@ struct elevator_mq_ops {
void (*exit_sched)(struct elevator_queue *);
int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+ void (*depth_updated)(struct blk_mq_hw_ctx *);
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index e2f3b21cd72a..aa8bfd6f738c 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -449,6 +449,18 @@ static inline void eth_addr_dec(u8 *addr)
}
/**
+ * eth_addr_inc() - Increment the given MAC address.
+ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
+ */
+static inline void eth_addr_inc(u8 *addr)
+{
+ u64 u = ether_addr_to_u64(addr);
+
+ u++;
+ u64_to_ether_addr(u, addr);
+}
+
+/**
* is_etherdev_addr - Tell if given Ethernet address belongs to the device.
* @dev: Pointer to a device structure
* @addr: Pointer to a six-byte array containing the Ethernet address
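
eth_addr_inc() complements the existing eth_addr_dec() by converting the address to a u64, incrementing it, and converting back. A hedged sketch of handing out consecutive MAC addresses derived from a base address; the function and array names are illustrative:

	#include <linux/etherdevice.h>

	/* Illustrative only: assign base, base+1, base+2, ... to nports ports. */
	static void example_assign_port_addrs(const u8 *base,
					      u8 addrs[][ETH_ALEN], int nports)
	{
		u8 addr[ETH_ALEN];
		int i;

		ether_addr_copy(addr, base);
		for (i = 0; i < nports; i++) {
			ether_addr_copy(addrs[i], addr);
			eth_addr_inc(addr);	/* next 48-bit value */
		}
	}
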
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 201f0f2683f2..9a897256e481 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -173,6 +173,7 @@ struct kretprobe_instance {
struct kretprobe *rp;
kprobe_opcode_t *ret_addr;
struct task_struct *task;
+ void *fp;
char data[0];
};
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d55c63db09b..640a03642766 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
#include <linux/irqbypass.h>
#include <linux/swait.h>
#include <linux/refcount.h>
+#include <linux/nospec.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -513,10 +514,10 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
- /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
- * the caller has read kvm->online_vcpus before (as is the case
- * for kvm_for_each_vcpu, for example).
- */
+ int num_vcpus = atomic_read(&kvm->online_vcpus);
+ i = array_index_nospec(i, num_vcpus);
+
+ /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */
smp_rmb();
return kvm->vcpus[i];
}
@@ -600,6 +601,7 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
+ as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
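
kvm_get_vcpu() and __kvm_memslots() now clamp the caller-supplied index with array_index_nospec() so that a mispredicted bounds check cannot be turned into a speculative out-of-bounds read. A hedged, KVM-independent sketch of the same pattern; the table and its size are illustrative:

	#include <linux/nospec.h>

	#define EXAMPLE_NR_ENTRIES 16

	struct example_entry;
	static struct example_entry *example_table[EXAMPLE_NR_ENTRIES];

	static struct example_entry *example_lookup(unsigned int idx)
	{
		if (idx >= EXAMPLE_NR_ENTRIES)
			return NULL;
		/*
		 * The CPU may speculate past the bounds check above;
		 * array_index_nospec() forces idx into 0..EXAMPLE_NR_ENTRIES-1
		 * on the speculative path as well.
		 */
		idx = array_index_nospec(idx, EXAMPLE_NR_ENTRIES);
		return example_table[idx];
	}
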
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 76769749b5a5..6b10c21630f5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -966,6 +966,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
}
#endif /* CONFIG_DEV_PAGEMAP_OPS */
+/* 127: arbitrary random number, small enough to assemble well */
+#define page_ref_zero_or_close_to_overflow(page) \
+ ((unsigned int) page_ref_count(page) + 127u <= 127u)
+
static inline void get_page(struct page *page)
{
page = compound_head(page);
@@ -973,8 +977,17 @@ static inline void get_page(struct page *page)
* Getting a normal page or the head of a compound page
* requires to already have an elevated page->_refcount.
*/
- VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
+ VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
+ page_ref_inc(page);
+}
+
+static inline __must_check bool try_get_page(struct page *page)
+{
+ page = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(page) <= 0))
+ return false;
page_ref_inc(page);
+ return true;
}
static inline void put_page(struct page *page)
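
get_page() now BUGs on a refcount that is zero or close to overflow, while try_get_page() is the checked variant for callers that can fail gracefully instead; the pipe buffer code earlier in the patch is converted to it. A hedged sketch of the caller side, with an illustrative helper name:

	#include <linux/mm.h>

	/* Illustrative only: take a page reference that may fail instead of BUG. */
	static int example_pin_page(struct page *page, struct page **pinned)
	{
		if (!try_get_page(page))	/* refcount zero or saturated */
			return -EFAULT;

		*pinned = page;
		return 0;
	}
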
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 26f69cf763f4..324e872c91d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1500,6 +1500,7 @@ struct net_device_ops {
* @IFF_FAILOVER: device is a failover master device
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
* @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
+ * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1532,6 +1533,7 @@ enum netdev_priv_flags {
IFF_FAILOVER = 1<<27,
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
+ IFF_LIVE_RENAME_OK = 1<<30,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1563,6 +1565,7 @@ enum netdev_priv_flags {
#define IFF_FAILOVER IFF_FAILOVER
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
+#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK
/**
* struct net_device - The DEVICE structure.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e47ef764f613..1f678f023850 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -240,7 +240,6 @@ struct perf_event;
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_NO_NMI 0x02
#define PERF_PMU_CAP_AUX_NO_SG 0x04
-#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 787d224ff43e..5c626fdc10db 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -101,18 +101,20 @@ struct pipe_buf_operations {
/*
* Get a reference to the pipe buffer.
*/
- void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
+ *
+ * Return: %true if the reference was successfully obtained.
*/
-static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- buf->ops->get(pipe, buf);
+ return buf->ops->get(pipe, buf);
}
/**
@@ -171,9 +173,10 @@ struct pipe_inode_info *alloc_pipe_info(void);
void free_pipe_info(struct pipe_inode_info *);
/* Generic pipe buffer ops functions */
-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_nosteal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
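
With ->get() and pipe_buf_get() now returning bool and marked __must_check, every caller has to cope with a failed reference grab, as the splice() and tee() paths earlier in the patch do. A hedged sketch of the expected caller shape; the helper name is illustrative:

	#include <linux/pipe_fs_i.h>

	/* Illustrative only: duplicate one pipe buffer into another slot. */
	static int example_dup_buf(struct pipe_inode_info *pipe,
				   struct pipe_buffer *src, struct pipe_buffer *dst)
	{
		if (!pipe_buf_get(pipe, src))	/* page refcount saturated */
			return -EFAULT;

		*dst = *src;			/* dst now owns the extra reference */
		return 0;
	}
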
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6cdb1db776cf..922bb6848813 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -878,9 +878,11 @@ static inline void rcu_head_init(struct rcu_head *rhp)
static inline bool
rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
{
- if (READ_ONCE(rhp->func) == f)
+ rcu_callback_t func = READ_ONCE(rhp->func);
+
+ if (func == f)
return true;
- WARN_ON_ONCE(READ_ONCE(rhp->func) != (rcu_callback_t)~0L);
+ WARN_ON_ONCE(func != (rcu_callback_t)~0L);
return false;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1549584a1538..50606a6e73d6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1057,7 +1057,6 @@ struct task_struct {
#ifdef CONFIG_RSEQ
struct rseq __user *rseq;
- u32 rseq_len;
u32 rseq_sig;
/*
* RmW on rseq_event_mask must be performed atomically
@@ -1855,12 +1854,10 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
if (clone_flags & CLONE_THREAD) {
t->rseq = NULL;
- t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
} else {
t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
t->rseq_event_mask = current->rseq_event_mask;
}
@@ -1869,7 +1866,6 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
static inline void rseq_execve(struct task_struct *t)
{
t->rseq = NULL;
- t->rseq_len = 0;
t->rseq_sig = 0;
t->rseq_event_mask = 0;
}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 0cd9f10423fb..a3fda9f024c3 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,27 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+/*
+ * This has to be called after a get_task_mm()/mmget_not_zero()
+ * followed by taking the mmap_sem for writing before modifying the
+ * vmas or anything the coredump pretends not to change from under it.
+ *
+ * NOTE: find_extend_vma() called from GUP context is the only place
+ * that can modify the "mm" (notably the vm_start/end) under mmap_sem
+ * for reading and outside the context of the process, so it is also
+ * the only case that holds the mmap_sem for reading that must call
+ * this function. Generally if the mmap_sem is held for reading
+ * there's no need for this check after get_task_mm()/mmget_not_zero().
+ *
+ * This function can be obsoleted and the check can be removed once
+ * the coredump code holds the mmap_sem for writing before
+ * invoking the ->core_dump methods.
+ */
+static inline bool mmget_still_valid(struct mm_struct *mm)
+{
+ return likely(!mm->core_state);
+}
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
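
mmget_still_valid() is the check the userfaultfd and clear_refs changes earlier in the patch rely on: after re-taking mmap_sem for writing, back off if a coredump has started so vma->vm_flags are not changed under it. A hedged sketch of the documented call sequence; the function name is illustrative:

	#include <linux/sched/mm.h>

	/* Illustrative only: modify VMAs without racing against a coredump. */
	static int example_modify_vmas(struct task_struct *task)
	{
		struct mm_struct *mm = get_task_mm(task);

		if (!mm)
			return -ESRCH;

		down_write(&mm->mmap_sem);
		if (!mmget_still_valid(mm))	/* coredump is already reading vm_flags */
			goto out_unlock;

		/* ... walk mm->mmap and adjust vma->vm_flags here ... */

	out_unlock:
		up_write(&mm->mmap_sem);
		mmput(mm);
		return 0;
	}
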
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index f3fb1edb3526..20d815a33145 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -21,6 +21,7 @@ struct shmem_inode_info {
struct list_head swaplist; /* chain of maybes on swap */
struct shared_policy policy; /* NUMA memory alloc policy */
struct simple_xattrs xattrs; /* list of xattrs */
+ atomic_t stop_eviction; /* hold when working on inode */
struct inode vfs_inode;
};
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c495b2d51569..e432cc92c73d 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,45 +56,11 @@ struct srcu_struct { };
void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
-void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced);
+void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
void synchronize_srcu(struct srcu_struct *ssp);
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @ssp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-static inline void cleanup_srcu_struct(struct srcu_struct *ssp)
-{
- _cleanup_srcu_struct(ssp, false);
-}
-
-/**
- * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
- * @ssp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory. Also,
- * all grace-period processing must have completed.
- *
- * "Completed" means that the last synchronize_srcu() and
- * synchronize_srcu_expedited() calls must have returned before the call
- * to cleanup_srcu_struct_quiesced(). It also means that the callback
- * from the last call_srcu() must have been invoked before the call to
- * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
- * with this last. Violating these rules will get you a WARN_ON() splat
- * (with high probability, anyway), and will also cause the srcu_struct
- * to be leaked.
- */
-static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp)
-{
- _cleanup_srcu_struct(ssp, true);
-}
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
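
With the quiesced variant removed, cleanup_srcu_struct() is again the only teardown entry point, and the requirements from the deleted comment still hold: every synchronize_srcu() must have returned and every call_srcu() callback must have run first. A hedged sketch of the expected lifecycle; the example_* names are illustrative:

	#include <linux/srcu.h>

	static struct srcu_struct example_srcu;

	static int example_setup(void)
	{
		return init_srcu_struct(&example_srcu);
	}

	static void example_teardown(void)
	{
		/*
		 * srcu_barrier() waits for outstanding call_srcu() callbacks;
		 * only then is cleanup_srcu_struct() safe, otherwise memory is
		 * leaked and a WARN fires.
		 */
		srcu_barrier(&example_srcu);
		cleanup_srcu_struct(&example_srcu);
	}
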
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 37b226e8df13..2b70130af585 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -268,6 +268,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
#define user_access_end() do { } while (0)
#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long flags) { }
#endif
#ifdef CONFIG_HARDENED_USERCOPY
diff --git a/include/linux/uio.h b/include/linux/uio.h
index f184af1999a8..2d0131ad4604 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -60,7 +60,7 @@ struct iov_iter {
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
{
- return i->type & ~(READ | WRITE);
+ return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF);
}
static inline bool iter_is_iovec(const struct iov_iter *i)
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 5e49e82c4368..ff010d1fd1c7 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -200,7 +200,6 @@ usb_find_last_int_out_endpoint(struct usb_host_interface *alt,
* @dev: driver model's view of this device
* @usb_dev: if an interface is bound to the USB major, this will point
* to the sysfs representation for that device.
- * @pm_usage_cnt: PM usage counter for this interface
* @reset_ws: Used for scheduling resets from atomic context.
* @resetting_device: USB core reset the device, so use alt setting 0 as
* current; needs bandwidth alloc after reset.
@@ -257,7 +256,6 @@ struct usb_interface {
struct device dev; /* interface specific device info */
struct device *usb_dev;
- atomic_t pm_usage_cnt; /* usage counter for autosuspend */
struct work_struct reset_ws; /* for resets in atomic context */
};
#define to_usb_interface(d) container_of(d, struct usb_interface, dev)
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 2bfb87eb98ce..78c856cba4f5 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,10 +61,12 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
+ u32 *);
void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
+bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bb307a11ee63..13bfeb712d36 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7183,6 +7183,11 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
#define wiphy_info(wiphy, format, args...) \
dev_info(&(wiphy)->dev, format, ##args)
+#define wiphy_err_ratelimited(wiphy, format, args...) \
+ dev_err_ratelimited(&(wiphy)->dev, format, ##args)
+#define wiphy_warn_ratelimited(wiphy, format, args...) \
+ dev_warn_ratelimited(&(wiphy)->dev, format, ##args)
+
#define wiphy_debug(wiphy, format, args...) \
wiphy_printk(KERN_DEBUG, wiphy, format, ##args)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2ed8ec662b..112dc18c658f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6231,8 +6231,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to return packets from.
*
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
* Returns the next txq if successful, %NULL if no queue is eligible. If a txq
* is returned, it should be returned with ieee80211_return_txq() after the
* driver has finished scheduling it.
@@ -6240,51 +6238,58 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
/**
- * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
- *
- * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
- *
- * Should only be called between calls to ieee80211_txq_schedule_start()
- * and ieee80211_txq_schedule_end().
- */
-void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
-
-/**
- * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ * ieee80211_txq_schedule_start - start new scheduling round for TXQs
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @ac: AC number to acquire locks for
*
- * Acquire locks needed to schedule TXQs from the given AC. Should be called
- * before ieee80211_next_txq() or ieee80211_return_txq().
+ * Should be called before ieee80211_next_txq() or ieee80211_return_txq().
+ * The driver must not call multiple TXQ scheduling rounds concurrently.
*/
-void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock);
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac);
+
+/* (deprecated) */
+static inline void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+{
+}
+
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq, bool force);
/**
- * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
- * @ac: AC number to acquire locks for
+ * @txq: pointer obtained from station or virtual interface
*
- * Release locks previously acquired by ieee80211_txq_schedule_end().
+ * Schedules a TXQ for transmission if it is not already scheduled,
+ * even if mac80211 does not have any packets buffered.
+ *
+ * The driver may call this function if it has buffered packets for
+ * this TXQ internally.
*/
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock);
+static inline void
+ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+{
+ __ieee80211_schedule_txq(hw, txq, true);
+}
/**
- * ieee80211_schedule_txq - schedule a TXQ for transmission
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
* @txq: pointer obtained from station or virtual interface
+ * @force: schedule txq even if mac80211 does not have any buffered packets.
*
- * Schedules a TXQ for transmission if it is not already scheduled. Takes a
- * lock, which means it must *not* be called between
- * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
+ * The driver may set force=true if it has buffered packets for this TXQ
+ * internally.
*/
-void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock);
+static inline void
+ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
+ bool force)
+{
+ __ieee80211_schedule_txq(hw, txq, force);
+}
/**
* ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 5ee7b30b4917..d2bc733a2ef1 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
gfp_t flags);
void nf_ct_tmpl_free(struct nf_conn *tmpl);
+u32 nf_ct_get_id(const struct nf_conn *ct);
+
static inline void
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
{
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 778087591983..a49edfdf47e8 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig);
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state,
+ u8 l4proto,
+ union nf_inet_addr *outer_daddr);
+
int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
diff --git a/include/net/netrom.h b/include/net/netrom.h
index 5a0714ff500f..80f15b1c1a48 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -266,7 +266,7 @@ void nr_stop_idletimer(struct sock *);
int nr_t1timer_running(struct sock *);
/* sysctl_net_netrom.c */
-void nr_register_sysctl(void);
+int nr_register_sysctl(void);
void nr_unregister_sysctl(void);
#endif
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 6640f84fe536..6d5beac29bc1 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -105,7 +105,6 @@ enum sctp_verb {
SCTP_CMD_T1_RETRAN, /* Mark for retransmission after T1 timeout */
SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */
SCTP_CMD_SEND_MSG, /* Send the whole use message */
- SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */
SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/
SCTP_CMD_SET_ASOC, /* Restore association context */
SCTP_CMD_LAST
diff --git a/include/net/sock.h b/include/net/sock.h
index 8de5ee258b93..341f8bafa0cf 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2084,12 +2084,6 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
* @p: poll_table
*
* See the comments in the wq_has_sleeper function.
- *
- * Do not derive sock from filp->private_data here. An SMC socket establishes
- * an internal TCP socket that is used in the fallback case. All socket
- * operations on the SMC socket are then forwarded to the TCP socket. In case of
- * poll, the filp->private_data pointer references the SMC socket because the
- * TCP socket has no file assigned.
*/
static inline void sock_poll_wait(struct file *filp, struct socket *sock,
poll_table *p)
diff --git a/include/net/tls.h b/include/net/tls.h
index a5a938583295..5934246b2c6f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -307,6 +307,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_device_sk_destruct(struct sock *sk);
+void tls_device_free_resources_tx(struct sock *sk);
void tls_device_init(void);
void tls_device_cleanup(void);
int tls_tx_records(struct sock *sk, int flags);
@@ -330,6 +331,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx,
int flags);
int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
int flags);
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
static inline struct tls_msg *tls_msg(struct sk_buff *skb)
{
@@ -379,7 +381,7 @@ tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
{
#ifdef CONFIG_SOCK_VALIDATE_XMIT
- return sk_fullsock(sk) &
+ return sk_fullsock(sk) &&
(smp_load_acquire(&sk->sk_validate_xmit_skb) ==
&tls_validate_xmit_skb);
#else
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 85386becbaea..c9b0b2b5d672 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -295,7 +295,8 @@ struct xfrm_replay {
};
struct xfrm_if_cb {
- struct xfrm_if *(*decode_session)(struct sk_buff *skb);
+ struct xfrm_if *(*decode_session)(struct sk_buff *skb,
+ unsigned short family);
};
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
@@ -1404,6 +1405,23 @@ static inline int xfrm_state_kern(const struct xfrm_state *x)
return atomic_read(&x->tunnel_users);
}
+static inline bool xfrm_id_proto_valid(u8 proto)
+{
+ switch (proto) {
+ case IPPROTO_AH:
+ case IPPROTO_ESP:
+ case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */
static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
{
return (!userproto || proto == userproto ||
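
xfrm_id_proto_valid() gives the configuration paths a positive whitelist, so only the protocols an SA or policy can legitimately use (AH, ESP, IPcomp, plus the IPv6 routing and destination-option headers) are accepted from userspace. A hedged sketch of the intended check in a netlink handler; the surrounding function is illustrative:

	#include <net/xfrm.h>

	/* Illustrative only: reject bogus protocol numbers before building state. */
	static int example_verify_newsa(const struct xfrm_usersa_info *p)
	{
		if (!xfrm_id_proto_valid(p->id.proto))
			return -EINVAL;

		/* ... remaining validation of *p ... */
		return 0;
	}
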
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 7f14d4a66c28..64cee116928e 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -439,10 +439,12 @@
#define KEY_TITLE 0x171
#define KEY_SUBTITLE 0x172
#define KEY_ANGLE 0x173
-#define KEY_ZOOM 0x174
+#define KEY_FULL_SCREEN 0x174 /* AC View Toggle */
+#define KEY_ZOOM KEY_FULL_SCREEN
#define KEY_MODE 0x175
#define KEY_KEYBOARD 0x176
-#define KEY_SCREEN 0x177
+#define KEY_ASPECT_RATIO 0x177 /* HUTRR37: Aspect */
+#define KEY_SCREEN KEY_ASPECT_RATIO
#define KEY_PC 0x178 /* Media Select Computer */
#define KEY_TV 0x179 /* Media Select TV */
#define KEY_TV2 0x17a /* Media Select Cable */
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 87b3198f4b5d..f4d4010b7e3e 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -238,6 +238,7 @@ enum mlx5_ib_query_dev_resp_flags {
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP = 1 << 0,
MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD = 1 << 1,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE = 1 << 2,
+ MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT = 1 << 3,
};
enum mlx5_ib_tunnel_offloads {
diff --git a/init/main.c b/init/main.c
index 598e278b46f7..7d4025d665eb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -582,6 +582,8 @@ asmlinkage __visible void __init start_kernel(void)
page_alloc_init();
pr_notice("Kernel command line: %s\n", boot_command_line);
+ /* parameters may set static keys */
+ jump_label_init();
parse_early_param();
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
@@ -591,8 +593,6 @@ asmlinkage __visible void __init start_kernel(void)
parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
NULL, set_init_arg);
- jump_label_init();
-
/*
* These use large bootmem allocations and must precede
* kmem_cache_init()
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c57e78817da..62471e75a2b0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -30,6 +30,7 @@ KCOV_INSTRUMENT_extable.o := n
# Don't self-instrument.
KCOV_INSTRUMENT_kcov.o := n
KASAN_SANITIZE_kcov.o := n
+CFLAGS_kcov.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
# cond_syscall is currently not LTO compatible
CFLAGS_sys_ni.o = $(DISABLE_LTO)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6c5a41f7f338..09d5d972c9ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4138,15 +4138,35 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static void __find_good_pkt_pointers(struct bpf_func_state *state,
+ struct bpf_reg_state *dst_reg,
+ enum bpf_reg_type type, u16 new_range)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++) {
+ reg = &state->regs[i];
+ if (reg->type == type && reg->id == dst_reg->id)
+ /* keep the maximum range already checked */
+ reg->range = max(reg->range, new_range);
+ }
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ if (reg->type == type && reg->id == dst_reg->id)
+ reg->range = max(reg->range, new_range);
+ }
+}
+
static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type,
bool range_right_open)
{
- struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *regs = state->regs, *reg;
u16 new_range;
- int i, j;
+ int i;
if (dst_reg->off < 0 ||
(dst_reg->off == 0 && range_right_open))
@@ -4211,20 +4231,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
* the range won't allow anything.
* dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
*/
- for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].type == type && regs[i].id == dst_reg->id)
- /* keep the maximum range already checked */
- regs[i].range = max(regs[i].range, new_range);
-
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- if (reg->type == type && reg->id == dst_reg->id)
- reg->range = max(reg->range, new_range);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+ new_range);
}
/* compute branch direction of the expression "if (reg opcode val) goto target;"
@@ -4698,6 +4707,22 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
}
}
+static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+ bool is_null)
+{
+ struct bpf_reg_state *reg;
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+
+ bpf_for_each_spilled_reg(i, state, reg) {
+ if (!reg)
+ continue;
+ mark_ptr_or_null_reg(state, reg, id, is_null);
+ }
+}
+
/* The logic is similar to find_good_pkt_pointers(), both could eventually
* be folded together at some point.
*/
@@ -4705,10 +4730,10 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
bool is_null)
{
struct bpf_func_state *state = vstate->frame[vstate->curframe];
- struct bpf_reg_state *reg, *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
u32 ref_obj_id = regs[regno].ref_obj_id;
u32 id = regs[regno].id;
- int i, j;
+ int i;
if (ref_obj_id && ref_obj_id == id && is_null)
/* regs[regno] is in the " == NULL" branch.
@@ -4717,17 +4742,8 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
*/
WARN_ON_ONCE(release_reference_state(state, id));
- for (i = 0; i < MAX_BPF_REG; i++)
- mark_ptr_or_null_reg(state, &regs[i], id, is_null);
-
- for (j = 0; j <= vstate->curframe; j++) {
- state = vstate->frame[j];
- bpf_for_each_spilled_reg(i, state, reg) {
- if (!reg)
- continue;
- mark_ptr_or_null_reg(state, reg, id, is_null);
- }
- }
+ for (i = 0; i <= vstate->curframe; i++)
+ __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
}
static bool try_match_pkt_pointers(const struct bpf_insn *insn,
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6754f3ecfd94..43e741e88691 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2304,3 +2304,18 @@ void __init boot_cpu_hotplug_init(void)
#endif
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
}
+
+enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+
+static int __init mitigations_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ cpu_mitigations = CPU_MITIGATIONS_OFF;
+ else if (!strcmp(arg, "auto"))
+ cpu_mitigations = CPU_MITIGATIONS_AUTO;
+ else if (!strcmp(arg, "auto,nosmt"))
+ cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+
+ return 0;
+}
+early_param("mitigations", mitigations_parse_cmdline);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 534e01e7bc36..dc7dead2d2cc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9077,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
if (task == TASK_TOMBSTONE)
return;
- if (!ifh->nr_file_filters)
- return;
-
- mm = get_task_mm(event->ctx->task);
- if (!mm)
- goto restart;
+ if (ifh->nr_file_filters) {
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ goto restart;
- down_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ }
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- event->addr_filter_ranges[count].start = 0;
- event->addr_filter_ranges[count].size = 0;
+ if (filter->path.dentry) {
+ /*
+ * Adjust base offset if the filter is associated to a
+ * binary that needs to be mapped:
+ */
+ event->addr_filter_ranges[count].start = 0;
+ event->addr_filter_ranges[count].size = 0;
- /*
- * Adjust base offset if the filter is associated to a binary
- * that needs to be mapped:
- */
- if (filter->path.dentry)
perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
+ } else {
+ event->addr_filter_ranges[count].start = filter->offset;
+ event->addr_filter_ranges[count].size = filter->size;
+ }
count++;
}
@@ -9104,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
event->addr_filters_gen++;
raw_spin_unlock_irqrestore(&ifh->lock, flags);
- up_read(&mm->mmap_sem);
+ if (ifh->nr_file_filters) {
+ up_read(&mm->mmap_sem);
- mmput(mm);
+ mmput(mm);
+ }
restart:
perf_event_stop(event, 1);
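
The rework of perf_event_addr_filters_apply() only pins the task's mm and takes mmap_sem when there are file-backed filters, and mirrors the same ifh->nr_file_filters condition on the release side so acquire and release stay paired. A hedged, generic sketch of that conditional-pairing shape; all names here are illustrative:

	#include <linux/sched/mm.h>

	struct example_ctx {
		struct task_struct *task;
	};

	/* Illustrative only: acquire conditionally, release under the same condition. */
	static void example_apply(struct example_ctx *ctx, bool need_mm)
	{
		struct mm_struct *mm = NULL;

		if (need_mm) {
			mm = get_task_mm(ctx->task);
			if (!mm)
				return;
			down_read(&mm->mmap_sem);
		}

		/* ... work that only sometimes needs the mm ... */

		if (need_mm) {
			up_read(&mm->mmap_sem);
			mmput(mm);
		}
	}
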
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 2545ac08cc77..674b35383491 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
rb->aux_head += size;
}
- if (size || handle->aux_flags) {
- /*
- * Only send RECORD_AUX if we have something useful to communicate
- *
- * Note: the OVERWRITE records by themselves are not considered
- * useful, as they don't communicate any *new* information,
- * aside from the short-lived offset, that becomes history at
- * the next event sched-in and therefore isn't useful.
- * The userspace that needs to copy out AUX data in overwrite
- * mode should know to use user_page::aux_head for the actual
- * offset. So, from now on we don't output AUX records that
- * have *only* OVERWRITE flag set.
- */
-
- if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)
- perf_event_aux_event(handle->event, aux_head, size,
- handle->aux_flags);
- }
+ /*
+ * Only send RECORD_AUX if we have something useful to communicate
+ *
+ * Note: the OVERWRITE records by themselves are not considered
+ * useful, as they don't communicate any *new* information,
+ * aside from the short-lived offset, that becomes history at
+ * the next event sched-in and therefore isn't useful.
+ * The userspace that needs to copy out AUX data in overwrite
+ * mode should know to use user_page::aux_head for the actual
+ * offset. So, from now on we don't output AUX records that
+ * have *only* OVERWRITE flag set.
+ */
+ if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
+ perf_event_aux_event(handle->event, aux_head, size,
+ handle->aux_flags);
rb->user_page->aux_head = rb->aux_head;
if (rb_need_aux_wakeup(rb))
@@ -613,8 +610,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
* PMU requests more than one contiguous chunks of memory
* for SW double buffering
*/
- if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_SW_DOUBLEBUF) &&
- !overwrite) {
+ if (!overwrite) {
if (!max_order)
return -EINVAL;
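
The first ring_buffer.c hunk above folds the nested test into a single predicate: a RECORD_AUX is emitted when there is data (size) or when any flag other than PERF_AUX_FLAG_OVERWRITE is set, so records carrying only the OVERWRITE flag remain suppressed. A tiny illustrative check of that bit test; the flag values here are made up for the sketch and are not the real perf ABI constants.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder flag bits for illustration only. */
#define AUX_FLAG_TRUNCATED	0x01u
#define AUX_FLAG_OVERWRITE	0x02u

/* Mirror of the predicate: emit unless the only thing to report is OVERWRITE. */
static int should_emit_aux_record(uint64_t size, uint64_t flags)
{
	return size || (flags & ~(uint64_t)AUX_FLAG_OVERWRITE);
}

int main(void)
{
	assert(!should_emit_aux_record(0, 0));
	assert(!should_emit_aux_record(0, AUX_FLAG_OVERWRITE));	/* suppressed */
	assert(should_emit_aux_record(0, AUX_FLAG_OVERWRITE | AUX_FLAG_TRUNCATED));
	assert(should_emit_aux_record(4096, 0));
	puts("predicate behaves as described");
	return 0;
}
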
diff --git a/kernel/iomem.c b/kernel/iomem.c
index f7525e14ebc6..93c264444510 100644
--- a/kernel/iomem.c
+++ b/kernel/iomem.c
@@ -55,7 +55,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size,
*
* MEMREMAP_WB - matches the default mapping for System RAM on
* the architecture. This is usually a read-allocate write-back cache.
- * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is RAM
* memremap() will bypass establishing a new mapping and instead return
* a pointer into the direct map.
*
@@ -86,7 +86,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
/* Try all mapping types requested until one returns non-NULL */
if (flags & MEMREMAP_WB) {
/*
- * MEMREMAP_WB is special in that it can be satisifed
+ * MEMREMAP_WB is special in that it can be satisfied
* from the direct map. Some archs depend on the
* capability of memremap() to autodetect cases where
* the requested range is potentially in System RAM.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c83e54727131..b1ea30a5540e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force)
static int reuse_unused_kprobe(struct kprobe *ap)
{
struct optimized_kprobe *op;
- int ret;
/*
* Unused kprobe MUST be on the way of delayed unoptimizing (means
@@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap)
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
- ret = kprobe_optready(ap);
- if (ret)
- return ret;
+ if (!kprobe_optready(ap))
+ return -EINVAL;
optimize_kprobe(ap);
return 0;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 45bcaf2e4cb6..91c6b89f04df 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4896,8 +4896,9 @@ void lockdep_unregister_key(struct lock_class_key *key)
return;
raw_local_irq_save(flags);
- arch_spin_lock(&lockdep_lock);
- current->lockdep_recursion = 1;
+ if (!graph_lock())
+ goto out_irq;
+
pf = get_pending_free();
hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
if (k == key) {
@@ -4909,8 +4910,8 @@ void lockdep_unregister_key(struct lock_class_key *key)
WARN_ON_ONCE(!found);
__lockdep_free_key_range(pf, key, 1);
call_rcu_zapped(pf);
- current->lockdep_recursion = 0;
- arch_spin_unlock(&lockdep_lock);
+ graph_unlock();
+out_irq:
raw_local_irq_restore(flags);
/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
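
The lockdep change above replaces the open-coded arch_spin_lock() and recursion bookkeeping with graph_lock()/graph_unlock(), and bails out through an out_irq label when the graph lock cannot be taken so that interrupts are still restored. A minimal userspace sketch of that acquire-or-bail-out shape, with a trylock standing in for graph_lock():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t graph_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns false if the key was not processed because the lock was unavailable. */
static bool unregister_key(int key)
{
	bool done = false;

	/* irq-save would happen here in the kernel version */
	if (pthread_mutex_trylock(&graph_lock) != 0)
		goto out_irq;		/* could not get the lock: skip the work */

	printf("zapping classes for key %d\n", key);
	done = true;

	pthread_mutex_unlock(&graph_lock);
out_irq:
	/* irq-restore would happen here; runs on both paths */
	return done;
}

int main(void)
{
	unregister_key(42);
	return 0;
}
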
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index ad40a2617063..80a463d31a8d 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -829,7 +829,9 @@ static void lock_torture_cleanup(void)
"End of test: SUCCESS");
kfree(cxt.lwsa);
+ cxt.lwsa = NULL;
kfree(cxt.lrsa);
+ cxt.lrsa = NULL;
end:
torture_cleanup_end();
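
Setting cxt.lwsa and cxt.lrsa back to NULL right after freeing them makes the cleanup path idempotent: a later pass through lock_torture_cleanup() then hits kfree(NULL), which is a no-op, instead of a double free. The same idiom in plain C, where free(NULL) is likewise defined to do nothing:

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	int *lwsa;
	int *lrsa;
};

/* Safe to call any number of times: freed pointers are reset to NULL. */
static void ctx_cleanup(struct ctx *c)
{
	free(c->lwsa);
	c->lwsa = NULL;
	free(c->lrsa);
	c->lrsa = NULL;
}

int main(void)
{
	struct ctx c = {
		.lwsa = malloc(16 * sizeof(int)),
		.lrsa = malloc(16 * sizeof(int)),
	};

	ctx_cleanup(&c);
	ctx_cleanup(&c);	/* second call is harmless */
	puts("cleanup ran twice without a double free");
	return 0;
}
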
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index acee72c0b24b..4b58c907b4b7 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -233,6 +233,7 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
#ifdef CONFIG_RCU_STALL_COMMON
extern int rcu_cpu_stall_suppress;
+extern int rcu_cpu_stall_timeout;
int rcu_jiffies_till_stall_check(void);
#define rcu_ftrace_dump_stall_suppress() \
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index c29761152874..7a6890b23c5f 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -494,6 +494,10 @@ rcu_perf_cleanup(void)
if (torture_cleanup_begin())
return;
+ if (!cur_ops) {
+ torture_cleanup_end();
+ return;
+ }
if (reader_tasks) {
for (i = 0; i < nrealreaders; i++)
@@ -614,6 +618,7 @@ rcu_perf_init(void)
pr_cont("\n");
WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST));
firsterr = -EINVAL;
+ cur_ops = NULL;
goto unwind;
}
if (cur_ops->init)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index f14d1b18a74f..efaa5b3f4d3f 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -299,7 +299,6 @@ struct rcu_torture_ops {
int irq_capable;
int can_boost;
int extendables;
- int ext_irq_conflict;
const char *name;
};
@@ -592,12 +591,7 @@ static void srcu_torture_init(void)
static void srcu_torture_cleanup(void)
{
- static DEFINE_TORTURE_RANDOM(rand);
-
- if (torture_random(&rand) & 0x800)
- cleanup_srcu_struct(&srcu_ctld);
- else
- cleanup_srcu_struct_quiesced(&srcu_ctld);
+ cleanup_srcu_struct(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}
@@ -1160,7 +1154,7 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
unsigned long randmask2 = randmask1 >> 3;
WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
- /* Most of the time lots of bits, half the time only one bit. */
+ /* Mostly only one bit (need preemption!), sometimes lots of bits. */
if (!(randmask1 & 0x7))
mask = mask & randmask2;
else
@@ -1170,10 +1164,6 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) ||
(!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH))))
mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
- if ((mask & RCUTORTURE_RDR_IRQ) &&
- !(mask & cur_ops->ext_irq_conflict) &&
- (oldmask & cur_ops->ext_irq_conflict))
- mask |= cur_ops->ext_irq_conflict; /* Or if readers object. */
return mask ?: RCUTORTURE_RDR_RCU;
}
@@ -1848,7 +1838,7 @@ static int rcutorture_oom_notify(struct notifier_block *self,
WARN(1, "%s invoked upon OOM during forward-progress testing.\n",
__func__);
rcu_torture_fwd_cb_hist();
- rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat) / 2));
+ rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat)) / 2);
WRITE_ONCE(rcu_fwd_emergency_stop, true);
smp_mb(); /* Emergency stop before free and wait to avoid hangs. */
pr_info("%s: Freed %lu RCU callbacks.\n",
@@ -2094,6 +2084,10 @@ rcu_torture_cleanup(void)
cur_ops->cb_barrier();
return;
}
+ if (!cur_ops) {
+ torture_cleanup_end();
+ return;
+ }
rcu_torture_barrier_cleanup();
torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
@@ -2267,6 +2261,7 @@ rcu_torture_init(void)
pr_cont("\n");
WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
firsterr = -EINVAL;
+ cur_ops = NULL;
goto unwind;
}
if (cur_ops->fqs == NULL && fqs_duration != 0) {
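
One hunk above is a pure operator-precedence fix: the old expression divided rcu_fwd_startat by two before subtracting, rather than halving the elapsed time. A small standalone demonstration of how far apart the two parenthesizations land, using made-up jiffies values:

#include <stdio.h>

int main(void)
{
	unsigned long jiffies = 1000100;
	unsigned long rcu_fwd_startat = 1000000;	/* 100 jiffies ago */

	/* Buggy form: divides the start time, not the elapsed time. */
	unsigned long bad  = 1 + (jiffies - rcu_fwd_startat / 2);
	/* Fixed form: half of the elapsed time, plus one. */
	unsigned long good = 1 + (jiffies - rcu_fwd_startat) / 2;

	printf("buggy argument: %lu\n", bad);	/* 500101 */
	printf("fixed argument: %lu\n", good);	/* 51 */
	return 0;
}
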
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 5d4a39a6505a..44d6606b8325 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -76,19 +76,16 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
-void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
+void cleanup_srcu_struct(struct srcu_struct *ssp)
{
WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
- if (quiesced)
- WARN_ON(work_pending(&ssp->srcu_work));
- else
- flush_work(&ssp->srcu_work);
+ flush_work(&ssp->srcu_work);
WARN_ON(ssp->srcu_gp_running);
WARN_ON(ssp->srcu_gp_waiting);
WARN_ON(ssp->srcu_cb_head);
WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail);
}
-EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index a60b8ba9e1ac..9b761e546de8 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -360,8 +360,14 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
return SRCU_INTERVAL;
}
-/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
-void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @ssp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void cleanup_srcu_struct(struct srcu_struct *ssp)
{
int cpu;
@@ -369,24 +375,14 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(ssp)))
return; /* Just leak it! */
- if (quiesced) {
- if (WARN_ON(delayed_work_pending(&ssp->work)))
- return; /* Just leak it! */
- } else {
- flush_delayed_work(&ssp->work);
- }
+ flush_delayed_work(&ssp->work);
for_each_possible_cpu(cpu) {
struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
- if (quiesced) {
- if (WARN_ON(timer_pending(&sdp->delay_work)))
- return; /* Just leak it! */
- if (WARN_ON(work_pending(&sdp->work)))
- return; /* Just leak it! */
- } else {
- del_timer_sync(&sdp->delay_work);
- flush_work(&sdp->work);
- }
+ del_timer_sync(&sdp->delay_work);
+ flush_work(&sdp->work);
+ if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
+ return; /* Forgot srcu_barrier(), so just leak it! */
}
if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(ssp))) {
@@ -397,7 +393,7 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
free_percpu(ssp->sda);
ssp->sda = NULL;
}
-EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 911bd9076d43..477b4eb44af5 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -52,7 +52,7 @@ void rcu_qs(void)
local_irq_save(flags);
if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
- raise_softirq(RCU_SOFTIRQ);
+ raise_softirq_irqoff(RCU_SOFTIRQ);
}
local_irq_restore(flags);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index acd6ccf56faf..ec77ec336f58 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -102,11 +102,6 @@ int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
/* Number of rcu_nodes at specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
-/* panic() on RCU Stall sysctl. */
-int sysctl_panic_on_rcu_stall __read_mostly;
-/* Commandeer a sysrq key to dump RCU's tree. */
-static bool sysrq_rcu;
-module_param(sysrq_rcu, bool, 0444);
/*
* The rcu_scheduler_active variable is initialized to the value
@@ -149,7 +144,7 @@ static void sync_sched_exp_online_cleanup(int cpu);
/* rcuc/rcub kthread realtime priority */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
-module_param(kthread_prio, int, 0644);
+module_param(kthread_prio, int, 0444);
/* Delay in jiffies for grace-period initialization delays, debug only. */
@@ -406,7 +401,7 @@ static bool rcu_kick_kthreads;
*/
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
-static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */
+static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
/*
@@ -424,6 +419,7 @@ static void adjust_jiffies_till_sched_qs(void)
WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
return;
}
+ /* Otherwise, set to third fqs scan, but bound below on large system. */
j = READ_ONCE(jiffies_till_first_fqs) +
2 * READ_ONCE(jiffies_till_next_fqs);
if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
@@ -513,74 +509,6 @@ static const char *gp_state_getname(short gs)
}
/*
- * Show the state of the grace-period kthreads.
- */
-void show_rcu_gp_kthreads(void)
-{
- int cpu;
- unsigned long j;
- unsigned long ja;
- unsigned long jr;
- unsigned long jw;
- struct rcu_data *rdp;
- struct rcu_node *rnp;
-
- j = jiffies;
- ja = j - READ_ONCE(rcu_state.gp_activity);
- jr = j - READ_ONCE(rcu_state.gp_req_activity);
- jw = j - READ_ONCE(rcu_state.gp_wake_time);
- pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
- rcu_state.name, gp_state_getname(rcu_state.gp_state),
- rcu_state.gp_state,
- rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
- ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
- (long)READ_ONCE(rcu_state.gp_seq),
- (long)READ_ONCE(rcu_get_root()->gp_seq_needed),
- READ_ONCE(rcu_state.gp_flags));
- rcu_for_each_node_breadth_first(rnp) {
- if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
- continue;
- pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
- rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
- (long)rnp->gp_seq_needed);
- if (!rcu_is_leaf_node(rnp))
- continue;
- for_each_leaf_node_possible_cpu(rnp, cpu) {
- rdp = per_cpu_ptr(&rcu_data, cpu);
- if (rdp->gpwrap ||
- ULONG_CMP_GE(rcu_state.gp_seq,
- rdp->gp_seq_needed))
- continue;
- pr_info("\tcpu %d ->gp_seq_needed %ld\n",
- cpu, (long)rdp->gp_seq_needed);
- }
- }
- /* sched_show_task(rcu_state.gp_kthread); */
-}
-EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
-
-/* Dump grace-period-request information due to commandeered sysrq. */
-static void sysrq_show_rcu(int key)
-{
- show_rcu_gp_kthreads();
-}
-
-static struct sysrq_key_op sysrq_rcudump_op = {
- .handler = sysrq_show_rcu,
- .help_msg = "show-rcu(y)",
- .action_msg = "Show RCU tree",
- .enable_mask = SYSRQ_ENABLE_DUMP,
-};
-
-static int __init rcu_sysrq_init(void)
-{
- if (sysrq_rcu)
- return register_sysrq_key('y', &sysrq_rcudump_op);
- return 0;
-}
-early_initcall(rcu_sysrq_init);
-
-/*
* Send along grace-period-related data for rcutorture diagnostics.
*/
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
@@ -1034,27 +962,6 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
}
/*
- * Handler for the irq_work request posted when a grace period has
- * gone on for too long, but not yet long enough for an RCU CPU
- * stall warning. Set state appropriately, but just complain if
- * there is unexpected state on entry.
- */
-static void rcu_iw_handler(struct irq_work *iwp)
-{
- struct rcu_data *rdp;
- struct rcu_node *rnp;
-
- rdp = container_of(iwp, struct rcu_data, rcu_iw);
- rnp = rdp->mynode;
- raw_spin_lock_rcu_node(rnp);
- if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
- rdp->rcu_iw_gp_seq = rnp->gp_seq;
- rdp->rcu_iw_pending = false;
- }
- raw_spin_unlock_rcu_node(rnp);
-}
-
-/*
* Return true if the specified CPU has passed through a quiescent
 * state by virtue of being in or having passed through a dynticks
* idle state since the last call to dyntick_save_progress_counter()
@@ -1167,295 +1074,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
return 0;
}
-static void record_gp_stall_check_time(void)
-{
- unsigned long j = jiffies;
- unsigned long j1;
-
- rcu_state.gp_start = j;
- j1 = rcu_jiffies_till_stall_check();
- /* Record ->gp_start before ->jiffies_stall. */
- smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
- rcu_state.jiffies_resched = j + j1 / 2;
- rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
-}
-
-/*
- * Complain about starvation of grace-period kthread.
- */
-static void rcu_check_gp_kthread_starvation(void)
-{
- struct task_struct *gpk = rcu_state.gp_kthread;
- unsigned long j;
-
- j = jiffies - READ_ONCE(rcu_state.gp_activity);
- if (j > 2 * HZ) {
- pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
- rcu_state.name, j,
- (long)rcu_seq_current(&rcu_state.gp_seq),
- READ_ONCE(rcu_state.gp_flags),
- gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
- gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
- if (gpk) {
- pr_err("RCU grace-period kthread stack dump:\n");
- sched_show_task(gpk);
- wake_up_process(gpk);
- }
- }
-}
-
-/*
- * Dump stacks of all tasks running on stalled CPUs. First try using
- * NMIs, but fall back to manual remote stack tracing on architectures
- * that don't support NMI-based stack dumps. The NMI-triggered stack
- * traces are more accurate because they are printed by the target CPU.
- */
-static void rcu_dump_cpu_stacks(void)
-{
- int cpu;
- unsigned long flags;
- struct rcu_node *rnp;
-
- rcu_for_each_leaf_node(rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
- if (!trigger_single_cpu_backtrace(cpu))
- dump_cpu_task(cpu);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- }
-}
-
-/*
- * If too much time has passed in the current grace period, and if
- * so configured, go kick the relevant kthreads.
- */
-static void rcu_stall_kick_kthreads(void)
-{
- unsigned long j;
-
- if (!rcu_kick_kthreads)
- return;
- j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
- if (time_after(jiffies, j) && rcu_state.gp_kthread &&
- (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
- WARN_ONCE(1, "Kicking %s grace-period kthread\n",
- rcu_state.name);
- rcu_ftrace_dump(DUMP_ALL);
- wake_up_process(rcu_state.gp_kthread);
- WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
- }
-}
-
-static void panic_on_rcu_stall(void)
-{
- if (sysctl_panic_on_rcu_stall)
- panic("RCU Stall\n");
-}
-
-static void print_other_cpu_stall(unsigned long gp_seq)
-{
- int cpu;
- unsigned long flags;
- unsigned long gpa;
- unsigned long j;
- int ndetected = 0;
- struct rcu_node *rnp = rcu_get_root();
- long totqlen = 0;
-
- /* Kick and suppress, if so configured. */
- rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
- return;
-
- /*
- * OK, time to rat on our buddy...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
- * RCU CPU stall warnings.
- */
- pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name);
- print_cpu_stall_info_begin();
- rcu_for_each_leaf_node(rnp) {
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- ndetected += rcu_print_task_stall(rnp);
- if (rnp->qsmask != 0) {
- for_each_leaf_node_possible_cpu(rnp, cpu)
- if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
- print_cpu_stall_info(cpu);
- ndetected++;
- }
- }
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- }
-
- print_cpu_stall_info_end();
- for_each_possible_cpu(cpu)
- totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
- smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
- if (ndetected) {
- rcu_dump_cpu_stacks();
-
- /* Complain about tasks blocking the grace period. */
- rcu_print_detail_task_stall();
- } else {
- if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
- pr_err("INFO: Stall ended before state dump start\n");
- } else {
- j = jiffies;
- gpa = READ_ONCE(rcu_state.gp_activity);
- pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
- rcu_state.name, j - gpa, j, gpa,
- READ_ONCE(jiffies_till_next_fqs),
- rcu_get_root()->qsmask);
- /* In this case, the current CPU might be at fault. */
- sched_show_task(current);
- }
- }
- /* Rewrite if needed in case of slow consoles. */
- if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
- WRITE_ONCE(rcu_state.jiffies_stall,
- jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
-
- rcu_check_gp_kthread_starvation();
-
- panic_on_rcu_stall();
-
- rcu_force_quiescent_state(); /* Kick them all. */
-}
-
-static void print_cpu_stall(void)
-{
- int cpu;
- unsigned long flags;
- struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- struct rcu_node *rnp = rcu_get_root();
- long totqlen = 0;
-
- /* Kick and suppress, if so configured. */
- rcu_stall_kick_kthreads();
- if (rcu_cpu_stall_suppress)
- return;
-
- /*
- * OK, time to rat on ourselves...
- * See Documentation/RCU/stallwarn.txt for info on how to debug
- * RCU CPU stall warnings.
- */
- pr_err("INFO: %s self-detected stall on CPU", rcu_state.name);
- print_cpu_stall_info_begin();
- raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
- print_cpu_stall_info(smp_processor_id());
- raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
- print_cpu_stall_info_end();
- for_each_possible_cpu(cpu)
- totqlen += rcu_get_n_cbs_cpu(cpu);
- pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n",
- jiffies - rcu_state.gp_start,
- (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
-
- rcu_check_gp_kthread_starvation();
-
- rcu_dump_cpu_stacks();
-
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- /* Rewrite if needed in case of slow consoles. */
- if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
- WRITE_ONCE(rcu_state.jiffies_stall,
- jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-
- panic_on_rcu_stall();
-
- /*
- * Attempt to revive the RCU machinery by forcing a context switch.
- *
- * A context switch would normally allow the RCU state machine to make
- * progress and it could be we're stuck in kernel space without context
- * switches for an entirely unreasonable amount of time.
- */
- set_tsk_need_resched(current);
- set_preempt_need_resched();
-}
-
-static void check_cpu_stall(struct rcu_data *rdp)
-{
- unsigned long gs1;
- unsigned long gs2;
- unsigned long gps;
- unsigned long j;
- unsigned long jn;
- unsigned long js;
- struct rcu_node *rnp;
-
- if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
- !rcu_gp_in_progress())
- return;
- rcu_stall_kick_kthreads();
- j = jiffies;
-
- /*
- * Lots of memory barriers to reject false positives.
- *
- * The idea is to pick up rcu_state.gp_seq, then
- * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
- * another copy of rcu_state.gp_seq. These values are updated in
- * the opposite order with memory barriers (or equivalent) during
- * grace-period initialization and cleanup. Now, a false positive
- * can occur if we get an new value of rcu_state.gp_start and a old
- * value of rcu_state.jiffies_stall. But given the memory barriers,
- * the only way that this can happen is if one grace period ends
- * and another starts between these two fetches. This is detected
- * by comparing the second fetch of rcu_state.gp_seq with the
- * previous fetch from rcu_state.gp_seq.
- *
- * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
- * and rcu_state.gp_start suffice to forestall false positives.
- */
- gs1 = READ_ONCE(rcu_state.gp_seq);
- smp_rmb(); /* Pick up ->gp_seq first... */
- js = READ_ONCE(rcu_state.jiffies_stall);
- smp_rmb(); /* ...then ->jiffies_stall before the rest... */
- gps = READ_ONCE(rcu_state.gp_start);
- smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
- gs2 = READ_ONCE(rcu_state.gp_seq);
- if (gs1 != gs2 ||
- ULONG_CMP_LT(j, js) ||
- ULONG_CMP_GE(gps, js))
- return; /* No stall or GP completed since entering function. */
- rnp = rdp->mynode;
- jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
- if (rcu_gp_in_progress() &&
- (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
- cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
-
- /* We haven't checked in, so go dump stack. */
- print_cpu_stall();
-
- } else if (rcu_gp_in_progress() &&
- ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
- cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
-
- /* They had a few time units to dump stack, so complain. */
- print_other_cpu_stall(gs2);
- }
-}
-
-/**
- * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
- *
- * Set the stall-warning timeout way off into the future, thus preventing
- * any RCU CPU stall-warning messages from appearing in the current set of
- * RCU grace periods.
- *
- * The caller must disable hard irqs.
- */
-void rcu_cpu_stall_reset(void)
-{
- WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
-}
-
/* Trace-event wrapper function for trace_rcu_future_grace_period. */
static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
unsigned long gp_seq_req, const char *s)
@@ -1585,7 +1203,7 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
static void rcu_gp_kthread_wake(void)
{
if ((current == rcu_state.gp_kthread &&
- !in_interrupt() && !in_serving_softirq()) ||
+ !in_irq() && !in_serving_softirq()) ||
!READ_ONCE(rcu_state.gp_flags) ||
!rcu_state.gp_kthread)
return;
@@ -2295,11 +1913,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
+ rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
- rdp->core_needs_qs = false;
-
/*
* This GP can't end until cpu checks in, so all of our
* callbacks can be processed during the next GP.
@@ -2548,11 +2165,11 @@ void rcu_sched_clock_irq(int user)
}
/*
- * Scan the leaf rcu_node structures, processing dyntick state for any that
- * have not yet encountered a quiescent state, using the function specified.
- * Also initiate boosting for any threads blocked on the root rcu_node.
- *
- * The caller must have suppressed start of new grace periods.
+ * Scan the leaf rcu_node structures. For each structure on which all
+ * CPUs have reported a quiescent state and on which there are tasks
+ * blocking the current grace period, initiate RCU priority boosting.
+ * Otherwise, invoke the specified function to check dyntick state for
+ * each CPU that has not yet reported a quiescent state.
*/
static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
{
@@ -2635,101 +2252,6 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
-/*
- * This function checks for grace-period requests that fail to motivate
- * RCU to come out of its idle mode.
- */
-void
-rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
- const unsigned long gpssdelay)
-{
- unsigned long flags;
- unsigned long j;
- struct rcu_node *rnp_root = rcu_get_root();
- static atomic_t warned = ATOMIC_INIT(0);
-
- if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
- return;
- j = jiffies; /* Expensive access, and in common case don't get here. */
- if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
- time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
- atomic_read(&warned))
- return;
-
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- j = jiffies;
- if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
- time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
- atomic_read(&warned)) {
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- return;
- }
- /* Hold onto the leaf lock to make others see warned==1. */
-
- if (rnp_root != rnp)
- raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
- j = jiffies;
- if (rcu_gp_in_progress() ||
- ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
- time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
- time_before(j, rcu_state.gp_activity + gpssdelay) ||
- atomic_xchg(&warned, 1)) {
- raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- return;
- }
- WARN_ON(1);
- if (rnp_root != rnp)
- raw_spin_unlock_rcu_node(rnp_root);
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- show_rcu_gp_kthreads();
-}
-
-/*
- * Do a forward-progress check for rcutorture. This is normally invoked
- * due to an OOM event. The argument "j" gives the time period during
- * which rcutorture would like progress to have been made.
- */
-void rcu_fwd_progress_check(unsigned long j)
-{
- unsigned long cbs;
- int cpu;
- unsigned long max_cbs = 0;
- int max_cpu = -1;
- struct rcu_data *rdp;
-
- if (rcu_gp_in_progress()) {
- pr_info("%s: GP age %lu jiffies\n",
- __func__, jiffies - rcu_state.gp_start);
- show_rcu_gp_kthreads();
- } else {
- pr_info("%s: Last GP end %lu jiffies ago\n",
- __func__, jiffies - rcu_state.gp_end);
- preempt_disable();
- rdp = this_cpu_ptr(&rcu_data);
- rcu_check_gp_start_stall(rdp->mynode, rdp, j);
- preempt_enable();
- }
- for_each_possible_cpu(cpu) {
- cbs = rcu_get_n_cbs_cpu(cpu);
- if (!cbs)
- continue;
- if (max_cpu < 0)
- pr_info("%s: callbacks", __func__);
- pr_cont(" %d: %lu", cpu, cbs);
- if (cbs <= max_cbs)
- continue;
- max_cbs = cbs;
- max_cpu = cpu;
- }
- if (max_cpu >= 0)
- pr_cont("\n");
-}
-EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
-
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(struct softirq_action *unused)
{
@@ -3559,13 +3081,11 @@ static int rcu_pm_notify(struct notifier_block *self,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
- if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
- rcu_expedite_gp();
+ rcu_expedite_gp();
break;
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
- if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
- rcu_unexpedite_gp();
+ rcu_unexpedite_gp();
break;
default:
break;
@@ -3742,8 +3262,7 @@ static void __init rcu_init_geometry(void)
jiffies_till_first_fqs = d;
if (jiffies_till_next_fqs == ULONG_MAX)
jiffies_till_next_fqs = d;
- if (jiffies_till_sched_qs == ULONG_MAX)
- adjust_jiffies_till_sched_qs();
+ adjust_jiffies_till_sched_qs();
/* If the compile-time values are accurate, just leave. */
if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
@@ -3858,5 +3377,6 @@ void __init rcu_init(void)
srcu_init();
}
+#include "tree_stall.h"
#include "tree_exp.h"
#include "tree_plugin.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bb4f995f2d3f..e253d11af3c4 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -393,15 +393,13 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
int rcu_dynticks_snap(struct rcu_data *rdp);
-/* Forward declarations for rcutree_plugin.h */
+/* Forward declarations for tree_plugin.h */
static void rcu_bootup_announce(void);
static void rcu_qs(void);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-static void rcu_print_detail_task_stall(void);
-static int rcu_print_task_stall(struct rcu_node *rnp);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
static void rcu_flavor_sched_clock_irq(int user);
@@ -418,9 +416,6 @@ static void rcu_prepare_for_idle(void);
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
static void rcu_preempt_deferred_qs(struct task_struct *t);
-static void print_cpu_stall_info_begin(void);
-static void print_cpu_stall_info(int cpu);
-static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static bool rcu_nocb_cpu_needs_barrier(int cpu);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
@@ -445,3 +440,10 @@ static void rcu_bind_gp_kthread(void);
static bool rcu_nohz_full_cpu(void);
static void rcu_dynticks_task_enter(void);
static void rcu_dynticks_task_exit(void);
+
+/* Forward declarations for tree_stall.h */
+static void record_gp_stall_check_time(void);
+static void rcu_iw_handler(struct irq_work *iwp);
+static void check_cpu_stall(struct rcu_data *rdp);
+static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
+ const unsigned long gpssdelay);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 4c2a0189e748..9c990df880d1 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -10,6 +10,7 @@
#include <linux/lockdep.h>
static void rcu_exp_handler(void *unused);
+static int rcu_print_task_exp_stall(struct rcu_node *rnp);
/*
* Record the start of an expedited grace period.
@@ -633,7 +634,7 @@ static void rcu_exp_handler(void *unused)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
rdp->deferred_qs = true;
- WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true);
+ t->rcu_read_unlock_special.b.exp_hint = true;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
@@ -648,7 +649,7 @@ static void rcu_exp_handler(void *unused)
*
* If the CPU is fully enabled (or if some buggy RCU-preempt
* read-side critical section is being used from idle), just
- * invoke rcu_preempt_defer_qs() to immediately report the
+ * invoke rcu_preempt_deferred_qs() to immediately report the
* quiescent state. We cannot use rcu_read_unlock_special()
* because we are in an interrupt handler, which will cause that
* function to take an early exit without doing anything.
@@ -670,6 +671,27 @@ static void sync_sched_exp_online_cleanup(int cpu)
{
}
+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, printing out the tid of each that is blocking the current
+ * expedited grace period.
+ */
+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
+{
+ struct task_struct *t;
+ int ndetected = 0;
+
+ if (!rnp->exp_tasks)
+ return 0;
+ t = list_entry(rnp->exp_tasks->prev,
+ struct task_struct, rcu_node_entry);
+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+ pr_cont(" P%d", t->pid);
+ ndetected++;
+ }
+ return ndetected;
+}
+
#else /* #ifdef CONFIG_PREEMPT_RCU */
/* Invoked on each online non-idle CPU for expedited quiescent state. */
@@ -709,6 +731,16 @@ static void sync_sched_exp_online_cleanup(int cpu)
WARN_ON_ONCE(ret);
}
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections that are
+ * blocking the current expedited grace period.
+ */
+static int rcu_print_task_exp_stall(struct rcu_node *rnp)
+{
+ return 0;
+}
+
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
/**
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 97dba50f6fb2..1102765f91fd 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -285,7 +285,7 @@ static void rcu_qs(void)
TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
- current->rcu_read_unlock_special.b.need_qs = false;
+ WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
}
}
@@ -643,100 +643,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
}
/*
- * Dump detailed information for all tasks blocking the current RCU
- * grace period on the specified rcu_node structure.
- */
-static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
-{
- unsigned long flags;
- struct task_struct *t;
-
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- if (!rcu_preempt_blocked_readers_cgp(rnp)) {
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- return;
- }
- t = list_entry(rnp->gp_tasks->prev,
- struct task_struct, rcu_node_entry);
- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
- /*
- * We could be printing a lot while holding a spinlock.
- * Avoid triggering hard lockup.
- */
- touch_nmi_watchdog();
- sched_show_task(t);
- }
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-}
-
-/*
- * Dump detailed information for all tasks blocking the current RCU
- * grace period.
- */
-static void rcu_print_detail_task_stall(void)
-{
- struct rcu_node *rnp = rcu_get_root();
-
- rcu_print_detail_task_stall_rnp(rnp);
- rcu_for_each_leaf_node(rnp)
- rcu_print_detail_task_stall_rnp(rnp);
-}
-
-static void rcu_print_task_stall_begin(struct rcu_node *rnp)
-{
- pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
- rnp->level, rnp->grplo, rnp->grphi);
-}
-
-static void rcu_print_task_stall_end(void)
-{
- pr_cont("\n");
-}
-
-/*
- * Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each.
- */
-static int rcu_print_task_stall(struct rcu_node *rnp)
-{
- struct task_struct *t;
- int ndetected = 0;
-
- if (!rcu_preempt_blocked_readers_cgp(rnp))
- return 0;
- rcu_print_task_stall_begin(rnp);
- t = list_entry(rnp->gp_tasks->prev,
- struct task_struct, rcu_node_entry);
- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
- pr_cont(" P%d", t->pid);
- ndetected++;
- }
- rcu_print_task_stall_end();
- return ndetected;
-}
-
-/*
- * Scan the current list of tasks blocked within RCU read-side critical
- * sections, printing out the tid of each that is blocking the current
- * expedited grace period.
- */
-static int rcu_print_task_exp_stall(struct rcu_node *rnp)
-{
- struct task_struct *t;
- int ndetected = 0;
-
- if (!rnp->exp_tasks)
- return 0;
- t = list_entry(rnp->exp_tasks->prev,
- struct task_struct, rcu_node_entry);
- list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
- pr_cont(" P%d", t->pid);
- ndetected++;
- }
- return ndetected;
-}
-
-/*
* Check that the list of blocked tasks for the newly completed grace
* period is in fact empty. It is a serious bug to complete a grace
* period that still has RCU readers blocked! This function must be
@@ -804,19 +710,25 @@ static void rcu_flavor_sched_clock_irq(int user)
/*
* Check for a task exiting while in a preemptible-RCU read-side
- * critical section, clean up if so. No need to issue warnings,
- * as debug_check_no_locks_held() already does this if lockdep
- * is enabled.
+ * critical section, clean up if so. No need to issue warnings, as
+ * debug_check_no_locks_held() already does this if lockdep is enabled.
+ * Besides, if this function does anything other than just immediately
+ * return, there was a bug of some sort. Spewing warnings from this
+ * function is like as not to simply obscure important prior warnings.
*/
void exit_rcu(void)
{
struct task_struct *t = current;
- if (likely(list_empty(&current->rcu_node_entry)))
+ if (unlikely(!list_empty(&current->rcu_node_entry))) {
+ t->rcu_read_lock_nesting = 1;
+ barrier();
+ WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
+ } else if (unlikely(t->rcu_read_lock_nesting)) {
+ t->rcu_read_lock_nesting = 1;
+ } else {
return;
- t->rcu_read_lock_nesting = 1;
- barrier();
- t->rcu_read_unlock_special.b.blocked = true;
+ }
__rcu_read_unlock();
rcu_preempt_deferred_qs(current);
}
@@ -980,33 +892,6 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
static void rcu_preempt_deferred_qs(struct task_struct *t) { }
/*
- * Because preemptible RCU does not exist, we never have to check for
- * tasks blocked within RCU read-side critical sections.
- */
-static void rcu_print_detail_task_stall(void)
-{
-}
-
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * tasks blocked within RCU read-side critical sections.
- */
-static int rcu_print_task_stall(struct rcu_node *rnp)
-{
- return 0;
-}
-
-/*
- * Because preemptible RCU does not exist, we never have to check for
- * tasks blocked within RCU read-side critical sections that are
- * blocking the current expedited grace period.
- */
-static int rcu_print_task_exp_stall(struct rcu_node *rnp)
-{
- return 0;
-}
-
-/*
* Because there is no preemptible RCU, there can be no readers blocked,
* so there is no need to check for blocked tasks. So check only for
* bogus qsmask values.
@@ -1185,8 +1070,6 @@ static int rcu_boost_kthread(void *arg)
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
__releases(rnp->lock)
{
- struct task_struct *t;
-
raw_lockdep_assert_held_rcu_node(rnp);
if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -1200,9 +1083,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
if (rnp->exp_tasks == NULL)
rnp->boost_tasks = rnp->gp_tasks;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
- t = rnp->boost_kthread_task;
- if (t)
- rcu_wake_cond(t, rnp->boost_kthread_status);
+ rcu_wake_cond(rnp->boost_kthread_task,
+ rnp->boost_kthread_status);
} else {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
@@ -1649,98 +1531,6 @@ static void rcu_cleanup_after_idle(void)
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
-#ifdef CONFIG_RCU_FAST_NO_HZ
-
-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
-{
- struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
-
- sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c",
- rdp->last_accelerate & 0xffff, jiffies & 0xffff,
- ".l"[rdp->all_lazy],
- ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)],
- ".D"[!rdp->tick_nohz_enabled_snap]);
-}
-
-#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-
-static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
-{
- *cp = '\0';
-}
-
-#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
-
-/* Initiate the stall-info list. */
-static void print_cpu_stall_info_begin(void)
-{
- pr_cont("\n");
-}
-
-/*
- * Print out diagnostic information for the specified stalled CPU.
- *
- * If the specified CPU is aware of the current RCU grace period, then
- * print the number of scheduling clock interrupts the CPU has taken
- * during the time that it has been aware. Otherwise, print the number
- * of RCU grace periods that this CPU is ignorant of, for example, "1"
- * if the CPU was aware of the previous grace period.
- *
- * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
- */
-static void print_cpu_stall_info(int cpu)
-{
- unsigned long delta;
- char fast_no_hz[72];
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- char *ticks_title;
- unsigned long ticks_value;
-
- /*
- * We could be printing a lot while holding a spinlock. Avoid
- * triggering hard lockup.
- */
- touch_nmi_watchdog();
-
- ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq);
- if (ticks_value) {
- ticks_title = "GPs behind";
- } else {
- ticks_title = "ticks this GP";
- ticks_value = rdp->ticks_this_gp;
- }
- print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
- delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
- pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
- cpu,
- "O."[!!cpu_online(cpu)],
- "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
- "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
- !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
- rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
- "!."[!delta],
- ticks_value, ticks_title,
- rcu_dynticks_snap(rdp) & 0xfff,
- rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
- rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
- READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
- fast_no_hz);
-}
-
-/* Terminate the stall-info list. */
-static void print_cpu_stall_info_end(void)
-{
- pr_err("\t");
-}
-
-/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */
-static void zero_cpu_stall_ticks(struct rcu_data *rdp)
-{
- rdp->ticks_this_gp = 0;
- rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
- WRITE_ONCE(rdp->last_fqs_resched, jiffies);
-}
-
#ifdef CONFIG_RCU_NOCB_CPU
/*
@@ -1766,11 +1556,22 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
*/
-/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
+/*
+ * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
+ * The string after the "rcu_nocbs=" is either "all" for all CPUs, or a
+ * comma-separated list of CPUs and/or CPU ranges. If an invalid list is
+ * given, a warning is emitted and all CPUs are offloaded.
+ */
static int __init rcu_nocb_setup(char *str)
{
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
- cpulist_parse(str, rcu_nocb_mask);
+ if (!strcasecmp(str, "all"))
+ cpumask_setall(rcu_nocb_mask);
+ else
+ if (cpulist_parse(str, rcu_nocb_mask)) {
+ pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
+ cpumask_setall(rcu_nocb_mask);
+ }
return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
new file mode 100644
index 000000000000..f65a73a97323
--- /dev/null
+++ b/kernel/rcu/tree_stall.h
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * RCU CPU stall warnings for normal RCU grace periods
+ *
+ * Copyright IBM Corporation, 2019
+ *
+ * Author: Paul E. McKenney <paulmck@linux.ibm.com>
+ */
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Controlling CPU stall warnings, including delay calculation.
+
+/* panic() on RCU Stall sysctl. */
+int sysctl_panic_on_rcu_stall __read_mostly;
+
+#ifdef CONFIG_PROVE_RCU
+#define RCU_STALL_DELAY_DELTA (5 * HZ)
+#else
+#define RCU_STALL_DELAY_DELTA 0
+#endif
+
+/* Limit-check stall timeouts specified at boottime and runtime. */
+int rcu_jiffies_till_stall_check(void)
+{
+ int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
+
+ /*
+ * Limit check must be consistent with the Kconfig limits
+ * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+ */
+ if (till_stall_check < 3) {
+ WRITE_ONCE(rcu_cpu_stall_timeout, 3);
+ till_stall_check = 3;
+ } else if (till_stall_check > 300) {
+ WRITE_ONCE(rcu_cpu_stall_timeout, 300);
+ till_stall_check = 300;
+ }
+ return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
+EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
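
rcu_jiffies_till_stall_check() clamps the boot-time or runtime timeout into the same 3..300 second range that Kconfig allows, writes the clamped value back so later readers see a sane setting, then converts to jiffies and pads by RCU_STALL_DELAY_DELTA when CONFIG_PROVE_RCU is enabled. The clamping arithmetic reduced to a standalone sketch, with HZ fixed at 1000 purely for the demonstration:

#include <stdio.h>

#define HZ			1000
#define STALL_DELAY_DELTA	0	/* (5 * HZ) when RCU proving is enabled */

static int stall_timeout_secs = 21;	/* analogous to rcu_cpu_stall_timeout */

/* Clamp to the 3..300 second range, write back, and convert to jiffies. */
static long jiffies_till_stall_check(void)
{
	int secs = stall_timeout_secs;

	if (secs < 3)
		secs = stall_timeout_secs = 3;
	else if (secs > 300)
		secs = stall_timeout_secs = 300;
	return (long)secs * HZ + STALL_DELAY_DELTA;
}

int main(void)
{
	stall_timeout_secs = 1;		/* too small: clamped up to 3 */
	printf("%d -> %ld jiffies\n", 1, jiffies_till_stall_check());
	stall_timeout_secs = 600;	/* too large: clamped down to 300 */
	printf("%d -> %ld jiffies\n", 600, jiffies_till_stall_check());
	return 0;
}
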
+
+/* Don't do RCU CPU stall warnings during long sysrq printouts. */
+void rcu_sysrq_start(void)
+{
+ if (!rcu_cpu_stall_suppress)
+ rcu_cpu_stall_suppress = 2;
+}
+
+void rcu_sysrq_end(void)
+{
+ if (rcu_cpu_stall_suppress == 2)
+ rcu_cpu_stall_suppress = 0;
+}
+
+/* Don't print RCU CPU stall warnings during a kernel panic. */
+static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
+{
+ rcu_cpu_stall_suppress = 1;
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block rcu_panic_block = {
+ .notifier_call = rcu_panic,
+};
+
+static int __init check_cpu_stall_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
+ return 0;
+}
+early_initcall(check_cpu_stall_init);
+
+/* If so specified via sysctl, panic, yielding cleaner stall-warning output. */
+static void panic_on_rcu_stall(void)
+{
+ if (sysctl_panic_on_rcu_stall)
+ panic("RCU Stall\n");
+}
+
+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+ WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Interaction with RCU grace periods
+
+/* Start of new grace period, so record stall time (and forcing times). */
+static void record_gp_stall_check_time(void)
+{
+ unsigned long j = jiffies;
+ unsigned long j1;
+
+ rcu_state.gp_start = j;
+ j1 = rcu_jiffies_till_stall_check();
+ /* Record ->gp_start before ->jiffies_stall. */
+ smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
+ rcu_state.jiffies_resched = j + j1 / 2;
+ rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
+}
+
+/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */
+static void zero_cpu_stall_ticks(struct rcu_data *rdp)
+{
+ rdp->ticks_this_gp = 0;
+ rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
+ WRITE_ONCE(rdp->last_fqs_resched, jiffies);
+}
+
+/*
+ * If too much time has passed in the current grace period, and if
+ * so configured, go kick the relevant kthreads.
+ */
+static void rcu_stall_kick_kthreads(void)
+{
+ unsigned long j;
+
+ if (!rcu_kick_kthreads)
+ return;
+ j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
+ if (time_after(jiffies, j) && rcu_state.gp_kthread &&
+ (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
+ WARN_ONCE(1, "Kicking %s grace-period kthread\n",
+ rcu_state.name);
+ rcu_ftrace_dump(DUMP_ALL);
+ wake_up_process(rcu_state.gp_kthread);
+ WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
+ }
+}
+
+/*
+ * Handler for the irq_work request posted about halfway into the RCU CPU
+ * stall timeout, and used to detect excessive irq disabling. Set state
+ * appropriately, but just complain if there is unexpected state on entry.
+ */
+static void rcu_iw_handler(struct irq_work *iwp)
+{
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+
+ rdp = container_of(iwp, struct rcu_data, rcu_iw);
+ rnp = rdp->mynode;
+ raw_spin_lock_rcu_node(rnp);
+ if (!WARN_ON_ONCE(!rdp->rcu_iw_pending)) {
+ rdp->rcu_iw_gp_seq = rnp->gp_seq;
+ rdp->rcu_iw_pending = false;
+ }
+ raw_spin_unlock_rcu_node(rnp);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Printing RCU CPU stall warnings
+
+#ifdef CONFIG_PREEMPT
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+ unsigned long flags;
+ struct task_struct *t;
+
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return;
+ }
+ t = list_entry(rnp->gp_tasks->prev,
+ struct task_struct, rcu_node_entry);
+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+ /*
+ * We could be printing a lot while holding a spinlock.
+ * Avoid triggering hard lockup.
+ */
+ touch_nmi_watchdog();
+ sched_show_task(t);
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
+/*
+ * Scan the current list of tasks blocked within RCU read-side critical
+ * sections, printing out the tid of each.
+ */
+static int rcu_print_task_stall(struct rcu_node *rnp)
+{
+ struct task_struct *t;
+ int ndetected = 0;
+
+ if (!rcu_preempt_blocked_readers_cgp(rnp))
+ return 0;
+ pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
+ rnp->level, rnp->grplo, rnp->grphi);
+ t = list_entry(rnp->gp_tasks->prev,
+ struct task_struct, rcu_node_entry);
+ list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
+ pr_cont(" P%d", t->pid);
+ ndetected++;
+ }
+ pr_cont("\n");
+ return ndetected;
+}
+
+#else /* #ifdef CONFIG_PREEMPT */
+
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
+static int rcu_print_task_stall(struct rcu_node *rnp)
+{
+ return 0;
+}
+#endif /* #else #ifdef CONFIG_PREEMPT */
+
+/*
+ * Dump stacks of all tasks running on stalled CPUs. First try using
+ * NMIs, but fall back to manual remote stack tracing on architectures
+ * that don't support NMI-based stack dumps. The NMI-triggered stack
+ * traces are more accurate because they are printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(void)
+{
+ int cpu;
+ unsigned long flags;
+ struct rcu_node *rnp;
+
+ rcu_for_each_leaf_node(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ for_each_leaf_node_possible_cpu(rnp, cpu)
+ if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+ if (!trigger_single_cpu_backtrace(cpu))
+ dump_cpu_task(cpu);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+}
+
+#ifdef CONFIG_RCU_FAST_NO_HZ
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+
+ sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c",
+ rdp->last_accelerate & 0xffff, jiffies & 0xffff,
+ ".l"[rdp->all_lazy],
+ ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)],
+ ".D"[!!rdp->tick_nohz_enabled_snap]);
+}
+
+#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
+{
+ *cp = '\0';
+}
+
+#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
+
+/*
+ * Print out diagnostic information for the specified stalled CPU.
+ *
+ * If the specified CPU is aware of the current RCU grace period, then
+ * print the number of scheduling clock interrupts the CPU has taken
+ * during the time that it has been aware. Otherwise, print the number
+ * of RCU grace periods that this CPU is ignorant of, for example, "1"
+ * if the CPU was aware of the previous grace period.
+ *
+ * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
+ */
+static void print_cpu_stall_info(int cpu)
+{
+ unsigned long delta;
+ char fast_no_hz[72];
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ char *ticks_title;
+ unsigned long ticks_value;
+
+ /*
+ * We could be printing a lot while holding a spinlock. Avoid
+ * triggering hard lockup.
+ */
+ touch_nmi_watchdog();
+
+ ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq);
+ if (ticks_value) {
+ ticks_title = "GPs behind";
+ } else {
+ ticks_title = "ticks this GP";
+ ticks_value = rdp->ticks_this_gp;
+ }
+ print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
+ delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
+ pr_err("\t%d-%c%c%c%c: (%lu %s) idle=%03x/%ld/%#lx softirq=%u/%u fqs=%ld %s\n",
+ cpu,
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
+ "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
+ !IS_ENABLED(CONFIG_IRQ_WORK) ? '?' :
+ rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
+ "!."[!delta],
+ ticks_value, ticks_title,
+ rcu_dynticks_snap(rdp) & 0xfff,
+ rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
+ rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
+ READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
+ fast_no_hz);
+}
+
+/* Complain about starvation of grace-period kthread. */
+static void rcu_check_gp_kthread_starvation(void)
+{
+ struct task_struct *gpk = rcu_state.gp_kthread;
+ unsigned long j;
+
+ j = jiffies - READ_ONCE(rcu_state.gp_activity);
+ if (j > 2 * HZ) {
+ pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
+ rcu_state.name, j,
+ (long)rcu_seq_current(&rcu_state.gp_seq),
+ READ_ONCE(rcu_state.gp_flags),
+ gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
+ gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
+ if (gpk) {
+ pr_err("RCU grace-period kthread stack dump:\n");
+ sched_show_task(gpk);
+ wake_up_process(gpk);
+ }
+ }
+}
+
+static void print_other_cpu_stall(unsigned long gp_seq)
+{
+ int cpu;
+ unsigned long flags;
+ unsigned long gpa;
+ unsigned long j;
+ int ndetected = 0;
+ struct rcu_node *rnp;
+ long totqlen = 0;
+
+ /* Kick and suppress, if so configured. */
+ rcu_stall_kick_kthreads();
+ if (rcu_cpu_stall_suppress)
+ return;
+
+ /*
+ * OK, time to rat on our buddy...
+ * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * RCU CPU stall warnings.
+ */
+ pr_err("INFO: %s detected stalls on CPUs/tasks:\n", rcu_state.name);
+ rcu_for_each_leaf_node(rnp) {
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ ndetected += rcu_print_task_stall(rnp);
+ if (rnp->qsmask != 0) {
+ for_each_leaf_node_possible_cpu(rnp, cpu)
+ if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
+ print_cpu_stall_info(cpu);
+ ndetected++;
+ }
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ }
+
+ for_each_possible_cpu(cpu)
+ totqlen += rcu_get_n_cbs_cpu(cpu);
+ pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
+ smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
+ (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+ if (ndetected) {
+ rcu_dump_cpu_stacks();
+
+ /* Complain about tasks blocking the grace period. */
+ rcu_for_each_leaf_node(rnp)
+ rcu_print_detail_task_stall_rnp(rnp);
+ } else {
+ if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
+ pr_err("INFO: Stall ended before state dump start\n");
+ } else {
+ j = jiffies;
+ gpa = READ_ONCE(rcu_state.gp_activity);
+ pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
+ rcu_state.name, j - gpa, j, gpa,
+ READ_ONCE(jiffies_till_next_fqs),
+ rcu_get_root()->qsmask);
+ /* In this case, the current CPU might be at fault. */
+ sched_show_task(current);
+ }
+ }
+ /* Rewrite if needed in case of slow consoles. */
+ if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
+ WRITE_ONCE(rcu_state.jiffies_stall,
+ jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
+
+ rcu_check_gp_kthread_starvation();
+
+ panic_on_rcu_stall();
+
+ rcu_force_quiescent_state(); /* Kick them all. */
+}
+
+static void print_cpu_stall(void)
+{
+ int cpu;
+ unsigned long flags;
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ struct rcu_node *rnp = rcu_get_root();
+ long totqlen = 0;
+
+ /* Kick and suppress, if so configured. */
+ rcu_stall_kick_kthreads();
+ if (rcu_cpu_stall_suppress)
+ return;
+
+ /*
+ * OK, time to rat on ourselves...
+ * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * RCU CPU stall warnings.
+ */
+ pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
+ raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
+ print_cpu_stall_info(smp_processor_id());
+ raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
+ for_each_possible_cpu(cpu)
+ totqlen += rcu_get_n_cbs_cpu(cpu);
+ pr_cont("\t(t=%lu jiffies g=%ld q=%lu)\n",
+ jiffies - rcu_state.gp_start,
+ (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
+
+ rcu_check_gp_kthread_starvation();
+
+ rcu_dump_cpu_stacks();
+
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ /* Rewrite if needed in case of slow consoles. */
+ if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
+ WRITE_ONCE(rcu_state.jiffies_stall,
+ jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+ panic_on_rcu_stall();
+
+ /*
+ * Attempt to revive the RCU machinery by forcing a context switch.
+ *
+ * A context switch would normally allow the RCU state machine to make
+ * progress and it could be we're stuck in kernel space without context
+ * switches for an entirely unreasonable amount of time.
+ */
+ set_tsk_need_resched(current);
+ set_preempt_need_resched();
+}
+
+static void check_cpu_stall(struct rcu_data *rdp)
+{
+ unsigned long gs1;
+ unsigned long gs2;
+ unsigned long gps;
+ unsigned long j;
+ unsigned long jn;
+ unsigned long js;
+ struct rcu_node *rnp;
+
+ if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+ !rcu_gp_in_progress())
+ return;
+ rcu_stall_kick_kthreads();
+ j = jiffies;
+
+ /*
+ * Lots of memory barriers to reject false positives.
+ *
+ * The idea is to pick up rcu_state.gp_seq, then
+ * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
+ * another copy of rcu_state.gp_seq. These values are updated in
+ * the opposite order with memory barriers (or equivalent) during
+ * grace-period initialization and cleanup. Now, a false positive
+ * can occur if we get a new value of rcu_state.gp_start and an old
+ * value of rcu_state.jiffies_stall. But given the memory barriers,
+ * the only way that this can happen is if one grace period ends
+ * and another starts between these two fetches. This is detected
+ * by comparing the second fetch of rcu_state.gp_seq with the
+ * previous fetch from rcu_state.gp_seq.
+ *
+ * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
+ * and rcu_state.gp_start suffice to forestall false positives.
+ */
+ gs1 = READ_ONCE(rcu_state.gp_seq);
+ smp_rmb(); /* Pick up ->gp_seq first... */
+ js = READ_ONCE(rcu_state.jiffies_stall);
+ smp_rmb(); /* ...then ->jiffies_stall before the rest... */
+ gps = READ_ONCE(rcu_state.gp_start);
+ smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
+ gs2 = READ_ONCE(rcu_state.gp_seq);
+ if (gs1 != gs2 ||
+ ULONG_CMP_LT(j, js) ||
+ ULONG_CMP_GE(gps, js))
+ return; /* No stall or GP completed since entering function. */
+ rnp = rdp->mynode;
+ jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
+ if (rcu_gp_in_progress() &&
+ (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
+ cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
+
+ /* We haven't checked in, so go dump stack. */
+ print_cpu_stall();
+
+ } else if (rcu_gp_in_progress() &&
+ ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
+ cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
+
+ /* They had a few time units to dump stack, so complain. */
+ print_other_cpu_stall(gs2);
+ }
+}
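
The ordering argument in the check_cpu_stall() comment above is a seqlock-style consistency check: sample the generation counter, sample the data it guards, then sample the counter again, and discard the whole snapshot if the counter moved in the meantime. A minimal user-space analogue of that reader-side pattern, sketched with C11 atomics rather than the kernel's READ_ONCE()/smp_rmb() primitives (all names below are illustrative, not kernel APIs):

	#include <stdatomic.h>
	#include <stdbool.h>

	static _Atomic unsigned long gp_seq, jiffies_stall, gp_start;

	struct stall_snap {
		unsigned long seq1, stall, start, seq2;
	};

	/* Reader side: acquire loads stand in for the smp_rmb()-separated reads. */
	static bool read_stall_snapshot(struct stall_snap *s)
	{
		s->seq1  = atomic_load_explicit(&gp_seq, memory_order_acquire);
		s->stall = atomic_load_explicit(&jiffies_stall, memory_order_acquire);
		s->start = atomic_load_explicit(&gp_start, memory_order_acquire);
		s->seq2  = atomic_load_explicit(&gp_seq, memory_order_acquire);
		/* A moved counter means a grace period ended or began in between. */
		return s->seq1 == s->seq2;
	}

As in check_cpu_stall(), a caller that sees seq1 != seq2 simply returns and retries on a later invocation rather than reporting a possibly spurious stall.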
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RCU forward-progress mechanisms, including that of callback invocation.
+
+
+/*
+ * Show the state of the grace-period kthreads.
+ */
+void show_rcu_gp_kthreads(void)
+{
+ int cpu;
+ unsigned long j;
+ unsigned long ja;
+ unsigned long jr;
+ unsigned long jw;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+
+ j = jiffies;
+ ja = j - READ_ONCE(rcu_state.gp_activity);
+ jr = j - READ_ONCE(rcu_state.gp_req_activity);
+ jw = j - READ_ONCE(rcu_state.gp_wake_time);
+ pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
+ rcu_state.name, gp_state_getname(rcu_state.gp_state),
+ rcu_state.gp_state,
+ rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
+ ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
+ (long)READ_ONCE(rcu_state.gp_seq),
+ (long)READ_ONCE(rcu_get_root()->gp_seq_needed),
+ READ_ONCE(rcu_state.gp_flags));
+ rcu_for_each_node_breadth_first(rnp) {
+ if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
+ continue;
+ pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
+ rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
+ (long)rnp->gp_seq_needed);
+ if (!rcu_is_leaf_node(rnp))
+ continue;
+ for_each_leaf_node_possible_cpu(rnp, cpu) {
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ if (rdp->gpwrap ||
+ ULONG_CMP_GE(rcu_state.gp_seq,
+ rdp->gp_seq_needed))
+ continue;
+ pr_info("\tcpu %d ->gp_seq_needed %ld\n",
+ cpu, (long)rdp->gp_seq_needed);
+ }
+ }
+ /* sched_show_task(rcu_state.gp_kthread); */
+}
+EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
+
+/*
+ * This function checks for grace-period requests that fail to motivate
+ * RCU to come out of its idle mode.
+ */
+static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
+ const unsigned long gpssdelay)
+{
+ unsigned long flags;
+ unsigned long j;
+ struct rcu_node *rnp_root = rcu_get_root();
+ static atomic_t warned = ATOMIC_INIT(0);
+
+ if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
+ return;
+ j = jiffies; /* Expensive access, and in common case don't get here. */
+ if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
+ time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
+ atomic_read(&warned))
+ return;
+
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ j = jiffies;
+ if (rcu_gp_in_progress() ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
+ time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
+ time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
+ atomic_read(&warned)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return;
+ }
+ /* Hold onto the leaf lock to make others see warned==1. */
+
+ if (rnp_root != rnp)
+ raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+ j = jiffies;
+ if (rcu_gp_in_progress() ||
+ ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
+ time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
+ time_before(j, rcu_state.gp_activity + gpssdelay) ||
+ atomic_xchg(&warned, 1)) {
+ raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return;
+ }
+ WARN_ON(1);
+ if (rnp_root != rnp)
+ raw_spin_unlock_rcu_node(rnp_root);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ show_rcu_gp_kthreads();
+}
+
+/*
+ * Do a forward-progress check for rcutorture. This is normally invoked
+ * due to an OOM event. The argument "j" gives the time period during
+ * which rcutorture would like progress to have been made.
+ */
+void rcu_fwd_progress_check(unsigned long j)
+{
+ unsigned long cbs;
+ int cpu;
+ unsigned long max_cbs = 0;
+ int max_cpu = -1;
+ struct rcu_data *rdp;
+
+ if (rcu_gp_in_progress()) {
+ pr_info("%s: GP age %lu jiffies\n",
+ __func__, jiffies - rcu_state.gp_start);
+ show_rcu_gp_kthreads();
+ } else {
+ pr_info("%s: Last GP end %lu jiffies ago\n",
+ __func__, jiffies - rcu_state.gp_end);
+ preempt_disable();
+ rdp = this_cpu_ptr(&rcu_data);
+ rcu_check_gp_start_stall(rdp->mynode, rdp, j);
+ preempt_enable();
+ }
+ for_each_possible_cpu(cpu) {
+ cbs = rcu_get_n_cbs_cpu(cpu);
+ if (!cbs)
+ continue;
+ if (max_cpu < 0)
+ pr_info("%s: callbacks", __func__);
+ pr_cont(" %d: %lu", cpu, cbs);
+ if (cbs <= max_cbs)
+ continue;
+ max_cbs = cbs;
+ max_cpu = cpu;
+ }
+ if (max_cpu >= 0)
+ pr_cont("\n");
+}
+EXPORT_SYMBOL_GPL(rcu_fwd_progress_check);
+
+/* Commandeer a sysrq key to dump RCU's tree. */
+static bool sysrq_rcu;
+module_param(sysrq_rcu, bool, 0444);
+
+/* Dump grace-period-request information due to commandeered sysrq. */
+static void sysrq_show_rcu(int key)
+{
+ show_rcu_gp_kthreads();
+}
+
+static struct sysrq_key_op sysrq_rcudump_op = {
+ .handler = sysrq_show_rcu,
+ .help_msg = "show-rcu(y)",
+ .action_msg = "Show RCU tree",
+ .enable_mask = SYSRQ_ENABLE_DUMP,
+};
+
+static int __init rcu_sysrq_init(void)
+{
+ if (sysrq_rcu)
+ return register_sysrq_key('y', &sysrq_rcudump_op);
+ return 0;
+}
+early_initcall(rcu_sysrq_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index cbaa976c5945..c3bf44ba42e5 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -424,68 +424,11 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
#endif
#ifdef CONFIG_RCU_STALL_COMMON
-
-#ifdef CONFIG_PROVE_RCU
-#define RCU_STALL_DELAY_DELTA (5 * HZ)
-#else
-#define RCU_STALL_DELAY_DELTA 0
-#endif
-
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
-static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
-
module_param(rcu_cpu_stall_suppress, int, 0644);
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
-
-int rcu_jiffies_till_stall_check(void)
-{
- int till_stall_check = READ_ONCE(rcu_cpu_stall_timeout);
-
- /*
- * Limit check must be consistent with the Kconfig limits
- * for CONFIG_RCU_CPU_STALL_TIMEOUT.
- */
- if (till_stall_check < 3) {
- WRITE_ONCE(rcu_cpu_stall_timeout, 3);
- till_stall_check = 3;
- } else if (till_stall_check > 300) {
- WRITE_ONCE(rcu_cpu_stall_timeout, 300);
- till_stall_check = 300;
- }
- return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
-}
-EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
-
-void rcu_sysrq_start(void)
-{
- if (!rcu_cpu_stall_suppress)
- rcu_cpu_stall_suppress = 2;
-}
-
-void rcu_sysrq_end(void)
-{
- if (rcu_cpu_stall_suppress == 2)
- rcu_cpu_stall_suppress = 0;
-}
-
-static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
-{
- rcu_cpu_stall_suppress = 1;
- return NOTIFY_DONE;
-}
-
-static struct notifier_block rcu_panic_block = {
- .notifier_call = rcu_panic,
-};
-
-static int __init check_cpu_stall_init(void)
-{
- atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
- return 0;
-}
-early_initcall(check_cpu_stall_init);
-
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
#ifdef CONFIG_TASKS_RCU
diff --git a/kernel/resource.c b/kernel/resource.c
index 92190f62ebc5..8c15f846e8ef 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -520,21 +520,20 @@ EXPORT_SYMBOL_GPL(page_is_ram);
int region_intersects(resource_size_t start, size_t size, unsigned long flags,
unsigned long desc)
{
- resource_size_t end = start + size - 1;
+ struct resource res;
int type = 0; int other = 0;
struct resource *p;
+ res.start = start;
+ res.end = start + size - 1;
+
read_lock(&resource_lock);
for (p = iomem_resource.child; p ; p = p->sibling) {
bool is_type = (((p->flags & flags) == flags) &&
((desc == IORES_DESC_NONE) ||
(desc == p->desc)));
- if (start >= p->start && start <= p->end)
- is_type ? type++ : other++;
- if (end >= p->start && end <= p->end)
- is_type ? type++ : other++;
- if (p->start >= start && p->end <= end)
+ if (resource_overlaps(p, &res))
is_type ? type++ : other++;
}
read_unlock(&resource_lock);
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 25e9a7b60eba..9424ee90589e 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -254,8 +254,7 @@ static int rseq_ip_fixup(struct pt_regs *regs)
* - signal delivery,
* and return to user-space.
*
- * This is how we can ensure that the entire rseq critical section,
- * consisting of both the C part and the assembly instruction sequence,
+ * This is how we can ensure that the entire rseq critical section
* will issue the commit instruction only if executed atomically with
* respect to other threads scheduled on the same CPU, and with respect
* to signal handlers.
@@ -314,7 +313,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
/* Unregister rseq for current thread. */
if (current->rseq != rseq || !current->rseq)
return -EINVAL;
- if (current->rseq_len != rseq_len)
+ if (rseq_len != sizeof(*rseq))
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
@@ -322,7 +321,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (ret)
return ret;
current->rseq = NULL;
- current->rseq_len = 0;
current->rseq_sig = 0;
return 0;
}
@@ -336,7 +334,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || current->rseq_len != rseq_len)
+ if (current->rseq != rseq || rseq_len != sizeof(*rseq))
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
@@ -354,7 +352,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (!access_ok(rseq, rseq_len))
return -EFAULT;
current->rseq = rseq;
- current->rseq_len = rseq_len;
current->rseq_sig = sig;
/*
* If rseq was previously inactive, and has just been
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4778c48a7fda..ade3f2287d1f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1151,7 +1151,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, p, &rf);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
- tlb_migrate_finish(p->mm);
return 0;
} else if (task_on_rq_queued(p)) {
/*
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 5c41ea367422..3638d2377e3c 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -771,6 +771,7 @@ out:
return 0;
fail:
+ kobject_put(&tunables->attr_set.kobj);
policy->governor_data = NULL;
sugov_tunables_free(tunables);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6a73e41a2016..43901fa3f269 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p)
if (dl_entity_is_special(dl_se))
return;
- WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending);
zerolag_time = dl_se->deadline -
@@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p)
* If the "0-lag time" already passed, decrease the active
* utilization now, instead of starting a timer
*/
- if (zerolag_time < 0) {
+ if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
if (dl_task(p))
sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 40bd1e27b1b7..35f3ea375084 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2007,6 +2007,10 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
if (p->last_task_numa_placement) {
delta = runtime - p->last_sum_exec_runtime;
*period = now - p->last_task_numa_placement;
+
+ /* Avoid time going backwards, prevent potential divide error: */
+ if (unlikely((s64)*period < 0))
+ *period = 0;
} else {
delta = p->se.avg.load_sum;
*period = LOAD_AVG_MAX;
@@ -4885,6 +4889,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
+extern const u64 max_cfs_quota_period;
+
static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
@@ -4892,6 +4898,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
unsigned long flags;
int overrun;
int idle = 0;
+ int count = 0;
raw_spin_lock_irqsave(&cfs_b->lock, flags);
for (;;) {
@@ -4899,6 +4906,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (!overrun)
break;
+ if (++count > 3) {
+ u64 new, old = ktime_to_ns(cfs_b->period);
+
+ new = (old * 147) / 128; /* ~115% */
+ new = min(new, max_cfs_quota_period);
+
+ cfs_b->period = ns_to_ktime(new);
+
+ /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
+ cfs_b->quota *= new;
+ cfs_b->quota = div64_u64(cfs_b->quota, old);
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+
+ /* reset count so we don't come right back in here */
+ count = 0;
+ }
+
idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
}
if (idle)
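
The scaling in the sched_cfs_period_timer() hunk above multiplies the period by 147/128 (roughly a 15% increase, hence the "~115%" comment) and rescales the quota by the same ratio, so the group's allowed bandwidth quota/period is unchanged. With hypothetical values, a period of 100 us and a quota of 50 us become about 114.8 us and 57.4 us: 100 * 147 / 128 = 114.84, and 50 * 114.84 / 100 = 57.42, leaving the 50% ratio intact while making the timer fire less often.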
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index df27e499956a..3582eeb59893 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -502,7 +502,10 @@ out:
*
* Caller must be holding current->sighand->siglock lock.
*
- * Returns 0 on success, -ve on error.
+ * Returns 0 on success, -ve on error, or
+ * - in TSYNC mode: the pid of a thread which was either not in the correct
+ * seccomp mode or did not have an ancestral seccomp filter
+ * - in NEW_LISTENER mode: the fd of the new listener
*/
static long seccomp_attach_filter(unsigned int flags,
struct seccomp_filter *filter)
@@ -1258,6 +1261,16 @@ static long seccomp_set_mode_filter(unsigned int flags,
if (flags & ~SECCOMP_FILTER_FLAG_MASK)
return -EINVAL;
+ /*
+ * In the successful case, NEW_LISTENER returns the new listener fd.
+ * But in the failure case, TSYNC returns the thread that died. If you
+ * combine these two flags, there's no way to tell whether something
+ * succeeded or failed. So, let's disallow this combination.
+ */
+ if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
+ (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER))
+ return -EINVAL;
+
/* Prepare the new filter before holding any locks. */
prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
@@ -1304,7 +1317,7 @@ out:
mutex_unlock(&current->signal->cred_guard_mutex);
out_put_fd:
if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
- if (ret < 0) {
+ if (ret) {
listener_f->private_data = NULL;
fput(listener_f);
put_unused_fd(listener);
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..227ba170298e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (flags)
return -EINVAL;
- f = fdget_raw(pidfd);
+ f = fdget(pidfd);
if (!f.file)
return -EBADF;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..930113b9799a 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void)
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
@@ -283,7 +283,7 @@ static int sched_clock_suspend(void)
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..df401463a191 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -487,6 +487,7 @@ void tick_freeze(void)
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
system_state = SYSTEM_SUSPEND;
+ sched_clock_suspend();
timekeeping_suspend();
} else {
tick_suspend_local();
@@ -510,6 +511,7 @@ void tick_unfreeze(void)
if (tick_freeze_depth == num_online_cpus()) {
timekeeping_resume();
+ sched_clock_resume();
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb7a1d5..141ab3ab0354 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void);
extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern int sched_clock_suspend(void);
+extern void sched_clock_resume(void);
+#else
+static inline int sched_clock_suspend(void) { return 0; }
+static inline void sched_clock_resume(void) { }
+#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
diff --git a/kernel/torture.c b/kernel/torture.c
index 8faa1a9aaeb9..17b2be9bde12 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -88,6 +88,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
return false;
+ if (num_online_cpus() <= 1)
+ return false; /* Can't offline the last CPU. */
if (verbose > 1)
pr_alert("%s" TORTURE_FLAG
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 26c8ca9bd06b..b920358dd8f7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -33,6 +33,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
#include <trace/events/sched.h>
@@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr)
tr->ops->func = ftrace_stub;
}
-static inline void
+static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
@@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
{
__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
}
+NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
+NOKPROBE_SYMBOL(ftrace_ops_no_ops);
#endif
/*
@@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
trace_clear_recursion(bit);
}
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
/**
* ftrace_ops_get_func - get the function a trampoline should call
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 41b6f96e5366..4ee8d8aa3d0f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -762,7 +762,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
preempt_disable_notrace();
time = rb_time_stamp(buffer);
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return time;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0ce8515dd470..ec439999f387 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -498,8 +498,10 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
* not modified.
*/
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
- if (!pid_list)
+ if (!pid_list) {
+ trace_parser_put(&parser);
return -ENOMEM;
+ }
pid_list->pid_max = READ_ONCE(pid_max);
@@ -509,6 +511,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
+ trace_parser_put(&parser);
kfree(pid_list);
return -ENOMEM;
}
@@ -7014,35 +7017,43 @@ struct buffer_ref {
struct ring_buffer *buffer;
void *page;
int cpu;
- int ref;
+ refcount_t refcount;
};
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+ if (!refcount_dec_and_test(&ref->refcount))
+ return;
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+ kfree(ref);
+}
+
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
buf->private = 0;
}
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- ref->ref++;
+ if (refcount_read(&ref->refcount) > INT_MAX/2)
+ return false;
+
+ refcount_inc(&ref->refcount);
+ return true;
}
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
.confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = generic_pipe_buf_nosteal,
.get = buffer_pipe_buf_get,
};
@@ -7055,11 +7066,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
struct buffer_ref *ref =
(struct buffer_ref *)spd->partial[i].private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
spd->partial[i].private = 0;
}
@@ -7114,7 +7121,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
break;
}
- ref->ref = 1;
+ refcount_set(&ref->refcount, 1);
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
if (IS_ERR(ref->page)) {
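
The trace.c hunks above replace buffer_ref's bare int reference count with a refcount_t and make the pipe ->get() callback fallible, so new references are refused once the count nears saturation. A rough user-space analogue of that saturating get/put pattern, sketched with C11 atomics instead of the kernel's refcount_t (names below are illustrative only):

	#include <limits.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdlib.h>

	struct buf_ref {
		atomic_int refcount;
		void *page;		/* payload owned by the last reference */
	};

	static struct buf_ref *buf_ref_alloc(void *page)
	{
		struct buf_ref *ref = malloc(sizeof(*ref));

		if (!ref)
			return NULL;
		atomic_init(&ref->refcount, 1);	/* mirrors refcount_set(..., 1) */
		ref->page = page;
		return ref;
	}

	/* Like buffer_pipe_buf_get(): refuse a new reference near overflow. */
	static bool buf_ref_get(struct buf_ref *ref)
	{
		if (atomic_load(&ref->refcount) > INT_MAX / 2)
			return false;
		atomic_fetch_add(&ref->refcount, 1);
		return true;
	}

	/* Like buffer_ref_release(): the final put frees the payload. */
	static void buf_ref_put(struct buf_ref *ref)
	{
		if (atomic_fetch_sub(&ref->refcount, 1) == 1) {
			free(ref->page);
			free(ref);
		}
	}

A caller that gets false back from buf_ref_get() must treat the reference as not taken, so the failure can be propagated instead of letting the count silently overflow.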
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4ad967453b6f..3ea65cdff30d 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -205,6 +205,8 @@ void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect)
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
int expect, int is_constant)
{
+ unsigned long flags = user_access_save();
+
/* A constant is always correct */
if (is_constant) {
f->constant++;
@@ -223,6 +225,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
f->data.correct++;
else
f->data.incorrect++;
+
+ user_access_restore(flags);
}
EXPORT_SYMBOL(ftrace_likely_update);
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71381168dede..247bf0b1582c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
+ this_cpu);
print_modules();
print_irqtrace_events(current);
if (regs)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d9e81779e37..d5a4a4036d2f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -753,9 +753,9 @@ endmenu # "Memory Debugging"
config ARCH_HAS_KCOV
bool
help
- KCOV does not have any arch-specific code, but currently it is enabled
- only for x86_64. KCOV requires testing on other archs, and most likely
- disabling of instrumentation for some early boot code.
+ An architecture should select this when it can successfully
+ build and run with CONFIG_KCOV. This typically requires
+ disabling instrumentation for some early boot code.
config CC_HAS_SANCOV_TRACE_PC
def_bool $(cc-option,-fsanitize-coverage=trace-pc)
@@ -1929,6 +1929,7 @@ config TEST_KMOD
depends on m
depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
depends on NETDEVICES && NET_CORE && INET # for TUN
+ depends on BLOCK
select TEST_LKM
select XFS_FS
select TUN
diff --git a/lib/Makefile b/lib/Makefile
index 3b08673e8881..e16e7aadc41a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -17,6 +17,17 @@ KCOV_INSTRUMENT_list_debug.o := n
KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
+# Early boot use of cmdline, don't instrument it
+ifdef CONFIG_AMD_MEM_ENCRYPT
+KASAN_SANITIZE_string.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_string.o = -pg
+endif
+
+CFLAGS_string.o := $(call cc-option, -fno-stack-protector)
+endif
+
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o timerqueue.o xarray.o \
idr.o int_sqrt.o extable.o \
@@ -268,6 +279,7 @@ obj-$(CONFIG_UCS2_STRING) += ucs2_string.o
obj-$(CONFIG_UBSAN) += ubsan.o
UBSAN_SANITIZE_ubsan.o := n
+CFLAGS_ubsan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
obj-$(CONFIG_SBITMAP) += sbitmap.o
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 58eacd41526c..023ba9f3b99f 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -23,10 +23,11 @@
* hit it), 'max' is the address space maximum (and we return
* -EFAULT if we hit it).
*/
-static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max)
+static inline long do_strncpy_from_user(char *dst, const char __user *src,
+ unsigned long count, unsigned long max)
{
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
- long res = 0;
+ unsigned long res = 0;
/*
* Truncate 'max' to the user-specified limit, so that
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 1c1a1b0e38a5..7f2db3fe311f 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -28,7 +28,7 @@
static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max)
{
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
- long align, res = 0;
+ unsigned long align, res = 0;
unsigned long c;
/*
@@ -42,7 +42,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
* Do everything aligned. But that means that we
* need to also expand the maximum..
*/
- align = (sizeof(long) - 1) & (unsigned long)src;
+ align = (sizeof(unsigned long) - 1) & (unsigned long)src;
src -= align;
max += align;
diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c
index 83cdcaa82bf6..f832b095afba 100644
--- a/lib/test_vmalloc.c
+++ b/lib/test_vmalloc.c
@@ -383,14 +383,14 @@ static void shuffle_array(int *arr, int n)
static int test_func(void *private)
{
struct test_driver *t = private;
- cpumask_t newmask = CPU_MASK_NONE;
int random_array[ARRAY_SIZE(test_case_array)];
int index, i, j, ret;
ktime_t kt;
u64 delta;
- cpumask_set_cpu(t->cpu, &newmask);
- set_cpus_allowed_ptr(current, &newmask);
+ ret = set_cpus_allowed_ptr(current, cpumask_of(t->cpu));
+ if (ret < 0)
+ pr_err("Failed to set affinity to %d CPU\n", t->cpu);
for (i = 0; i < ARRAY_SIZE(test_case_array); i++)
random_array[i] = i;
diff --git a/lib/ubsan.c b/lib/ubsan.c
index e4162f59a81c..ecc179338094 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/uaccess.h>
#include "ubsan.h"
@@ -86,11 +87,13 @@ static bool is_inline_int(struct type_descriptor *type)
return bits <= inline_bits;
}
-static s_max get_signed_val(struct type_descriptor *type, unsigned long val)
+static s_max get_signed_val(struct type_descriptor *type, void *val)
{
if (is_inline_int(type)) {
unsigned extra_bits = sizeof(s_max)*8 - type_bit_width(type);
- return ((s_max)val) << extra_bits >> extra_bits;
+ unsigned long ulong_val = (unsigned long)val;
+
+ return ((s_max)ulong_val) << extra_bits >> extra_bits;
}
if (type_bit_width(type) == 64)
@@ -99,15 +102,15 @@ static s_max get_signed_val(struct type_descriptor *type, unsigned long val)
return *(s_max *)val;
}
-static bool val_is_negative(struct type_descriptor *type, unsigned long val)
+static bool val_is_negative(struct type_descriptor *type, void *val)
{
return type_is_signed(type) && get_signed_val(type, val) < 0;
}
-static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val)
+static u_max get_unsigned_val(struct type_descriptor *type, void *val)
{
if (is_inline_int(type))
- return val;
+ return (unsigned long)val;
if (type_bit_width(type) == 64)
return *(u64 *)val;
@@ -116,7 +119,7 @@ static u_max get_unsigned_val(struct type_descriptor *type, unsigned long val)
}
static void val_to_string(char *str, size_t size, struct type_descriptor *type,
- unsigned long value)
+ void *value)
{
if (type_is_int(type)) {
if (type_bit_width(type) == 128) {
@@ -163,8 +166,8 @@ static void ubsan_epilogue(unsigned long *flags)
current->in_ubsan--;
}
-static void handle_overflow(struct overflow_data *data, unsigned long lhs,
- unsigned long rhs, char op)
+static void handle_overflow(struct overflow_data *data, void *lhs,
+ void *rhs, char op)
{
struct type_descriptor *type = data->type;
@@ -191,8 +194,7 @@ static void handle_overflow(struct overflow_data *data, unsigned long lhs,
}
void __ubsan_handle_add_overflow(struct overflow_data *data,
- unsigned long lhs,
- unsigned long rhs)
+ void *lhs, void *rhs)
{
handle_overflow(data, lhs, rhs, '+');
@@ -200,23 +202,21 @@ void __ubsan_handle_add_overflow(struct overflow_data *data,
EXPORT_SYMBOL(__ubsan_handle_add_overflow);
void __ubsan_handle_sub_overflow(struct overflow_data *data,
- unsigned long lhs,
- unsigned long rhs)
+ void *lhs, void *rhs)
{
handle_overflow(data, lhs, rhs, '-');
}
EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
void __ubsan_handle_mul_overflow(struct overflow_data *data,
- unsigned long lhs,
- unsigned long rhs)
+ void *lhs, void *rhs)
{
handle_overflow(data, lhs, rhs, '*');
}
EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
void __ubsan_handle_negate_overflow(struct overflow_data *data,
- unsigned long old_val)
+ void *old_val)
{
unsigned long flags;
char old_val_str[VALUE_LENGTH];
@@ -237,8 +237,7 @@ EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
void __ubsan_handle_divrem_overflow(struct overflow_data *data,
- unsigned long lhs,
- unsigned long rhs)
+ void *lhs, void *rhs)
{
unsigned long flags;
char rhs_val_str[VALUE_LENGTH];
@@ -313,6 +312,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
unsigned long ptr)
{
+ unsigned long flags = user_access_save();
if (!ptr)
handle_null_ptr_deref(data);
@@ -320,10 +320,12 @@ static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
handle_misaligned_access(data, ptr);
else
handle_object_size_mismatch(data, ptr);
+
+ user_access_restore(flags);
}
void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
- unsigned long ptr)
+ void *ptr)
{
struct type_mismatch_data_common common_data = {
.location = &data->location,
@@ -332,12 +334,12 @@ void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
.type_check_kind = data->type_check_kind
};
- ubsan_type_mismatch_common(&common_data, ptr);
+ ubsan_type_mismatch_common(&common_data, (unsigned long)ptr);
}
EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
- unsigned long ptr)
+ void *ptr)
{
struct type_mismatch_data_common common_data = {
@@ -347,30 +349,11 @@ void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
.type_check_kind = data->type_check_kind
};
- ubsan_type_mismatch_common(&common_data, ptr);
+ ubsan_type_mismatch_common(&common_data, (unsigned long)ptr);
}
EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
-void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
- unsigned long bound)
-{
- unsigned long flags;
- char bound_str[VALUE_LENGTH];
-
- if (suppress_report(&data->location))
- return;
-
- ubsan_prologue(&data->location, &flags);
-
- val_to_string(bound_str, sizeof(bound_str), data->type, bound);
- pr_err("variable length array bound value %s <= 0\n", bound_str);
-
- ubsan_epilogue(&flags);
-}
-EXPORT_SYMBOL(__ubsan_handle_vla_bound_not_positive);
-
-void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data,
- unsigned long index)
+void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data, void *index)
{
unsigned long flags;
char index_str[VALUE_LENGTH];
@@ -388,7 +371,7 @@ void __ubsan_handle_out_of_bounds(struct out_of_bounds_data *data,
EXPORT_SYMBOL(__ubsan_handle_out_of_bounds);
void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
- unsigned long lhs, unsigned long rhs)
+ void *lhs, void *rhs)
{
unsigned long flags;
struct type_descriptor *rhs_type = data->rhs_type;
@@ -439,7 +422,7 @@ void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
EXPORT_SYMBOL(__ubsan_handle_builtin_unreachable);
void __ubsan_handle_load_invalid_value(struct invalid_value_data *data,
- unsigned long val)
+ void *val)
{
unsigned long flags;
char val_str[VALUE_LENGTH];
diff --git a/lib/ubsan.h b/lib/ubsan.h
index f4d8d0bd4016..b8fa83864467 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -57,11 +57,6 @@ struct nonnull_arg_data {
int arg_index;
};
-struct vla_bound_data {
- struct source_location location;
- struct type_descriptor *type;
-};
-
struct out_of_bounds_data {
struct source_location location;
struct type_descriptor *array_type;
diff --git a/mm/gup.c b/mm/gup.c
index f84e22685aaa..91819b8ad9cc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -160,8 +160,12 @@ retry:
goto retry;
}
- if (flags & FOLL_GET)
- get_page(page);
+ if (flags & FOLL_GET) {
+ if (unlikely(!try_get_page(page))) {
+ page = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ }
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
@@ -298,7 +302,10 @@ retry_locked:
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
- get_page(page);
+ if (unlikely(!try_get_page(page))) {
+ spin_unlock(ptl);
+ return ERR_PTR(-ENOMEM);
+ }
spin_unlock(ptl);
lock_page(page);
ret = split_huge_page(page);
@@ -500,7 +507,10 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
if (is_device_public_page(*page))
goto unmap;
}
- get_page(*page);
+ if (unlikely(!try_get_page(*page))) {
+ ret = -ENOMEM;
+ goto unmap;
+ }
out:
ret = 0;
unmap:
@@ -1545,6 +1555,20 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
}
+/*
+ * Return the compound head page with ref appropriately incremented,
+ * or NULL if that failed.
+ */
+static inline struct page *try_get_compound_head(struct page *page, int refs)
+{
+ struct page *head = compound_head(page);
+ if (WARN_ON_ONCE(page_ref_count(head) < 0))
+ return NULL;
+ if (unlikely(!page_cache_add_speculative(head, refs)))
+ return NULL;
+ return head;
+}
+
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
@@ -1579,9 +1603,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ head = try_get_compound_head(page, 1);
+ if (!head)
goto pte_unmap;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
@@ -1720,8 +1744,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pmd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pmd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1758,8 +1782,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pud_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pud_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
@@ -1795,8 +1819,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);
- head = compound_head(pgd_page(orig));
- if (!page_cache_add_speculative(head, refs)) {
+ head = try_get_compound_head(pgd_page(orig), refs);
+ if (!head) {
*nr -= refs;
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 165ea46bf149..b6a34b32d8ac 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1677,7 +1677,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct mm_struct *mm = tlb->mm;
bool ret = false;
- tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+ tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
ptl = pmd_trans_huge_lock(pmd, vma);
if (!ptl)
@@ -1753,7 +1753,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t orig_pmd;
spinlock_t *ptl;
- tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+ tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
ptl = __pmd_trans_huge_lock(pmd, vma);
if (!ptl)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 97b1e0290c66..641cedfc8c0f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3353,7 +3353,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
* This is a hugetlb vma, all the pte entries should point
* to huge page.
*/
- tlb_remove_check_page_size_change(tlb, sz);
+ tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
/*
@@ -4299,6 +4299,19 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));
+
+ /*
+ * Instead of doing 'try_get_page()' below in the same_page
+ * loop, just check the count once here.
+ */
+ if (unlikely(page_count(page) <= 0)) {
+ if (pages) {
+ spin_unlock(ptl);
+ remainder = 0;
+ err = -ENOMEM;
+ break;
+ }
+ }
same_page:
if (pages) {
pages[i] = mem_map_offset(page, pfn_offset);
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 5d1065efbd47..613dfe681e9f 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -2,11 +2,13 @@
KASAN_SANITIZE := n
UBSAN_SANITIZE_common.o := n
UBSAN_SANITIZE_generic.o := n
+UBSAN_SANITIZE_generic_report.o := n
UBSAN_SANITIZE_tags.o := n
KCOV_INSTRUMENT := n
CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_generic.o = -pg
+CFLAGS_REMOVE_generic_report.o = -pg
CFLAGS_REMOVE_tags.o = -pg
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
@@ -14,6 +16,7 @@ CFLAGS_REMOVE_tags.o = -pg
CFLAGS_common.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
CFLAGS_generic.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+CFLAGS_generic_report.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
CFLAGS_tags.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
obj-$(CONFIG_KASAN) := common.o init.o report.o
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 303a7379d2a3..36afcf64e016 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <linux/vmalloc.h>
#include <linux/bug.h>
+#include <linux/uaccess.h>
#include "kasan.h"
#include "../slab.h"
@@ -605,6 +606,15 @@ void kasan_free_shadow(const struct vm_struct *vm)
vfree(kasan_mem_to_shadow(vm->addr));
}
+extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip);
+
+void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip)
+{
+ unsigned long flags = user_access_save();
+ __kasan_report(addr, size, is_write, ip);
+ user_access_restore(flags);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
static bool shadow_mapped(unsigned long addr)
{
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 882d77568e7e..03a443579386 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -282,8 +282,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
end_report(&flags);
}
-void kasan_report(unsigned long addr, size_t size,
- bool is_write, unsigned long ip)
+void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip)
{
struct kasan_access_info info;
void *tagged_addr;
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index d12b35de1e7e..e57bf810f798 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1388,6 +1388,7 @@ static void scan_block(void *_start, void *_end,
/*
* Scan a large memory block in MAX_SCAN_SIZE chunks to reduce the latency.
*/
+#ifdef CONFIG_SMP
static void scan_large_block(void *start, void *end)
{
void *next;
@@ -1399,6 +1400,7 @@ static void scan_large_block(void *start, void *end)
cond_resched();
}
}
+#endif
/*
* Scan a memory block corresponding to a kmemleak_object. A condition is
diff --git a/mm/madvise.c b/mm/madvise.c
index 21a7881a2db4..bb3a4554d5d5 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -328,7 +328,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
if (pmd_trans_unstable(pmd))
return 0;
- tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
+ tlb_change_page_size(tlb, PAGE_SIZE);
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
diff --git a/mm/memory.c b/mm/memory.c
index ab650c21bccd..36aac6844662 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -356,7 +356,7 @@ void free_pgd_range(struct mmu_gather *tlb,
* We add page table cache pages with PAGE_SIZE,
* (see pte_free_tlb()), flush the tlb if we need
*/
- tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
+ tlb_change_page_size(tlb, PAGE_SIZE);
pgd = pgd_offset(tlb->mm, addr);
do {
next = pgd_addr_end(addr, end);
@@ -1046,7 +1046,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pte_t *pte;
swp_entry_t entry;
- tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
+ tlb_change_page_size(tlb, PAGE_SIZE);
again:
init_rss_vec(rss);
start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
@@ -1155,7 +1155,7 @@ again:
*/
if (force_flush) {
force_flush = 0;
- tlb_flush_mmu_free(tlb);
+ tlb_flush_mmu(tlb);
if (addr != end)
goto again;
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0082d699be94..b236069ff0d8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -874,6 +874,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
*/
mem = find_memory_block(__pfn_to_section(pfn));
nid = mem->nid;
+ put_device(&mem->dev);
/* associate pfn range with the zone */
zone = move_pfn_range(online_type, nid, pfn, nr_pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 41eb48d9b527..bd7b9f293b39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -45,6 +45,7 @@
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -2525,7 +2526,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (!prev || expand_stack(prev, addr))
+ /* don't alter vm_end if the coredump is running */
+ if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2551,6 +2553,9 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
+ /* don't alter vm_start if the coredump is running */
+ if (!mmget_still_valid(mm))
+ return NULL;
start = vma->vm_start;
if (expand_stack(vma, addr))
return NULL;
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index f2f03c655807..99740e1dd273 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -11,7 +11,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
-#ifdef HAVE_GENERIC_MMU_GATHER
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
static bool tlb_next_batch(struct mmu_gather *tlb)
{
@@ -41,35 +41,10 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
return true;
}
-void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
-
- /* Is it from 0 to ~0? */
- tlb->fullmm = !(start | (end+1));
- tlb->need_flush_all = 0;
- tlb->local.next = NULL;
- tlb->local.nr = 0;
- tlb->local.max = ARRAY_SIZE(tlb->__pages);
- tlb->active = &tlb->local;
- tlb->batch_count = 0;
-
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
-#endif
- tlb->page_size = 0;
-
- __tlb_reset_range(tlb);
-}
-
-void tlb_flush_mmu_free(struct mmu_gather *tlb)
+static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
batch->nr = 0;
@@ -77,31 +52,10 @@ void tlb_flush_mmu_free(struct mmu_gather *tlb)
tlb->active = &tlb->local;
}
-void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-/* tlb_finish_mmu
- * Called at the end of the shootdown operation to free up any resources
- * that were required.
- */
-void arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
+static void tlb_batch_list_free(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch, *next;
- if (force) {
- __tlb_reset_range(tlb);
- __tlb_adjust_range(tlb, start, end - start);
- }
-
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
for (batch = tlb->local.next; batch; batch = next) {
next = batch->next;
free_pages((unsigned long)batch, 0);
@@ -109,19 +63,15 @@ void arch_tlb_finish_mmu(struct mmu_gather *tlb,
tlb->local.next = NULL;
}
-/* __tlb_remove_page
- * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
- * handling the additional races in SMP caused by other CPUs caching valid
- * mappings in their TLBs. Returns the number of free page slots left.
- * When out of page slots we must call tlb_flush_mmu().
- *returns true if the caller should flush.
- */
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
struct mmu_gather_batch *batch;
VM_BUG_ON(!tlb->end);
+
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
VM_WARN_ON(tlb->page_size != page_size);
+#endif
batch = tlb->active;
/*
@@ -139,7 +89,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
return false;
}
-#endif /* HAVE_GENERIC_MMU_GATHER */
+#endif /* HAVE_MMU_GATHER_NO_GATHER */
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
@@ -152,7 +102,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
*/
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
-#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
+#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
/*
* Invalidate page-table caches used by hardware walkers. Then we still
* need to RCU-sched wait while freeing the pages because software
@@ -193,7 +143,7 @@ static void tlb_remove_table_rcu(struct rcu_head *head)
free_page((unsigned long)batch);
}
-void tlb_table_flush(struct mmu_gather *tlb)
+static void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;
@@ -225,6 +175,22 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb_table_flush(tlb);
+#endif
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb_batch_pages_flush(tlb);
+#endif
+}
+
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush_mmu_tlbonly(tlb);
+ tlb_flush_mmu_free(tlb);
+}
+
/**
* tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
* @tlb: the mmu_gather structure to initialize
@@ -240,10 +206,40 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- arch_tlb_gather_mmu(tlb, mm, start, end);
+ tlb->mm = mm;
+
+ /* Is it from 0 to ~0? */
+ tlb->fullmm = !(start | (end+1));
+
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb->need_flush_all = 0;
+ tlb->local.next = NULL;
+ tlb->local.nr = 0;
+ tlb->local.max = ARRAY_SIZE(tlb->__pages);
+ tlb->active = &tlb->local;
+ tlb->batch_count = 0;
+#endif
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+ tlb->batch = NULL;
+#endif
+#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+ tlb->page_size = 0;
+#endif
+
+ __tlb_reset_range(tlb);
inc_tlb_flush_pending(tlb->mm);
}
+/**
+ * tlb_finish_mmu - finish an mmu_gather structure
+ * @tlb: the mmu_gather structure to finish
+ * @start: start of the region that will be removed from the page-table
+ * @end: end of the region that will be removed from the page-table
+ *
+ * Called at the end of the shootdown operation to free up any resources that
+ * were required.
+ */
void tlb_finish_mmu(struct mmu_gather *tlb,
unsigned long start, unsigned long end)
{
@@ -254,8 +250,17 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
* the TLB by observing pte_none|!pte_dirty, for example so flush TLB
* forcefully if we detect parallel PTE batching threads.
*/
- bool force = mm_tlb_flush_nested(tlb->mm);
+ if (mm_tlb_flush_nested(tlb->mm)) {
+ __tlb_reset_range(tlb);
+ __tlb_adjust_range(tlb, start, end - start);
+ }
- arch_tlb_finish_mmu(tlb, start, end, force);
+ tlb_flush_mmu(tlb);
+
+ /* keep the page table cache within bounds */
+ check_pgt_cache();
+#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+ tlb_batch_list_free(tlb);
+#endif
dec_tlb_flush_pending(tlb->mm);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d96ca5bc555b..c02cff1ed56e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -266,7 +266,20 @@ compound_page_dtor * const compound_page_dtors[] = {
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
+#ifdef CONFIG_DISCONTIGMEM
+/*
+ * DiscontigMem defines memory ranges as separate pg_data_t even if the ranges
+ * are not on separate NUMA nodes. Functionally this works but with
+ * watermark_boost_factor, it can reclaim prematurely as the ranges can be
+ * quite small. By default, do not boost watermarks on discontigmem as in
+ * many cases very high-order allocations like THP are likely to be
+ * unsupported and the premature reclaim offsets the advantage of long-term
+ * fragmentation avoidance.
+ */
+int watermark_boost_factor __read_mostly;
+#else
int watermark_boost_factor __read_mostly = 15000;
+#endif
int watermark_scale_factor = 10;
static unsigned long nr_kernel_pages __initdata;
@@ -3419,8 +3432,11 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
alloc_flags |= ALLOC_KSWAPD;
#ifdef CONFIG_ZONE_DMA32
+ if (!zone)
+ return alloc_flags;
+
if (zone_idx(zone) != ZONE_NORMAL)
- goto out;
+ return alloc_flags;
/*
* If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
@@ -3429,9 +3445,9 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
*/
BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
if (nr_online_nodes > 1 && !populated_zone(--zone))
- goto out;
+ return alloc_flags;
-out:
+ alloc_flags |= ALLOC_NOFRAGMENT;
#endif /* CONFIG_ZONE_DMA32 */
return alloc_flags;
}
@@ -3773,11 +3789,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
- if (*compact_result <= COMPACT_INACTIVE) {
- WARN_ON_ONCE(page);
- return NULL;
- }
-
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
* count a compaction stall
@@ -8005,7 +8016,10 @@ void *__init alloc_large_system_hash(const char *tablename,
bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
int migratetype, int flags)
{
- unsigned long pfn, iter, found;
+ unsigned long found;
+ unsigned long iter = 0;
+ unsigned long pfn = page_to_pfn(page);
+ const char *reason = "unmovable page";
/*
* TODO we could make this much more efficient by not checking every
@@ -8015,17 +8029,20 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
* can still lead to having bootmem allocations in zone_movable.
*/
- /*
- * CMA allocations (alloc_contig_range) really need to mark isolate
- * CMA pageblocks even when they are not movable in fact so consider
- * them movable here.
- */
- if (is_migrate_cma(migratetype) &&
- is_migrate_cma(get_pageblock_migratetype(page)))
- return false;
+ if (is_migrate_cma_page(page)) {
+ /*
+ * CMA allocations (alloc_contig_range) really need to mark
+ * isolate CMA pageblocks even when they are not movable in fact
+ * so consider them movable here.
+ */
+ if (is_migrate_cma(migratetype))
+ return false;
+
+ reason = "CMA page";
+ goto unmovable;
+ }
- pfn = page_to_pfn(page);
- for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
+ for (found = 0; iter < pageblock_nr_pages; iter++) {
unsigned long check = pfn + iter;
if (!pfn_valid_within(check))
@@ -8105,7 +8122,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
unmovable:
WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE);
if (flags & REPORT_FAILURE)
- dump_page(pfn_to_page(pfn+iter), "unmovable page");
+ dump_page(pfn_to_page(pfn + iter), reason);
return true;
}
diff --git a/mm/percpu.c b/mm/percpu.c
index 2e6fc8d552c9..68dd2e7e73b5 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2567,8 +2567,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
ai->groups[group].base_offset = areas[group] - base;
}
- pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
- PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
+ pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n",
+ PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
ai->dyn_size, ai->unit_size);
rc = pcpu_setup_first_chunk(ai, base);
@@ -2692,8 +2692,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
}
/* we're ready, commit */
- pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n",
- unit_pages, psize_str, vm.addr, ai->static_size,
+ pr_info("%d %s pages/cpu s%zu r%zu d%zu\n",
+ unit_pages, psize_str, ai->static_size,
ai->reserved_size, ai->dyn_size);
rc = pcpu_setup_first_chunk(ai, vm.addr);
diff --git a/mm/shmem.c b/mm/shmem.c
index b3db3779a30a..2275a0ff7c30 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1081,9 +1081,14 @@ static void shmem_evict_inode(struct inode *inode)
}
spin_unlock(&sbinfo->shrinklist_lock);
}
- if (!list_empty(&info->swaplist)) {
+ while (!list_empty(&info->swaplist)) {
+ /* Wait while shmem_unuse() is scanning this inode... */
+ wait_var_event(&info->stop_eviction,
+ !atomic_read(&info->stop_eviction));
mutex_lock(&shmem_swaplist_mutex);
- list_del_init(&info->swaplist);
+ /* ...but beware of the race if we peeked too early */
+ if (!atomic_read(&info->stop_eviction))
+ list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
}
@@ -1099,10 +1104,11 @@ extern struct swap_info_struct *swap_info[];
static int shmem_find_swap_entries(struct address_space *mapping,
pgoff_t start, unsigned int nr_entries,
struct page **entries, pgoff_t *indices,
- bool frontswap)
+ unsigned int type, bool frontswap)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
+ swp_entry_t entry;
unsigned int ret = 0;
if (!nr_entries)
@@ -1116,13 +1122,12 @@ static int shmem_find_swap_entries(struct address_space *mapping,
if (!xa_is_value(page))
continue;
- if (frontswap) {
- swp_entry_t entry = radix_to_swp_entry(page);
-
- if (!frontswap_test(swap_info[swp_type(entry)],
- swp_offset(entry)))
- continue;
- }
+ entry = radix_to_swp_entry(page);
+ if (swp_type(entry) != type)
+ continue;
+ if (frontswap &&
+ !frontswap_test(swap_info[type], swp_offset(entry)))
+ continue;
indices[ret] = xas.xa_index;
entries[ret] = page;
@@ -1194,7 +1199,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type,
pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
pvec.pages, indices,
- frontswap);
+ type, frontswap);
if (pvec.nr == 0) {
ret = 0;
break;
@@ -1227,36 +1232,27 @@ int shmem_unuse(unsigned int type, bool frontswap,
unsigned long *fs_pages_to_unuse)
{
struct shmem_inode_info *info, *next;
- struct inode *inode;
- struct inode *prev_inode = NULL;
int error = 0;
if (list_empty(&shmem_swaplist))
return 0;
mutex_lock(&shmem_swaplist_mutex);
-
- /*
- * The extra refcount on the inode is necessary to safely dereference
- * p->next after re-acquiring the lock. New shmem inodes with swap
- * get added to the end of the list and we will scan them all.
- */
list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
if (!info->swapped) {
list_del_init(&info->swaplist);
continue;
}
-
- inode = igrab(&info->vfs_inode);
- if (!inode)
- continue;
-
+ /*
+ * Drop the swaplist mutex while searching the inode for swap;
+ * but before doing so, make sure shmem_evict_inode() will not
+ * remove placeholder inode from swaplist, nor let it be freed
+ * (igrab() would protect from unlink, but not from unmount).
+ */
+ atomic_inc(&info->stop_eviction);
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
- prev_inode = inode;
- error = shmem_unuse_inode(inode, type, frontswap,
+ error = shmem_unuse_inode(&info->vfs_inode, type, frontswap,
fs_pages_to_unuse);
cond_resched();
@@ -1264,14 +1260,13 @@ int shmem_unuse(unsigned int type, bool frontswap,
next = list_next_entry(info, swaplist);
if (!info->swapped)
list_del_init(&info->swaplist);
+ if (atomic_dec_and_test(&info->stop_eviction))
+ wake_up_var(&info->stop_eviction);
if (error)
break;
}
mutex_unlock(&shmem_swaplist_mutex);
- if (prev_inode)
- iput(prev_inode);
-
return error;
}
@@ -2238,6 +2233,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
+ atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->shrinklist);
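The stop_eviction counter introduced above is a small wait/wake protocol: shmem_unuse() pins the inode with atomic_inc() before dropping shmem_swaplist_mutex, and shmem_evict_inode() sleeps in wait_var_event() until atomic_dec_and_test() fires wake_up_var(). A minimal sketch of the same pattern, using hypothetical names (struct scanned_object, scan_one(), evict_one()) rather than the shmem structures:

#include <linux/atomic.h>
#include <linux/wait_bit.h>     /* wait_var_event(), wake_up_var() */

struct scanned_object {
        atomic_t stop_eviction;         /* > 0 while a scanner holds the object */
};

/* Scanner side: pin the object before dropping the list lock. */
static void scan_one(struct scanned_object *obj)
{
        atomic_inc(&obj->stop_eviction);
        /* ... drop the list lock, do the slow scan ... */
        if (atomic_dec_and_test(&obj->stop_eviction))
                wake_up_var(&obj->stop_eviction);       /* let eviction continue */
}

/* Eviction side: sleep until no scanner is pinning the object. */
static void evict_one(struct scanned_object *obj)
{
        wait_var_event(&obj->stop_eviction,
                       !atomic_read(&obj->stop_eviction));
        /* ... now safe to tear the object down ... */
}

Unlike the igrab()/iput() scheme it replaces, this pins only eviction rather than the inode itself, which is why it also holds up across unmount.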
diff --git a/mm/slab.c b/mm/slab.c
index 47a380a486ee..9142ee992493 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2374,7 +2374,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
/* Slab management obj is off-slab. */
freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
- freelist = kasan_reset_tag(freelist);
if (!freelist)
return NULL;
} else {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2b8d9c3fbb47..cf63b5f01adf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2023,7 +2023,6 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
* If the boolean frontswap is true, only unuse pages_to_unuse pages;
* pages_to_unuse==0 means all pages; ignored if frontswap is false
*/
-#define SWAP_UNUSE_MAX_TRIES 3
int try_to_unuse(unsigned int type, bool frontswap,
unsigned long pages_to_unuse)
{
@@ -2035,7 +2034,6 @@ int try_to_unuse(unsigned int type, bool frontswap,
struct page *page;
swp_entry_t entry;
unsigned int i;
- int retries = 0;
if (!si->inuse_pages)
return 0;
@@ -2053,11 +2051,9 @@ retry:
spin_lock(&mmlist_lock);
p = &init_mm.mmlist;
- while ((p = p->next) != &init_mm.mmlist) {
- if (signal_pending(current)) {
- retval = -EINTR;
- break;
- }
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (p = p->next) != &init_mm.mmlist) {
mm = list_entry(p, struct mm_struct, mmlist);
if (!mmget_not_zero(mm))
@@ -2084,7 +2080,9 @@ retry:
mmput(prev_mm);
i = 0;
- while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
+ while (si->inuse_pages &&
+ !signal_pending(current) &&
+ (i = find_next_to_unuse(si, i, frontswap)) != 0) {
entry = swp_entry(type, i);
page = find_get_page(swap_address_space(entry), i);
@@ -2117,14 +2115,18 @@ retry:
* If yes, we would need to do retry the unuse logic again.
* Under global memory pressure, swap entries can be reinserted back
* into process space after the mmlist loop above passes over them.
- * Its not worth continuosuly retrying to unuse the swap in this case.
- * So we try SWAP_UNUSE_MAX_TRIES times.
+ *
+ * Limit the number of retries? No: when mmget_not_zero() above fails,
+ * that mm is likely to be freeing swap from exit_mmap(), which proceeds
+ * at its own independent pace; and even shmem_writepage() could have
+ * been preempted after get_swap_page(), temporarily hiding that swap.
+ * It's easy and robust (though cpu-intensive) just to keep retrying.
*/
- if (++retries >= SWAP_UNUSE_MAX_TRIES)
- retval = -EBUSY;
- else if (si->inuse_pages)
- goto retry;
-
+ if (si->inuse_pages) {
+ if (!signal_pending(current))
+ goto retry;
+ retval = -EINTR;
+ }
out:
return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a5ad0b35ab8e..a815f73ee4d5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2176,7 +2176,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
* 10TB 320 32GB
*/
static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
- struct mem_cgroup *memcg,
struct scan_control *sc, bool actual_reclaim)
{
enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
@@ -2197,16 +2196,12 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
/*
* When refaults are being observed, it means a new workingset
* is being established. Disable active list protection to get
* rid of the stale workingset quickly.
*/
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
if (file && actual_reclaim && lruvec->refaults != refaults) {
inactive_ratio = 0;
} else {
@@ -2227,12 +2222,10 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct lruvec *lruvec, struct mem_cgroup *memcg,
- struct scan_control *sc)
+ struct lruvec *lruvec, struct scan_control *sc)
{
if (is_active_lru(lru)) {
- if (inactive_list_is_low(lruvec, is_file_lru(lru),
- memcg, sc, true))
+ if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
shrink_active_list(nr_to_scan, lruvec, sc, lru);
return 0;
}
@@ -2332,7 +2325,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* anonymous pages on the LRU in eligible zones.
* Otherwise, the small LRU gets thrashed.
*/
- if (!inactive_list_is_low(lruvec, false, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, false, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx)
>> sc->priority) {
scan_balance = SCAN_ANON;
@@ -2350,7 +2343,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
* lruvec even if it has plenty of old anonymous pages unless the
* system is under heavy pressure.
*/
- if (!inactive_list_is_low(lruvec, true, memcg, sc, false) &&
+ if (!inactive_list_is_low(lruvec, true, sc, false) &&
lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
scan_balance = SCAN_FILE;
goto out;
@@ -2503,7 +2496,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
nr[lru] -= nr_to_scan;
nr_reclaimed += shrink_list(lru, nr_to_scan,
- lruvec, memcg, sc);
+ lruvec, sc);
}
}
@@ -2570,7 +2563,7 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
}
@@ -2969,12 +2962,8 @@ static void snapshot_refaults(struct mem_cgroup *root_memcg, pg_data_t *pgdat)
unsigned long refaults;
struct lruvec *lruvec;
- if (memcg)
- refaults = memcg_page_state(memcg, WORKINGSET_ACTIVATE);
- else
- refaults = node_page_state(pgdat, WORKINGSET_ACTIVATE);
-
lruvec = mem_cgroup_lruvec(pgdat, memcg);
+ refaults = lruvec_page_state(lruvec, WORKINGSET_ACTIVATE);
lruvec->refaults = refaults;
} while ((memcg = mem_cgroup_iter(root_memcg, memcg, NULL)));
}
@@ -3339,7 +3328,7 @@ static void age_active_anon(struct pglist_data *pgdat,
do {
struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
- if (inactive_list_is_low(lruvec, false, memcg, sc, true))
+ if (inactive_list_is_low(lruvec, false, sc, true))
shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
sc, LRU_ACTIVE_ANON);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 36b56f858f0f..a7d493366a65 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1274,13 +1274,8 @@ const char * const vmstat_text[] = {
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
-#ifdef CONFIG_SMP
"nr_tlb_remote_flush",
"nr_tlb_remote_flush_received",
-#else
- "", /* nr_tlb_remote_flush */
- "", /* nr_tlb_remote_flush_received */
-#endif /* CONFIG_SMP */
"nr_tlb_local_flush_all",
"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 709d2542f729..dbe8b1993be9 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1920,6 +1920,7 @@ static int __init atalk_init(void)
ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
if (!ddp_dl) {
pr_crit("Unable to register DDP with SNAP.\n");
+ rc = -ENOMEM;
goto out_sock;
}
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d7f5cf5b7594..ad4f829193f0 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
- if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg])
+ if (arg < 0 || arg >= MAX_LEC_ITF)
+ return -EINVAL;
+ arg = array_index_nospec(arg, MAX_LEC_ITF);
+ if (!dev_lec[arg])
return -EINVAL;
vcc->proto_data = dev_lec[arg];
return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc);
@@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
i = arg;
if (arg >= MAX_LEC_ITF)
return -EINVAL;
+ i = array_index_nospec(arg, MAX_LEC_ITF);
if (!dev_lec[i]) {
int size;
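Both lec hunks apply the usual Spectre-v1 hardening: validate the user-controlled index first, then pass it through array_index_nospec() so that even a speculatively executed out-of-bounds value cannot be used to index dev_lec[]. A minimal sketch of the pattern, with a hypothetical table[] and NR_ENTRIES:

#include <linux/nospec.h>       /* array_index_nospec() */

#define NR_ENTRIES 16           /* hypothetical table size */

static void *table[NR_ENTRIES];

static void *lookup(int idx)
{
        if (idx < 0 || idx >= NR_ENTRIES)
                return NULL;
        /* Clamp idx so it stays in bounds even under speculation. */
        idx = array_index_nospec(idx, NR_ENTRIES);
        return table[idx];
}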
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 9a580999ca57..d892b7c3cc42 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -523,12 +523,12 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
struct sock *sk = sock->sk;
int err = 0;
- BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
-
if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 5ea7e56119c1..ba303ee99b9b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -197,13 +197,10 @@ static void __br_handle_local_finish(struct sk_buff *skb)
/* note: already called with rcu_read_lock */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_bridge_port *p = br_port_get_rcu(skb->dev);
-
__br_handle_local_finish(skb);
- BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
- br_pass_frame_up(skb);
- return 0;
+ /* return 1 to signal the okfn() was called so it's ok to use the skb */
+ return 1;
}
/*
@@ -280,10 +277,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
- /* Deliver packet to local host only */
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
- NULL, skb, skb->dev, NULL, br_handle_local_finish);
- return RX_HANDLER_CONSUMED;
+ /* The else clause is hit when nf_hook():
+ * - returns < 0 (drop/error), or
+ * - returns 0 (stolen/nf_queue).
+ * Thus return 1 from the okfn() to signal that the skb is ok to pass.
+ */
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ br_handle_local_finish) == 1) {
+ return RX_HANDLER_PASS;
+ } else {
+ return RX_HANDLER_CONSUMED;
+ }
}
forward:
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 02da21d771c9..45e7f4173bba 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2031,7 +2031,8 @@ static void br_multicast_start_querier(struct net_bridge *br,
__br_multicast_open(br, query);
- list_for_each_entry(port, &br->port_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(port, &br->port_list, list) {
if (port->state == BR_STATE_DISABLED ||
port->state == BR_STATE_BLOCKING)
continue;
@@ -2043,6 +2044,7 @@ static void br_multicast_start_querier(struct net_bridge *br,
br_multicast_enable(&port->ip6_own_query);
#endif
}
+ rcu_read_unlock();
}
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 9c07591b0232..7104cf13da84 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1441,7 +1441,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_VLAN_STATS_ENABLED,
br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_VLAN_STATS_PER_PORT,
- br_opt_get(br, IFLA_BR_VLAN_STATS_PER_PORT)))
+ br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)))
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index eb15891f8b9f..3cad01ac64e4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+ /* rule should have no remaining data after target */
+ if (type == EBT_COMPAT_TARGET && size_left)
return -EINVAL;
match32 = (struct compat_ebt_entry_mwt *) buf;
diff --git a/net/core/dev.c b/net/core/dev.c
index fdcff29df915..f409406254dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1184,7 +1184,21 @@ int dev_change_name(struct net_device *dev, const char *newname)
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- if (dev->flags & IFF_UP)
+
+ /* Some auto-enslaved devices, e.g. failover slaves, are
+ * special: userspace might rename the device after the
+ * kernel initiated auto-enslavement and the interface has
+ * already been brought up and running. Allow a live name
+ * change even when these slave devices are up and running.
+ *
+ * Typically, users of these auto-enslaving devices don't
+ * actually care about slave name changes, as they are
+ * supposed to operate on the master interface directly.
+ */
+ if (dev->flags & IFF_UP &&
+ likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
return -EBUSY;
write_seqcount_begin(&devnet_rename_seq);
diff --git a/net/core/failover.c b/net/core/failover.c
index 4a92a98ccce9..b5cd3c727285 100644
--- a/net/core/failover.c
+++ b/net/core/failover.c
@@ -80,14 +80,14 @@ static int failover_slave_register(struct net_device *slave_dev)
goto err_upper_link;
}
- slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_register &&
!fops->slave_register(slave_dev, failover_dev))
return NOTIFY_OK;
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
err_upper_link:
netdev_rx_handler_unregister(slave_dev);
done:
@@ -121,7 +121,7 @@ int failover_slave_unregister(struct net_device *slave_dev)
netdev_rx_handler_unregister(slave_dev);
netdev_upper_dev_unlink(slave_dev, failover_dev);
- slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+ slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_LIVE_RENAME_OK);
if (fops && fops->slave_unregister &&
!fops->slave_unregister(slave_dev, failover_dev))
diff --git a/net/core/filter.c b/net/core/filter.c
index fc92ebc4e200..27e61ffd9039 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4383,6 +4383,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
* Only binding to IP is supported.
*/
err = -EINVAL;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return err;
if (addr->sa_family == AF_INET) {
if (addr_len < sizeof(struct sockaddr_in))
return err;
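The added length check matters because addr_len comes from the caller and may be shorter than struct sockaddr, so sa_family must be proven in-bounds before it is read; only then can the family-specific size be enforced. A sketch of that validation order for a hypothetical AF_INET-only handler:

#include <linux/errno.h>
#include <linux/in.h>           /* struct sockaddr_in */
#include <linux/socket.h>
#include <linux/stddef.h>       /* offsetofend() */

static int check_bind_addr(const struct sockaddr *addr, int addr_len)
{
        /* The buffer must at least cover sa_family before it is read. */
        if (addr_len < offsetofend(struct sockaddr, sa_family))
                return -EINVAL;

        if (addr->sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /* And it must be large enough for the family-specific layout. */
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        return 0;
}

The same check is added to udpv6_pre_connect() further down in this patch.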
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f8f94303a1f5..8f8b7b6c2945 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1747,20 +1747,16 @@ int netdev_register_kobject(struct net_device *ndev)
error = device_add(dev);
if (error)
- goto error_put_device;
+ return error;
error = register_queue_kobjects(ndev);
- if (error)
- goto error_device_del;
+ if (error) {
+ device_del(dev);
+ return error;
+ }
pm_runtime_set_memalloc_noio(dev, true);
- return 0;
-
-error_device_del:
- device_del(dev);
-error_put_device:
- put_device(dev);
return error;
}
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 703cf76aa7c2..7109c168b5e0 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -185,9 +185,10 @@ void __init ptp_classifier_init(void)
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
- struct sock_fprog_kern ptp_prog = {
- .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
- };
+ struct sock_fprog_kern ptp_prog;
+
+ ptp_prog.len = ARRAY_SIZE(ptp_filter);
+ ptp_prog.filter = ptp_filter;
BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a51cab95ba64..220c56e93659 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4948,7 +4948,7 @@ static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check,
{
struct if_stats_msg *ifsm;
- if (nlh->nlmsg_len < sizeof(*ifsm)) {
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) {
NL_SET_ERR_MSG(extack, "Invalid header for stats dump");
return -EINVAL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ef2cd5712098..40796b8bf820 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5083,7 +5083,8 @@ EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
- int mac_len;
+ int mac_len, meta_len;
+ void *meta;
if (skb_cow(skb, skb_headroom(skb)) < 0) {
kfree_skb(skb);
@@ -5095,6 +5096,13 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
memmove(skb_mac_header(skb) + VLAN_HLEN, skb_mac_header(skb),
mac_len - VLAN_HLEN - ETH_TLEN);
}
+
+ meta_len = skb_metadata_len(skb);
+ if (meta_len) {
+ meta = skb_metadata_end(skb) - meta_len;
+ memmove(meta + VLAN_HLEN, meta, meta_len);
+ }
+
skb->mac_header += VLAN_HLEN;
return skb;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 782343bb925b..067878a1e4c5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -348,7 +348,7 @@ static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
}
- if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+ if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
*(struct old_timeval32 *)optval = tv32;
return sizeof(tv32);
@@ -372,7 +372,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool
{
struct __kernel_sock_timeval tv;
- if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
+ if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
struct old_timeval32 tv32;
if (optlen < sizeof(tv32))
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 10e809b296ec..fb065a8937ea 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -226,7 +226,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
tail[plen - 1] = proto;
}
-static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
+static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
int encap_type;
struct udphdr *uh;
@@ -234,6 +234,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
__be16 sport, dport;
struct xfrm_encap_tmpl *encap = x->encap;
struct ip_esp_hdr *esph = esp->esph;
+ unsigned int len;
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
@@ -241,11 +242,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
encap_type = encap->encap_type;
spin_unlock_bh(&x->lock);
+ len = skb->len + esp->tailen - skb_transport_offset(skb);
+ if (len + sizeof(struct iphdr) >= IP_MAX_MTU)
+ return -EMSGSIZE;
+
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
- uh->len = htons(skb->len + esp->tailen
- - skb_transport_offset(skb));
+ uh->len = htons(len);
uh->check = 0;
switch (encap_type) {
@@ -262,6 +266,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru
*skb_mac_header(skb) = IPPROTO_UDP;
esp->esph = esph;
+
+ return 0;
}
int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
@@ -275,8 +281,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
int tailen = esp->tailen;
/* this is non-NULL only with UDP Encapsulation */
- if (x->encap)
- esp_output_udp_encap(x, skb, esp);
+ if (x->encap) {
+ int err = esp_output_udp_encap(x, skb, esp);
+
+ if (err < 0)
+ return err;
+ }
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 8756e0e790d2..d3170a8001b2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -52,13 +52,13 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
goto out;
if (sp->len == XFRM_MAX_DEPTH)
- goto out;
+ goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
if (!x)
- goto out;
+ goto out_reset;
sp->xvec[sp->len++] = x;
sp->olen++;
@@ -66,7 +66,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo) {
xfrm_state_put(x);
- goto out;
+ goto out_reset;
}
}
@@ -82,6 +82,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS);
+out_reset:
+ secpath_reset(skb);
out:
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 79e98e21cdd7..12ce6c526d72 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -121,6 +121,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
struct guehdr *guehdr;
void *data;
u16 doffset = 0;
+ u8 proto_ctype;
if (!fou)
return 1;
@@ -212,13 +213,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(guehdr->control))
return gue_control_message(skb, guehdr);
+ proto_ctype = guehdr->proto_ctype;
__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
skb_reset_transport_header(skb);
if (iptunnel_pull_offloads(skb))
goto drop;
- return -guehdr->proto_ctype;
+ return -proto_ctype;
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c80188875f39..e8bb2e85c5a4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -519,6 +519,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
+ to->skb_iif = from->skb_iif;
skb_dst_drop(to);
skb_dst_copy(to, from);
to->dev = from->dev;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 68a21bf75dd0..35d8346742e2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -646,10 +646,8 @@ static int __init vti_init(void)
msg = "ipip tunnel";
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
- if (err < 0) {
- pr_info("%s: cant't register tunnel\n",__func__);
+ if (err < 0)
goto xfrm_tunnel_failed;
- }
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
@@ -659,9 +657,9 @@ static int __init vti_init(void)
return err;
rtnl_link_failed:
- xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
-xfrm_tunnel_failed:
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+xfrm_tunnel_failed:
+ xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
@@ -676,6 +674,7 @@ pernet_dev_failed:
static void __exit vti_fini(void)
{
rtnl_link_unregister(&vti_link_ops);
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 835d50b279f5..a2a88ab07f7b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -56,7 +56,7 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu; /* for call_rcu_bh */
+ struct rcu_head rcu; /* for call_rcu */
struct net *net; /* netns for pernet list */
char ifname[IFNAMSIZ]; /* device ifname */
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a5da63e5faa2..6fdf1c195d8e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1183,11 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
return dst;
}
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
+{
+ struct ip_options opt;
+ int res;
+
+ /* Recompile ip options since IPCB may not be valid anymore.
+ * Also check we have a reasonable ipv4 header.
+ */
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+ ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+ return;
+
+ memset(&opt, 0, sizeof(opt));
+ if (ip_hdr(skb)->ihl > 5) {
+ if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+ return;
+ opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
+
+ rcu_read_lock();
+ res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ rcu_read_unlock();
+
+ if (res)
+ return;
+ }
+ __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
static void ipv4_link_failure(struct sk_buff *skb)
{
struct rtable *rt;
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ ipv4_send_dest_unreach(skb);
rt = skb_rtable(skb);
if (rt)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ba0fc4b18465..eeb4041fa5f9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one_day_secs
},
{
.procname = "tcp_autocorking",
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 359da68d7c06..477cb4aa456c 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -49,9 +49,8 @@
#define DCTCP_MAX_ALPHA 1024U
struct dctcp {
- u32 acked_bytes_ecn;
- u32 acked_bytes_total;
- u32 prior_snd_una;
+ u32 old_delivered;
+ u32 old_delivered_ce;
u32 prior_rcv_nxt;
u32 dctcp_alpha;
u32 next_seq;
@@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
{
ca->next_seq = tp->snd_nxt;
- ca->acked_bytes_ecn = 0;
- ca->acked_bytes_total = 0;
+ ca->old_delivered = tp->delivered;
+ ca->old_delivered_ce = tp->delivered_ce;
}
static void dctcp_init(struct sock *sk)
@@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk)
sk->sk_state == TCP_CLOSE)) {
struct dctcp *ca = inet_csk_ca(sk);
- ca->prior_snd_una = tp->snd_una;
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
@@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
- u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
-
- /* If ack did not advance snd_una, count dupack as MSS size.
- * If ack did update window, do not count it at all.
- */
- if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
- acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
- if (acked_bytes) {
- ca->acked_bytes_total += acked_bytes;
- ca->prior_snd_una = tp->snd_una;
-
- if (flags & CA_ACK_ECE)
- ca->acked_bytes_ecn += acked_bytes;
- }
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
- u64 bytes_ecn = ca->acked_bytes_ecn;
+ u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
u32 alpha = ca->dctcp_alpha;
/* alpha = (1 - g) * alpha + g * F */
alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
- if (bytes_ecn) {
+ if (delivered_ce) {
+ u32 delivered = tp->delivered - ca->old_delivered;
+
/* If dctcp_shift_g == 1, a 32bit value would overflow
- * after 8 Mbytes.
+ * after 8 M packets.
*/
- bytes_ecn <<= (10 - dctcp_shift_g);
- do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
+ delivered_ce <<= (10 - dctcp_shift_g);
+ delivered_ce /= max(1U, delivered);
- alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
+ alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
}
/* dctcp_alpha can be read from dctcp_get_info() without
* synchro, so we ask compiler to not use dctcp_alpha
@@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Fill it also in case of VEGASINFO due to req struct limits.
* We can still correctly retrieve it later.
@@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
info->dctcp.dctcp_enabled = 1;
info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
info->dctcp.dctcp_alpha = ca->dctcp_alpha;
- info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
- info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
+ info->dctcp.dctcp_ab_ecn = tp->mss_cache *
+ (tp->delivered_ce - ca->old_delivered_ce);
+ info->dctcp.dctcp_ab_tot = tp->mss_cache *
+ (tp->delivered - ca->old_delivered);
}
*attr = INET_DIAG_DCTCPINFO;
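With this change the per-RTT alpha update in dctcp_update_alpha() is driven by the packet counters tp->delivered and tp->delivered_ce instead of acked-byte accounting: alpha = min(alpha - (alpha >> dctcp_shift_g) + ((delivered_ce << (10 - dctcp_shift_g)) / delivered), DCTCP_MAX_ALPHA). As a worked example, assuming dctcp_shift_g = 4, with alpha = 512 and 100 packets delivered in the RTT of which 25 carried CE marks: 512 - 32 + (25 << 6) / 100 = 480 + 16 = 496. The byte figures still reported through dctcp_get_info() become approximations, reconstructed as the packet deltas multiplied by tp->mss_cache.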
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5def3c48870e..731d3045b50a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -402,11 +402,12 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int room;
+
+ room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh;
/* Check #1 */
- if (tp->rcv_ssthresh < tp->window_clamp &&
- (int)tp->rcv_ssthresh < tcp_space(sk) &&
- !tcp_under_memory_pressure(sk)) {
+ if (room > 0 && !tcp_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -419,8 +420,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
- tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
- tp->window_clamp);
+ tp->rcv_ssthresh += min(room, incr);
inet_csk(sk)->icsk_ack.quick |= 1;
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2f8039a26b08..a2896944aa37 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1673,7 +1673,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
((TCP_SKB_CB(tail)->tcp_flags |
- TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
+ !((TCP_SKB_CB(tail)->tcp_flags &
+ TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
#ifdef CONFIG_TLS_DEVICE
@@ -1692,6 +1694,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+ /* We have to update both TCP_SKB_CB(tail)->tcp_flags and
+ * thtail->fin, so that the fast path in tcp_rcv_established()
+ * is not entered if we append a packet with a FIN.
+ * SYN, RST, URG are not present.
+ * ACK is set on both packets.
+ * PSH: the TCP stack does not really care,
+ * at least for 'GRO' packets.
+ */
+ thtail->fin |= th->fin;
TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
if (TCP_SKB_CB(skb)->has_rxtstamp) {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 64f9715173ac..065334b41d57 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -352,6 +352,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
+ unsigned int ulen;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -359,6 +360,12 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
+ /* Do not deal with padded or malicious packets, sorry! */
+ ulen = ntohs(uh->len);
+ if (ulen <= sizeof(*uh) || ulen != skb_gro_len(skb)) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
@@ -377,13 +384,14 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
- * leading to execessive truesize values
+ * leading to excessive truesize values.
+ * On len mismatch merge the first packet shorter than gso_size,
+ * otherwise complete the GRO packet.
*/
- if (!skb_gro_receive(p, skb) &&
+ if (ulen > ntohs(uh2->len) || skb_gro_receive(p, skb) ||
+ ulen != ntohs(uh2->len) ||
NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
pp = p;
- else if (uh->len != uh2->len)
- pp = p;
return pp;
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d73a6d6652f6..2b144b92ae46 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -111,7 +111,8 @@ static void
_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
{
const struct iphdr *iph = ip_hdr(skb);
- u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+ int ihl = iph->ihl;
+ u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
@@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_mark = skb->mark;
fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
+ fl4->flowi4_proto = iph->protocol;
+ fl4->daddr = reverse ? iph->saddr : iph->daddr;
+ fl4->saddr = reverse ? iph->daddr : iph->saddr;
+ fl4->flowi4_tos = iph->tos;
+
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
case IPPROTO_UDP:
@@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ports;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ports = (__be16 *)xprth;
fl4->fl4_sport = ports[!!reverse];
@@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 2 - skb->data)) {
u8 *icmp;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
icmp = xprth;
fl4->fl4_icmp_type = icmp[0];
@@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be32 *ehdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ehdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ehdr[0];
@@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 8 - skb->data)) {
__be32 *ah_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ah_hdr = (__be32 *)xprth;
fl4->fl4_ipsec_spi = ah_hdr[1];
@@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
pskb_may_pull(skb, xprth + 4 - skb->data)) {
__be16 *ipcomp_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
ipcomp_hdr = (__be16 *)xprth;
fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
@@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
__be16 *greflags;
__be32 *gre_hdr;
- xprth = skb_network_header(skb) + iph->ihl * 4;
+ xprth = skb_network_header(skb) + ihl * 4;
greflags = (__be16 *)xprth;
gre_hdr = (__be32 *)xprth;
@@ -213,10 +219,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
break;
}
}
- fl4->flowi4_proto = iph->protocol;
- fl4->daddr = reverse ? iph->saddr : iph->daddr;
- fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
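The reshuffle in _decode_session4() copies the header fields into the flowi before any of the pskb_may_pull() calls in the switch, and afterwards reuses only the cached ihl value, never the iph pointer. pskb_may_pull() can reallocate the skb head, so any header pointer taken before the pull may dangle; the safe pattern is to recompute it from skb_network_header() after each pull. A hypothetical sketch of that pattern:

#include <linux/errno.h>
#include <linux/skbuff.h>

/* Read the first two bytes of the L4 header at offset ihl * 4.
 * The header pointer is re-derived after pskb_may_pull(), because the
 * pull may have moved the packet data.
 */
static int read_sport(struct sk_buff *skb, unsigned int ihl, __be16 *sport)
{
        const __be16 *ports;

        if (!pskb_may_pull(skb, ihl * 4 + 4))
                return -EINVAL;

        ports = (const __be16 *)(skb_network_header(skb) + ihl * 4);
        *sport = ports[0];
        return 0;
}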
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index d43d076c98f5..1766325423b5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
}
if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+ NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
return -EINVAL;
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index d46b4eb645c2..cb99f6fb79b7 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -74,13 +74,13 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
goto out;
if (sp->len == XFRM_MAX_DEPTH)
- goto out;
+ goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
if (!x)
- goto out;
+ goto out_reset;
sp->xvec[sp->len++] = x;
sp->olen++;
@@ -88,7 +88,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo) {
xfrm_state_put(x);
- goto out;
+ goto out_reset;
}
}
@@ -109,6 +109,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS);
+out_reset:
+ secpath_reset(skb);
out:
skb_push(skb, offset);
NAPI_GRO_CB(skb)->same_flow = 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6613d8dbb0e5..91247a6fc67f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -921,9 +921,7 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
if (pcpu_rt) {
struct fib6_info *from;
- from = rcu_dereference_protected(pcpu_rt->from,
- lockdep_is_held(&table->tb6_lock));
- rcu_assign_pointer(pcpu_rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
fib6_info_release(from);
}
}
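The xchg() pattern used here, and again in net/ipv6/route.c below, makes detach-and-NULL a single atomic step: whichever path clears the pointer first owns the old value and calls fib6_info_release() exactly once, without relying on the table lock that the rcu_dereference_protected() form asserted. The idiom in isolation, with hypothetical struct obj / struct ref / ref_put():

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct ref;                             /* hypothetical refcounted object */
void ref_put(struct ref *r);            /* hypothetical: drops one reference */

struct obj {
        struct ref __rcu *ref;
};

/* Atomically detach obj->ref so its reference is dropped exactly once,
 * even if two paths race to clear it.  The __force cast only silences
 * sparse about stripping the __rcu annotation.
 */
static void obj_drop_ref(struct obj *o)
{
        struct ref *r;

        r = xchg((__force struct ref **)&o->ref, NULL);
        if (r)
                ref_put(r);
}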
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index cb54a8a3c273..be5f3d7ceb96 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
return fl;
}
+static void fl_free_rcu(struct rcu_head *head)
+{
+ struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
+
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
+ kfree(fl->opt);
+ kfree(fl);
+}
+
static void fl_free(struct ip6_flowlabel *fl)
{
- if (fl) {
- if (fl->share == IPV6_FL_S_PROCESS)
- put_pid(fl->owner.pid);
- kfree(fl->opt);
- kfree_rcu(fl, rcu);
- }
+ if (fl)
+ call_rcu(&fl->rcu, fl_free_rcu);
}
static void fl_release(struct ip6_flowlabel *fl)
@@ -633,9 +639,9 @@ recheck:
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
((fl1->share == IPV6_FL_S_PROCESS) &&
- (fl1->owner.pid == fl->owner.pid)) ||
+ (fl1->owner.pid != fl->owner.pid)) ||
((fl1->share == IPV6_FL_S_USER) &&
- uid_eq(fl1->owner.uid, fl->owner.uid)))
+ !uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -ENOMEM;
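fl_free() can no longer use kfree_rcu() because the flowlabel needs more than a plain kfree() after the grace period: the pid reference and fl->opt have to be dropped too, so the teardown moves into a call_rcu() callback. A minimal sketch of that pattern, with a hypothetical struct item owning a secondary allocation:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item {
        void *extra;            /* secondary allocation owned by the item */
        struct rcu_head rcu;
};

static void item_free_rcu(struct rcu_head *head)
{
        struct item *it = container_of(head, struct item, rcu);

        kfree(it->extra);       /* cleanup kfree_rcu() alone could not do */
        kfree(it);
}

static void item_free(struct item *it)
{
        if (it)
                call_rcu(&it->rcu, item_free_rcu);
}

Callers still must not touch the item after item_free(); the callback only defers the actual freeing past the RCU grace period.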
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0302e0eb07af..0520aca3354b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -379,11 +379,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- rcu_read_lock();
- from = rcu_dereference(rt->from);
- rcu_assign_pointer(rt->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt->from, NULL);
fib6_info_release(from);
- rcu_read_unlock();
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -1288,9 +1285,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = rcu_dereference_protected(rt6_ex->rt6i->from,
- lockdep_is_held(&rt6_exception_lock));
- rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
@@ -2330,6 +2325,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_lock();
from = rcu_dereference(rt6->from);
+ if (!from) {
+ rcu_read_unlock();
+ return;
+ }
nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
@@ -3393,11 +3392,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
from = rcu_dereference(rt->from);
- /* This fib6_info_hold() is safe here because we hold reference to rt
- * and rt already holds reference to fib6_info.
- */
- fib6_info_hold(from);
- rcu_read_unlock();
+ if (!from)
+ goto out;
nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
if (!nrt)
@@ -3409,10 +3405,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- /* No need to remove rt from the exception table if rt is
- * a cached route because rt6_insert_exception() will
- * takes care of it
- */
+ /* rt6_insert_exception() will take care of duplicated exceptions */
if (rt6_insert_exception(nrt, from)) {
dst_release_immediate(&nrt->dst);
goto out;
@@ -3425,7 +3418,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out:
- fib6_info_release(from);
+ rcu_read_unlock();
neigh_release(neigh);
}
@@ -3664,23 +3657,34 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
- int type;
struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
+ int type;
+
+ if (netif_is_l3_master(skb->dev) &&
+ dst->dev == net->loopback_dev)
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+ else
+ idev = ip6_dst_idev(dst);
+
switch (ipstats_mib_noroutes) {
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
- IP6_INC_STATS(dev_net(dst->dev),
- __in6_dev_get_safely(skb->dev),
- IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
/* FALLTHROUGH */
case IPSTATS_MIB_OUTNOROUTES:
- IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
- ipstats_mib_noroutes);
+ IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
+
+ /* Start over by dropping the dst for l3mdev case */
+ if (netif_is_l3_master(skb->dev))
+ skb_dst_drop(skb);
+
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
kfree_skb(skb);
return 0;
@@ -5013,16 +5017,20 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
from = rcu_dereference(rt->from);
-
- if (fibmatch)
- err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, 0);
- else
- err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
- &fl6.saddr, iif, RTM_NEWROUTE,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- 0);
+ if (from) {
+ if (fibmatch)
+ err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+ iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ else
+ err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ &fl6.saddr, iif, RTM_NEWROUTE,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ } else {
+ err = -ENETUNREACH;
+ }
rcu_read_unlock();
if (err < 0) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b444483cdb2b..622eeaf5732b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1047,6 +1047,8 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
/* The following checks are replicated from __ip6_datagram_connect()
* and intended to prevent BPF program called below from accessing
* bytes that are out of the bound specified by user in addr_len.
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bc65db782bfb..d9e5f6808811 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -345,7 +345,7 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
unsigned int i;
xfrm_flush_gc();
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
@@ -402,6 +402,10 @@ static void __exit xfrm6_tunnel_fini(void)
xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6);
xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
unregister_pernet_subsys(&xfrm6_tunnel_net_ops);
+ /* Readers may still hold xfrm6_tunnel_spi entries that are
+ * queued for freeing via RCU, so wait for all pending RCU
+ * callbacks before destroying the cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5651c29cb5bd..4af1e1d60b9f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1951,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
if (rq->sadb_x_ipsecrequest_mode == 0)
return -EINVAL;
+ if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto))
+ return -EINVAL;
- t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
+ t->id.proto = rq->sadb_x_ipsecrequest_proto;
if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
return -EINVAL;
t->mode = mode;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index fed6becc5daf..52b5a2797c0c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -169,8 +169,8 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (tunnel->tunnel_id == tunnel_id &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -190,8 +190,8 @@ struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth) {
- l2tp_tunnel_inc_refcount(tunnel);
+ if (++count > nth &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
@@ -909,7 +909,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct l2tp_tunnel *tunnel;
- tunnel = l2tp_tunnel(sk);
+ tunnel = rcu_dereference_sk_user_data(sk);
if (tunnel == NULL)
goto pass_up;
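Replacing the unconditional l2tp_tunnel_inc_refcount() with refcount_inc_not_zero() inside the RCU read section is the standard lookup pattern: a tunnel whose refcount has already hit zero may still be visible on the RCU-protected list for a grace period, and a plain increment would resurrect it. A generic sketch of the idiom, with a hypothetical struct obj and obj_list:

#include <linux/rculist.h>
#include <linux/refcount.h>
#include <linux/types.h>

struct obj {
        u32 id;
        refcount_t ref_count;
        struct list_head list;          /* on an RCU-protected list */
};

static struct obj *obj_get(struct list_head *obj_list, u32 id)
{
        struct obj *o;

        rcu_read_lock();
        list_for_each_entry_rcu(o, obj_list, list) {
                /* Skip objects already on their way to being freed. */
                if (o->id == id && refcount_inc_not_zero(&o->ref_count)) {
                        rcu_read_unlock();
                        return o;
                }
        }
        rcu_read_unlock();
        return NULL;
}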
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index b99e73a7e7e0..2017b7d780f5 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -320,14 +320,13 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
struct llc_sap *sap;
int rc = -EINVAL;
- dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
-
lock_sock(sk);
if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
if (unlikely(addr->sllc_family != AF_LLC))
goto out;
+ dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index cff0fb3578c9..deb3faf08337 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -841,7 +841,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
dir = sdata->vif.debugfs_dir;
- if (!dir)
+ if (IS_ERR_OR_NULL(dir))
return;
sprintf(buf, "netdev:%s", sdata->name);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 28d022a3eee3..ae4f0be3b393 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1195,6 +1195,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
+ if (local->in_reconfig)
+ return;
+
if (!check_sdata_in_driver(sdata))
return;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index e03c46ac8e4d..c62101857b9b 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -112,8 +112,9 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
IEEE80211_HT_CAP_TX_STBC);
/* Allow user to configure RX STBC bits */
- if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC)
- ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC;
+ if (ht_capa_mask->cap_info & cpu_to_le16(IEEE80211_HT_CAP_RX_STBC))
+ ht_cap->cap |= le16_to_cpu(ht_capa->cap_info) &
+ IEEE80211_HT_CAP_RX_STBC;
/* Allow user to decrease AMPDU factor */
if (ht_capa_mask->ampdu_params_info &
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 4a6ff1482a9f..02d2e6f11e93 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1908,6 +1908,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
+ if (sdata->vif.txq)
+ ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+
synchronize_rcu();
if (sdata->dev) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 4700718e010f..37e372896230 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -167,8 +167,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
* The driver doesn't know anything about VLAN interfaces.
* Hence, don't send GTKs for VLAN interfaces to the driver.
*/
- if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE))
+ if (!(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ ret = 1;
goto out_unsupported;
+ }
}
ret = drv_set_key(key->local, SET_KEY, sdata,
@@ -213,11 +215,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
/* all of these we can do in software - if driver can */
if (ret == 1)
return 0;
- if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL)) {
- if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
- return 0;
+ if (ieee80211_hw_check(&key->local->hw, SW_CRYPTO_CONTROL))
return -EINVAL;
- }
return 0;
default:
return -EINVAL;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 95eb5064fa91..b76a2aefa9ec 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath);
static u32 mesh_table_hash(const void *addr, u32 len, u32 seed)
{
/* Use last four bytes of hw addr as hash index */
- return jhash_1word(*(u32 *)(addr+2), seed);
+ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed);
}
static const struct rhashtable_params mesh_rht_params = {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7f8d93401ce0..bf0b187f994e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1568,7 +1568,15 @@ static void sta_ps_start(struct sta_info *sta)
return;
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
- if (txq_has_queue(sta->sta.txq[tid]))
+ struct ieee80211_txq *txq = sta->sta.txq[tid];
+ struct txq_info *txqi = to_txq_info(txq);
+
+ spin_lock(&local->active_txq_lock[txq->ac]);
+ if (!list_empty(&txqi->schedule_order))
+ list_del_init(&txqi->schedule_order);
+ spin_unlock(&local->active_txq_lock[txq->ac]);
+
+ if (txq_has_queue(txq))
set_bit(tid, &sta->txq_buffered_tids);
else
clear_bit(tid, &sta->txq_buffered_tids);
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 366b9e6f043e..40141df09f25 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,4 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Portions of this file
+ * Copyright (C) 2019 Intel Corporation
+ */
+
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -11,7 +16,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
-#define MAX_MSG_LEN 100
+#define MAX_MSG_LEN 120
DECLARE_EVENT_CLASS(mac80211_msg_event,
TP_PROTO(struct va_format *vaf),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8a49a74c0a37..2e816dd67be7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3221,6 +3221,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
u8 max_subframes = sta->sta.max_amsdu_subframes;
int max_frags = local->hw.max_tx_fragments;
int max_amsdu_len = sta->sta.max_amsdu_len;
+ int orig_truesize;
__be16 len;
void *data;
bool ret = false;
@@ -3261,6 +3262,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!head || skb_is_gso(head))
goto out;
+ orig_truesize = head->truesize;
orig_len = head->len;
if (skb->len + head->len > max_amsdu_len)
@@ -3318,6 +3320,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
*frag_tail = skb;
out_recalc:
+ fq->memory_usage += head->truesize - orig_truesize;
if (head->len != orig_len) {
flow->backlog += head->len - orig_len;
tin->backlog_bytes += head->len - orig_len;
@@ -3646,16 +3649,17 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_txq *ret = NULL;
struct txq_info *txqi = NULL;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
begin:
txqi = list_first_entry_or_null(&local->active_txqs[ac],
struct txq_info,
schedule_order);
if (!txqi)
- return NULL;
+ goto out;
if (txqi->txq.sta) {
struct sta_info *sta = container_of(txqi->txq.sta,
@@ -3672,24 +3676,30 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
if (txqi->schedule_round == local->schedule_round[ac])
- return NULL;
+ goto out;
list_del_init(&txqi->schedule_order);
txqi->schedule_round = local->schedule_round[ac];
- return &txqi->txq;
+ ret = &txqi->txq;
+
+out:
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ return ret;
}
EXPORT_SYMBOL(ieee80211_next_txq);
-void ieee80211_return_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
+void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool force)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
- lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+ spin_lock_bh(&local->active_txq_lock[txq->ac]);
if (list_empty(&txqi->schedule_order) &&
- (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
+ (force || !skb_queue_empty(&txqi->frags) ||
+ txqi->tin.backlog_packets)) {
/* If airtime accounting is active, always enqueue STAs at the
* head of the list to ensure that they only get moved to the
* back by the airtime DRR scheduler once they have a negative
@@ -3706,20 +3716,10 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
list_add_tail(&txqi->schedule_order,
&local->active_txqs[txq->ac]);
}
-}
-EXPORT_SYMBOL(ieee80211_return_txq);
-void ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq)
- __acquires(txq_lock) __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- spin_lock_bh(&local->active_txq_lock[txq->ac]);
- ieee80211_return_txq(hw, txq);
spin_unlock_bh(&local->active_txq_lock[txq->ac]);
}
-EXPORT_SYMBOL(ieee80211_schedule_txq);
+EXPORT_SYMBOL(__ieee80211_schedule_txq);
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
@@ -3729,7 +3729,7 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct sta_info *sta;
u8 ac = txq->ac;
- lockdep_assert_held(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->active_txq_lock[ac]);
if (!txqi->txq.sta)
goto out;
@@ -3759,34 +3759,27 @@ bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
sta->airtime[ac].deficit += sta->airtime_weight;
list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return false;
out:
if (!list_empty(&txqi->schedule_order))
list_del_init(&txqi->schedule_order);
+ spin_unlock_bh(&local->active_txq_lock[ac]);
return true;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
- __acquires(txq_lock)
{
struct ieee80211_local *local = hw_to_local(hw);
spin_lock_bh(&local->active_txq_lock[ac]);
local->schedule_round[ac]++;
-}
-EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-
-void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
- __releases(txq_lock)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
spin_unlock_bh(&local->active_txq_lock[ac]);
}
-EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index dc07fcc7938e..802db01e3075 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <net/ncsi.h>
@@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
/* Increase mac address by 1 for BMC's address */
- saddr.sa_data[ETH_ALEN - 1]++;
+ eth_addr_inc((u8 *)saddr.sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ return -ENXIO;
+
ret = ops->ndo_set_mac_address(ndev, &saddr);
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
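[Editorial aside: instead of bumping only the last byte, eth_addr_inc() carries across all six bytes, and the result is rejected by is_valid_ether_addr() if it is no longer a usable unicast address. A rough user-space sketch of those two steps, assuming nothing beyond standard C (helper names are illustrative, not the kernel's):]

	#include <stdbool.h>
	#include <stdint.h>

	#define ETH_ALEN 6

	/* Increment a MAC address treated as a big-endian 48-bit integer. */
	static void mac_addr_inc(uint8_t addr[ETH_ALEN])
	{
		int i;

		for (i = ETH_ALEN - 1; i >= 0; i--) {
			if (++addr[i] != 0)	/* stop once there is no carry */
				break;
		}
	}

	/* Valid unicast: not all-zero and the multicast/group bit is clear. */
	static bool mac_addr_is_valid_unicast(const uint8_t addr[ETH_ALEN])
	{
		uint8_t acc = 0;
		int i;

		for (i = 0; i < ETH_ALEN; i++)
			acc |= addr[i];

		return acc != 0 && !(addr[0] & 0x01);
	}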
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 43bbaa32b1d6..14457551bcb4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
if (!cp) {
int v;
- if (!sysctl_schedule_icmp(ipvs))
+ if (ipip || !sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 82bfbeef46af..2a714527cde1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
@@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * Intentionally doesn't re-use any of the seeds used for hash
+ * table location; we assume the id gets exposed to userspace.
+ *
+ * The following nf_conn items do not change throughout the lifetime
+ * of the nf_conn after it has been committed to the main hash table:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->ext address
+ * 3. nf_conn->master address (normally NULL)
+ * 4. tuple
+ * 5. the associated net namespace
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+ static __read_mostly siphash_key_t ct_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+ a = (unsigned long)ct;
+ b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+ c = (unsigned long)ct->ext;
+ d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
+ &ct_id_seed);
+#ifdef CONFIG_64BIT
+ return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+ return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
static void
clean_from_lists(struct nf_conn *ct)
{
@@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* set conntrack timestamp, if enabled. */
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp) {
- if (skb->tstamp == 0)
- __net_timestamp(skb);
+ if (tstamp)
+ tstamp->start = ktime_get_real_ns();
- tstamp->start = ktime_to_ns(skb->tstamp);
- }
/* Since the lookup is lockless, hash insertion must be done after
* starting the timer and setting the CONFIRMED bit. The RCU barriers
* guarantee that no other CPU can find the conntrack before the above
@@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net,
/* save hash for reusing when confirming */
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
ct->status = 0;
+ ct->timeout = 0;
write_pnet(&ct->ct_net, net);
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
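[Editorial aside: nf_ct_get_id() stops exposing the conntrack's kernel address as its netlink ID; the ID becomes a keyed hash (siphash) over fields that stay constant for the entry's lifetime, so userspace still gets a stable, mostly-unique identifier without learning a kernel pointer. A small user-space sketch of the pattern, with a trivial keyed FNV-1a standing in for siphash; the struct, seed and function names are illustrative only:]

	#include <stddef.h>
	#include <stdint.h>

	struct conn {
		uint32_t tuple[4];	/* stable per-connection data */
		void *ext;
		void *master;
	};

	/* Keyed FNV-1a over a buffer; a stand-in for siphash in this sketch. */
	static uint64_t keyed_hash(const void *data, size_t len, uint64_t seed)
	{
		const uint8_t *p = data;
		uint64_t h = 1469598103934665603ULL ^ seed;
		size_t i;

		for (i = 0; i < len; i++) {
			h ^= p[i];
			h *= 1099511628211ULL;
		}
		return h;
	}

	/* Derive an opaque ID from addresses plus tuple data instead of
	 * handing the raw pointer itself to userspace.
	 */
	static uint32_t conn_get_id(const struct conn *c, uint64_t boot_seed)
	{
		uint64_t a = (uintptr_t)c;
		uint64_t b = (uintptr_t)c->ext ^ (uintptr_t)c->master;
		uint64_t t = keyed_hash(c->tuple, sizeof(c->tuple), boot_seed);

		return (uint32_t)keyed_hash(&t, sizeof(t), boot_seed ^ a ^ b);
	}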
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 66c596d287a5..d7f61b0547c6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/siphash.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -485,7 +486,9 @@ nla_put_failure:
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+ __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+ if (nla_put_be32(skb, CTA_ID, id))
goto nla_put_failure;
return 0;
@@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
}
if (cda[CTA_ID]) {
- u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
- if (id != (u32)(unsigned long)ct) {
+ __be32 id = nla_get_be32(cda[CTA_ID]);
+
+ if (id != (__force __be32)nf_ct_get_id(ct)) {
nf_ct_put(ct);
return -ENOENT;
}
@@ -2692,6 +2696,25 @@ nla_put_failure:
static const union nf_inet_addr any_addr;
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+ static __read_mostly siphash_key_t exp_id_seed;
+ unsigned long a, b, c, d;
+
+ net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+ a = (unsigned long)exp;
+ b = (unsigned long)exp->helper;
+ c = (unsigned long)exp->master;
+ d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+ return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+ return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
+
static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
@@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
}
#endif
if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
- nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+ nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
goto nla_put_failure;
@@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
if (cda[CTA_EXPECT_ID]) {
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
- if (ntohl(id) != (u32)(unsigned long)exp) {
+
+ if (id != nf_expect_get_id(exp)) {
nf_ct_expect_put(exp);
return -ENOENT;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b9403a266a2e..37bb530d848f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
struct va_format vaf;
va_list args;
- if (net->ct.sysctl_log_invalid != protonum ||
+ if (net->ct.sysctl_log_invalid != protonum &&
net->ct.sysctl_log_invalid != IPPROTO_RAW)
return;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 7df477996b16..9becac953587 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
- const struct nf_hook_state *state)
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff,
+ const struct nf_hook_state *state,
+ u8 l4proto, union nf_inet_addr *outer_daddr)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_zone *zone;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ union nf_inet_addr *ct_daddr;
+ enum ip_conntrack_dir dir;
+ struct nf_conn *ct;
WARN_ON(skb_nfct(skb));
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
- if (!nf_ct_get_tuplepr(skb,
- skb_network_offset(skb) + ip_hdrlen(skb)
- + sizeof(struct icmphdr),
- PF_INET, state->net, &origtuple)) {
- pr_debug("icmp_error_message: failed to get tuple\n");
+ if (!nf_ct_get_tuplepr(skb, dataoff,
+ state->pf, state->net, &origtuple))
return -NF_ACCEPT;
- }
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
- pr_debug("icmp_error_message: no match\n");
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple))
return -NF_ACCEPT;
- }
-
- ctinfo = IP_CT_RELATED;
h = nf_conntrack_find_get(state->net, zone, &innertuple);
- if (!h) {
- pr_debug("icmp_error_message: no match\n");
+ if (!h)
+ return -NF_ACCEPT;
+
+ /* Consider: A -> T (=This machine) -> B
+ * Conntrack entry will look like this:
+ * Original: A->B
+ * Reply: B->T (SNAT case) OR A
+ *
+ * When this function runs, we got packet that looks like this:
+ * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+ *
+ * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+ * so we should expect that destination of the found connection
+ * matches outer header destination address.
+ *
+ * In above example, we can consider these two cases:
+ * 1. Error coming in reply direction from B or M (middle box) to
+ * T (SNAT case) or A.
+ * Inner saddr will be B, dst will be T or A.
+ * The found conntrack will be reply tuple (B->T/A).
+ * 2. Error coming in original direction from A or M to B.
+ * Inner saddr will be A, inner daddr will be B.
+ * The found conntrack will be original tuple (A->B).
+ *
+ * In both cases, conntrack[dir].dst == inner.dst.
+ *
+ * A bogus packet could look like this:
+ * Inner: B->T
+ * Outer: B->X (other machine reachable by T).
+ *
+ * In this case, lookup yields connection A->B and will
+ * set packet from B->X as *RELATED*, even though no connection
+ * from X was ever seen.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ dir = NF_CT_DIRECTION(h);
+ ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+ if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+ if (state->pf == AF_INET) {
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ l4proto,
+ "outer daddr %pI4 != inner %pI4",
+ &outer_daddr->ip, &ct_daddr->ip);
+ } else if (state->pf == AF_INET6) {
+ nf_l4proto_log_invalid(skb, state->net, state->pf,
+ l4proto,
+ "outer daddr %pI6 != inner %pI6",
+ &outer_daddr->ip6, &ct_daddr->ip6);
+ }
+ nf_ct_put(ct);
return -NF_ACCEPT;
}
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ ctinfo = IP_CT_RELATED;
+ if (dir == IP_CT_DIR_REPLY)
ctinfo += IP_CT_IS_REPLY;
/* Update skb to refer to this connection */
- nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+ nf_ct_set(skb, ct, ctinfo);
return NF_ACCEPT;
}
@@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
struct sk_buff *skb, unsigned int dataoff,
const struct nf_hook_state *state)
{
+ union nf_inet_addr outer_daddr;
const struct icmphdr *icmph;
struct icmphdr _ih;
/* Not enough header? */
- icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+ icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
if (icmph == NULL) {
icmp_error_log(skb, state, "short packet");
return -NF_ACCEPT;
@@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(tmpl, skb, state);
+ memset(&outer_daddr, 0, sizeof(outer_daddr));
+ outer_daddr.ip = ip_hdr(skb)->daddr;
+
+ dataoff += sizeof(*icmph);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMP, &outer_daddr);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index bec4a3211658..c63ee3612855 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
- struct sk_buff *skb,
- unsigned int icmp6off)
-{
- struct nf_conntrack_tuple intuple, origtuple;
- const struct nf_conntrack_tuple_hash *h;
- enum ip_conntrack_info ctinfo;
- struct nf_conntrack_zone tmp;
-
- WARN_ON(skb_nfct(skb));
-
- /* Are they talking about one of our connections? */
- if (!nf_ct_get_tuplepr(skb,
- skb_network_offset(skb)
- + sizeof(struct ipv6hdr)
- + sizeof(struct icmp6hdr),
- PF_INET6, net, &origtuple)) {
- pr_debug("icmpv6_error: Can't get tuple\n");
- return -NF_ACCEPT;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
- pr_debug("icmpv6_error: Can't invert tuple\n");
- return -NF_ACCEPT;
- }
-
- ctinfo = IP_CT_RELATED;
-
- h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
- &intuple);
- if (!h) {
- pr_debug("icmpv6_error: no match\n");
- return -NF_ACCEPT;
- } else {
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
- ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
- return NF_ACCEPT;
-}
static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
@@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
unsigned int dataoff,
const struct nf_hook_state *state)
{
+ union nf_inet_addr outer_daddr;
const struct icmp6hdr *icmp6h;
struct icmp6hdr _ih;
int type;
@@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
if (icmp6h->icmp6_type >= 128)
return NF_ACCEPT;
- return icmpv6_error_message(state->net, tmpl, skb, dataoff);
+ memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+ sizeof(outer_daddr.ip6));
+ dataoff += sizeof(*icmp6h);
+ return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+ IPPROTO_ICMPV6, &outer_daddr);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index af7dc6537758..000952719adf 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
case IPPROTO_ICMPV6:
/* id is same for either direction... */
keyptr = &tuple->src.u.icmp.id;
- min = range->min_proto.icmp.id;
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ min = 0;
+ range_size = 65536;
+ } else {
+ min = ntohs(range->min_proto.icmp.id);
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ }
goto find_free_id;
#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
case IPPROTO_GRE:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ef7772e976cc..1606eaa5ae0d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net,
if (IS_ERR(type))
return PTR_ERR(type);
}
- if (!(type->hook_mask & (1 << hook->num)))
+ if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
return -EOPNOTSUPP;
if (type->type == NFT_CHAIN_T_NAT &&
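[Editorial aside: the added hook->num test matters because shifting by a value at or beyond the width of the mask is undefined behaviour and could appear to "match" bits that do not exist. The guard-then-shift pattern in isolation, plain C with an illustrative bound:]

	#include <stdbool.h>
	#include <stdint.h>

	#define MAX_HOOKS 8	/* illustrative bound, not the kernel's value */

	static bool hook_supported(uint32_t hook_mask, unsigned int hooknum)
	{
		/* Validate the index before using it as a shift count. */
		if (hooknum >= MAX_HOOKS)
			return false;

		return hook_mask & (1u << hooknum);
	}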
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b1f9c5303f02..0b3347570265 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
}
- if (skb->tstamp) {
+ if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
struct nfulnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
ts.sec = cpu_to_be64(kts.tv_sec);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 0dcc3592d053..e057b2961d31 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nfqnl_put_bridge(entry, skb) < 0)
goto nla_put_failure;
- if (entskb->tstamp) {
+ if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
struct nfqnl_msg_packet_timestamp ts;
struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index c13bcd0ab491..8dbb4d48f2ed 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
s64 stamp;
/*
- * We cannot use get_seconds() instead of __net_timestamp() here.
+ * We need real time here, but we can neither use skb->tstamp
+ * nor __net_timestamp().
+ *
+ * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+ * use different clock types (real vs monotonic).
+ *
* Suppose you have two rules:
- * 1. match before 13:00
- * 2. match after 13:00
+ * 1. match before 13:00
+ * 2. match after 13:00
+ *
* If you match against processing time (get_seconds) it
* may happen that the same packet matches both rules if
- * it arrived at the right moment before 13:00.
+ * it arrived at the right moment before 13:00, so it would be
+ * better to check skb->tstamp and set it via __net_timestamp()
+ * if needed. This, however, breaks the tx timestamp of outgoing
+ * packets and causes them to get delayed forever by the fq packet
+ * scheduler.
*/
- if (skb->tstamp == 0)
- __net_timestamp((struct sk_buff *)skb);
-
- stamp = ktime_to_ns(skb->tstamp);
- stamp = div_s64(stamp, NSEC_PER_SEC);
+ stamp = get_seconds();
if (info->flags & XT_TIME_LOCAL_TZ)
/* Adjust for local timezone */
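[Editorial aside: the rewritten comment explains why the match now reads the wall clock directly instead of stamping the skb: skb->tstamp doubles as the fq scheduler's transmit time, so writing a wall-clock value there breaks outgoing packets. In user space the equivalent of get_seconds() is simply the realtime clock, e.g.:]

	#include <time.h>

	/* Wall-clock seconds since the epoch, i.e. the notion of time a
	 * "match before/after 13:00" rule needs; CLOCK_MONOTONIC would not
	 * line up with a time-of-day rule at all.
	 */
	static long long wallclock_seconds(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_REALTIME, &ts);
		return (long long)ts.tv_sec;
	}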
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f28e937320a3..216ab915dd54 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -988,7 +988,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err = 0;
- unsigned long groups = nladdr->nl_groups;
+ unsigned long groups;
bool bound;
if (addr_len < sizeof(struct sockaddr_nl))
@@ -996,6 +996,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nladdr->nl_family != AF_NETLINK)
return -EINVAL;
+ groups = nladdr->nl_groups;
/* Only superuser is allowed to listen multicasts */
if (groups) {
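[Editorial aside: moving the nl_groups read below the addr_len check means no sockaddr field is touched before the caller-supplied length has been validated. The same validate-then-read pattern in a generic form; the struct and names are only for illustration:]

	#include <errno.h>
	#include <stddef.h>
	#include <string.h>

	struct my_sockaddr {
		unsigned short family;
		unsigned long groups;
	};

	static int validate_and_read(const void *addr, size_t addr_len,
				     unsigned long *groups)
	{
		struct my_sockaddr sa;

		/* Check the length first ... */
		if (addr_len < sizeof(sa))
			return -EINVAL;

		/* ... and only then look at the contents. */
		memcpy(&sa, addr, sizeof(sa));
		*groups = sa.groups;
		return 0;
	}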
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index f0ec068e1d02..cb69d35c8e6a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -362,8 +362,8 @@ int genl_register_family(struct genl_family *family)
} else
family->attrbuf = NULL;
- family->id = idr_alloc(&genl_fam_idr, family,
- start, end + 1, GFP_KERNEL);
+ family->id = idr_alloc_cyclic(&genl_fam_idr, family,
+ start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
goto errout_free;
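[Editorial aside: idr_alloc_cyclic() hands out IDs starting just after the last one allocated instead of always reusing the lowest free slot, so a freshly freed family ID is not immediately recycled. A toy illustration of cyclic allocation over a fixed range; this is a sketch of the idea, not the idr implementation:]

	#include <stdbool.h>

	#define ID_MIN  16
	#define ID_MAX  31	/* inclusive; illustrative range */
	#define NSLOTS  (ID_MAX - ID_MIN + 1)

	static bool in_use[NSLOTS];
	static int next_hint = ID_MIN;

	/* Return a free ID, searching from just past the last allocation and
	 * wrapping around; -1 if the range is exhausted.
	 */
	static int alloc_id_cyclic(void)
	{
		int i;

		for (i = 0; i < NSLOTS; i++) {
			int id = ID_MIN + (next_hint - ID_MIN + i) % NSLOTS;

			if (!in_use[id - ID_MIN]) {
				in_use[id - ID_MIN] = true;
				next_hint = id + 1;
				return id;
			}
		}
		return -1;
	}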
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 1d3144d19903..71ffd1a6dc7c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1392,18 +1392,22 @@ static int __init nr_proto_init(void)
int i;
int rc = proto_register(&nr_proto, 0);
- if (rc != 0)
- goto out;
+ if (rc)
+ return rc;
if (nr_ndevs > 0x7fffffff/sizeof(struct net_device *)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - nr_ndevs parameter to large\n");
- return -1;
+ pr_err("NET/ROM: %s - nr_ndevs parameter too large\n",
+ __func__);
+ rc = -EINVAL;
+ goto unregister_proto;
}
dev_nr = kcalloc(nr_ndevs, sizeof(struct net_device *), GFP_KERNEL);
- if (dev_nr == NULL) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device array\n");
- return -1;
+ if (!dev_nr) {
+ pr_err("NET/ROM: %s - unable to allocate device array\n",
+ __func__);
+ rc = -ENOMEM;
+ goto unregister_proto;
}
for (i = 0; i < nr_ndevs; i++) {
@@ -1413,13 +1417,13 @@ static int __init nr_proto_init(void)
sprintf(name, "nr%d", i);
dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, nr_setup);
if (!dev) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
+ rc = -ENOMEM;
goto fail;
}
dev->base_addr = i;
- if (register_netdev(dev)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register network device\n");
+ rc = register_netdev(dev);
+ if (rc) {
free_netdev(dev);
goto fail;
}
@@ -1427,36 +1431,64 @@ static int __init nr_proto_init(void)
dev_nr[i] = dev;
}
- if (sock_register(&nr_family_ops)) {
- printk(KERN_ERR "NET/ROM: nr_proto_init - unable to register socket family\n");
+ rc = sock_register(&nr_family_ops);
+ if (rc)
goto fail;
- }
- register_netdevice_notifier(&nr_dev_notifier);
+ rc = register_netdevice_notifier(&nr_dev_notifier);
+ if (rc)
+ goto out_sock;
ax25_register_pid(&nr_pid);
ax25_linkfail_register(&nr_linkfail_notifier);
#ifdef CONFIG_SYSCTL
- nr_register_sysctl();
+ rc = nr_register_sysctl();
+ if (rc)
+ goto out_sysctl;
#endif
nr_loopback_init();
- proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
- proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
- proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
-out:
- return rc;
+ rc = -ENOMEM;
+ if (!proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops))
+ goto proc_remove1;
+ if (!proc_create_seq("nr_neigh", 0444, init_net.proc_net,
+ &nr_neigh_seqops))
+ goto proc_remove2;
+ if (!proc_create_seq("nr_nodes", 0444, init_net.proc_net,
+ &nr_node_seqops))
+ goto proc_remove3;
+
+ return 0;
+
+proc_remove3:
+ remove_proc_entry("nr_neigh", init_net.proc_net);
+proc_remove2:
+ remove_proc_entry("nr", init_net.proc_net);
+proc_remove1:
+
+ nr_loopback_clear();
+ nr_rt_free();
+
+#ifdef CONFIG_SYSCTL
+ nr_unregister_sysctl();
+out_sysctl:
+#endif
+ ax25_linkfail_release(&nr_linkfail_notifier);
+ ax25_protocol_release(AX25_P_NETROM);
+ unregister_netdevice_notifier(&nr_dev_notifier);
+out_sock:
+ sock_unregister(PF_NETROM);
fail:
while (--i >= 0) {
unregister_netdev(dev_nr[i]);
free_netdev(dev_nr[i]);
}
kfree(dev_nr);
+unregister_proto:
proto_unregister(&nr_proto);
- rc = -1;
- goto out;
+ return rc;
}
module_init(nr_proto_init);
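[Editorial aside: the reworked nr_proto_init() is a textbook example of the kernel's goto-based error unwinding: each completed step gets a label that tears it back down, and a failure jumps to the label for the last step that succeeded, so resources are released in reverse order exactly once. A stripped-down sketch of the idiom; the resource names are made up:]

	static int setup_a(void) { return 0; }
	static void teardown_a(void) { }
	static int setup_b(void) { return 0; }
	static void teardown_b(void) { }
	static int setup_c(void) { return -1; }	/* pretend this step fails */

	static int init_all(void)
	{
		int rc;

		rc = setup_a();
		if (rc)
			return rc;

		rc = setup_b();
		if (rc)
			goto undo_a;

		rc = setup_c();
		if (rc)
			goto undo_b;

		return 0;

	undo_b:
		teardown_b();
	undo_a:
		teardown_a();
		return rc;
	}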
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 215ad22a9647..93d13f019981 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -70,7 +70,7 @@ static void nr_loopback_timer(struct timer_list *unused)
}
}
-void __exit nr_loopback_clear(void)
+void nr_loopback_clear(void)
{
del_timer_sync(&loopback_timer);
skb_queue_purge(&loopback_queue);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 6485f593e2f0..b76aa668a94b 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -953,7 +953,7 @@ const struct seq_operations nr_neigh_seqops = {
/*
* Free all memory associated with the nodes and routes lists.
*/
-void __exit nr_rt_free(void)
+void nr_rt_free(void)
{
struct nr_neigh *s = NULL;
struct nr_node *t = NULL;
diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c
index ba1c368b3f18..771011b84270 100644
--- a/net/netrom/sysctl_net_netrom.c
+++ b/net/netrom/sysctl_net_netrom.c
@@ -146,9 +146,12 @@ static struct ctl_table nr_table[] = {
{ }
};
-void __init nr_register_sysctl(void)
+int __init nr_register_sysctl(void)
{
nr_table_header = register_net_sysctl(&init_net, "net/netrom", nr_table);
+ if (!nr_table_header)
+ return -ENOMEM;
+ return 0;
}
void nr_unregister_sysctl(void)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9419c5cf4de5..9b81813dd16a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2602,8 +2602,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
void *ph;
DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+ unsigned char *addr = NULL;
int tp_len, size_max;
- unsigned char *addr;
void *data;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
@@ -2614,7 +2614,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2624,10 +2623,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_halen ? saddr->sll_addr : NULL;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- if (addr && dev && saddr->sll_halen < dev->addr_len)
- goto out_put;
+ if (po->sk.sk_socket->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_put;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -2799,7 +2801,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- unsigned char *addr;
+ unsigned char *addr = NULL;
int err, reserve = 0;
struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2816,7 +2818,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
- addr = NULL;
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2824,10 +2825,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
goto out;
proto = saddr->sll_protocol;
- addr = saddr->sll_halen ? saddr->sll_addr : NULL;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- if (addr && dev && saddr->sll_halen < dev->addr_len)
- goto out_unlock;
+ if (sock->type == SOCK_DGRAM) {
+ if (dev && msg->msg_namelen < dev->addr_len +
+ offsetof(struct sockaddr_ll, sll_addr))
+ goto out_unlock;
+ addr = saddr->sll_addr;
+ }
}
err = -ENXIO;
@@ -3344,20 +3348,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ int copy_len;
+
/* If the address length field is there to be filled
* in, we fill it in now.
*/
if (sock->type == SOCK_PACKET) {
__sockaddr_check_size(sizeof(struct sockaddr_pkt));
msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ copy_len = msg->msg_namelen;
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
+ copy_len = msg->msg_namelen;
+ if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
+ memset(msg->msg_name +
+ offsetof(struct sockaddr_ll, sll_addr),
+ 0, sizeof(sll->sll_addr));
+ msg->msg_namelen = sizeof(struct sockaddr_ll);
+ }
}
- memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
- msg->msg_namelen);
+ memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
if (pkt_sk(sk)->auxdata) {
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index d6cc97fbbbb0..2b969f99ef13 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -543,6 +543,9 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
+
lock_sock(sk);
switch (uaddr->sa_family) {
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 17c9d9f0c848..0f4398e7f2a7 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -173,6 +173,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* We allow an RDS socket to be bound to either IPv4 or IPv6
* address.
*/
+ if (addr_len < offsetofend(struct sockaddr, sa_family))
+ return -EINVAL;
if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index 31cf37da4510..93c0437e6a5f 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
else
pool = rds_ibdev->mr_1m_pool;
+ if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+ queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+ /* Switch pools if one of the pool is reaching upper limit */
+ if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ pool = rds_ibdev->mr_1m_pool;
+ else
+ pool = rds_ibdev->mr_8k_pool;
+ }
+
ibmr = rds_ib_try_reuse_ibmr(pool);
if (ibmr)
return ibmr;
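[Editorial aside: the hunk moves the flush trigger to allocation time and adds a second threshold: a flush is scheduled once the dirty count reaches 10% of the pool, and allocation spills over to the other pool once it reaches 90%. The two cut-offs in isolation, as a plain-C sketch with made-up struct names:]

	#include <stdbool.h>

	struct mr_pool {
		int dirty_count;
		int max_items;
	};

	static bool pool_needs_flush(const struct mr_pool *p)
	{
		return p->dirty_count >= p->max_items / 10;	/* >= 10% dirty */
	}

	static bool pool_nearly_full(const struct mr_pool *p)
	{
		return p->dirty_count >= p->max_items * 9 / 10;	/* >= 90% dirty */
	}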
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 63c8d107adcf..d664e9ade74d 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
struct rds_ib_mr *ibmr = NULL;
int iter = 0;
- if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
- queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
while (1) {
ibmr = rds_ib_reuse_mr(pool);
if (ibmr)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 70559854837e..8946c89d7392 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -772,7 +772,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
unsigned long frag_off;
unsigned long to_copy;
unsigned long copied;
- uint64_t uncongested = 0;
+ __le64 uncongested = 0;
void *addr;
/* catch completely corrupt packets */
@@ -789,7 +789,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
copied = 0;
while (copied < RDS_CONG_MAP_BYTES) {
- uint64_t *src, *dst;
+ __le64 *src, *dst;
unsigned int k;
to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
@@ -824,9 +824,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
}
/* the congestion map is in little endian order */
- uncongested = le64_to_cpu(uncongested);
-
- rds_cong_map_updated(map, uncongested);
+ rds_cong_map_updated(map, le64_to_cpu(uncongested));
}
static void rds_ib_process_recv(struct rds_connection *conn,
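[Editorial aside: the congestion map arrives in little-endian order, so the accumulator is now typed __le64 and converted exactly once, when the final value is handed to rds_cong_map_updated(). A user-space equivalent of that single late conversion, using glibc's le64toh in place of le64_to_cpu:]

	#include <endian.h>
	#include <stdint.h>

	/* OR together little-endian words and convert to host order once at
	 * the end, rather than converting every partial value.
	 */
	static uint64_t accumulate_le64(const uint64_t *le_words, int n)
	{
		uint64_t acc_le = 0;
		int i;

		for (i = 0; i < n; i++)
			acc_le |= le_words[i];

		return le64toh(acc_le);
	}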
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 7af4f99c4a93..094a6621f8e8 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
@@ -35,29 +36,27 @@ static int rose_loopback_running(void)
int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
{
- struct sk_buff *skbn;
+ struct sk_buff *skbn = NULL;
- skbn = skb_clone(skb, GFP_ATOMIC);
+ if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+ skbn = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(skb);
-
- if (skbn != NULL) {
+ if (skbn) {
+ consume_skb(skb);
skb_queue_tail(&loopback_queue, skbn);
if (!rose_loopback_running())
rose_set_loopback_timer();
+ } else {
+ kfree_skb(skb);
}
return 1;
}
-
static void rose_set_loopback_timer(void)
{
- del_timer(&loopback_timer);
-
- loopback_timer.expires = jiffies + 10;
- add_timer(&loopback_timer);
+ mod_timer(&loopback_timer, jiffies + 10);
}
static void rose_loopback_timer(struct timer_list *unused)
@@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused)
struct sock *sk;
unsigned short frametype;
unsigned int lci_i, lci_o;
+ int count;
- while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+ for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+ skb = skb_dequeue(&loopback_queue);
+ if (!skb)
+ return;
if (skb->len < ROSE_MIN_LEN) {
kfree_skb(skb);
continue;
@@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused)
kfree_skb(skb);
}
}
+ if (!skb_queue_empty(&loopback_queue))
+ mod_timer(&loopback_timer, jiffies + 1);
}
void __exit rose_loopback_clear(void)
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 96f2952bbdfd..ae8c5d7f3bf1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -135,7 +135,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
struct rxrpc_local *local;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- u16 service_id = srx->srx_service;
+ u16 service_id;
int ret;
_enter("%p,%p,%d", rx, saddr, len);
@@ -143,6 +143,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
ret = rxrpc_validate_address(rx, srx, len);
if (ret < 0)
goto error;
+ service_id = srx->srx_service;
lock_sock(&rx->sk);
@@ -370,18 +371,22 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* rxrpc_kernel_check_life - Check to see whether a call is still alive
* @sock: The socket the call is on
* @call: The call to check
+ * @_life: Where to store the life value
*
* Allow a kernel service to find out whether a call is still alive - ie. we're
- * getting ACKs from the server. Returns a number representing the life state
- * which can be compared to that returned by a previous call.
+ * getting ACKs from the server. Passes back in *_life a number representing
+ * the life state which can be compared to that returned by a previous call and
+ * returns true if the call is still alive.
*
* If the life state stalls, rxrpc_kernel_probe_life() should be called and
* then 2RTT waited.
*/
-u32 rxrpc_kernel_check_life(const struct socket *sock,
- const struct rxrpc_call *call)
+bool rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call,
+ u32 *_life)
{
- return call->acks_latest;
+ *_life = call->acks_latest;
+ return call->state != RXRPC_CALL_COMPLETE;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
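[Editorial aside: with the new signature, the life counter comes back through *_life and the return value says whether the call has completed, so a caller can tell "finished" apart from "still running but stalled". A hedged sketch of how a kernel-side user might consume it; the surrounding state (last_life) is invented for illustration:]

	/* Sketch only: 'sock', 'call' and 'last_life' are assumed to be the
	 * caller's existing state, as in an afs-style user of this API.
	 */
	static bool my_call_progressing(struct socket *sock,
					struct rxrpc_call *call, u32 *last_life)
	{
		u32 life;

		if (!rxrpc_kernel_check_life(sock, call, &life))
			return false;	/* call has completed */

		if (life == *last_life)
			return false;	/* alive, but no new ACKs: consider probing */

		*last_life = life;
		return true;
	}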
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 4b1a534d290a..062ca9dc29b8 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -654,6 +654,7 @@ struct rxrpc_call {
u8 ackr_reason; /* reason to ACK */
u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
+ rxrpc_serial_t ackr_first_seq; /* first sequence number received */
rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8aa2937b069f..fe96881a334d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -604,30 +604,30 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
- if (list_empty(&rxnet->calls))
- return;
+ if (!list_empty(&rxnet->calls)) {
+ write_lock(&rxnet->call_lock);
- write_lock(&rxnet->call_lock);
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next,
+ struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
- while (!list_empty(&rxnet->calls)) {
- call = list_entry(rxnet->calls.next, struct rxrpc_call, link);
- _debug("Zapping call %p", call);
+ rxrpc_see_call(call);
+ list_del_init(&call->link);
- rxrpc_see_call(call);
- list_del_init(&call->link);
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
- pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
- rxrpc_call_states[call->state],
- call->flags, call->events);
+ write_unlock(&rxnet->call_lock);
+ cond_resched();
+ write_lock(&rxnet->call_lock);
+ }
write_unlock(&rxnet->call_lock);
- cond_resched();
- write_lock(&rxnet->call_lock);
}
- write_unlock(&rxnet->call_lock);
-
atomic_dec(&rxnet->nr_calls);
wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index b6fca8ebb117..8d31fb4c51e1 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -153,7 +153,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
* pass a connection-level abort onto all calls on that connection
*/
static void rxrpc_abort_calls(struct rxrpc_connection *conn,
- enum rxrpc_call_completion compl)
+ enum rxrpc_call_completion compl,
+ rxrpc_serial_t serial)
{
struct rxrpc_call *call;
int i;
@@ -173,6 +174,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
call->call_id, 0,
conn->abort_code,
conn->error);
+ else
+ trace_rxrpc_rx_abort(call, serial,
+ conn->abort_code);
if (rxrpc_set_call_completion(call, compl,
conn->abort_code,
conn->error))
@@ -213,8 +217,6 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
conn->state = RXRPC_CONN_LOCALLY_ABORTED;
spin_unlock_bh(&conn->state_lock);
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED);
-
msg.msg_name = &conn->params.peer->srx.transport;
msg.msg_namelen = conn->params.peer->srx.transport_len;
msg.msg_control = NULL;
@@ -242,6 +244,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len;
serial = atomic_inc_return(&conn->serial);
+ rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
whdr.serial = htonl(serial);
_proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code);
@@ -321,7 +324,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
conn->error = -ECONNABORTED;
conn->abort_code = abort_code;
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED);
+ rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 9128aa0e40aa..c2c35cf4e308 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -837,7 +837,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
u8 acks[RXRPC_MAXACKS];
} buf;
rxrpc_serial_t acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
int nr_acks, offset, ioffset;
_enter("");
@@ -851,13 +851,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
acked_serial = ntohl(buf.ack.serial);
first_soft_ack = ntohl(buf.ack.firstPacket);
+ prev_pkt = ntohl(buf.ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = buf.ack.nAcks;
summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
buf.ack.reason : RXRPC_ACK__INVALID);
trace_rxrpc_rx_ack(call, sp->hdr.serial, acked_serial,
- first_soft_ack, ntohl(buf.ack.previousPacket),
+ first_soft_ack, prev_pkt,
summary.ack_reason, nr_acks);
if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
@@ -878,8 +879,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_propose_ack_respond_to_ack);
}
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest))
+ /* Discard any out-of-order or duplicate ACKs (outside lock). */
+ if (before(first_soft_ack, call->ackr_first_seq) ||
+ before(prev_pkt, call->ackr_prev_seq))
return;
buf.info.rxMTU = 0;
@@ -890,12 +892,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
spin_lock(&call->input_lock);
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest))
+ /* Discard any out-of-order or duplicate ACKs (inside lock). */
+ if (before(first_soft_ack, call->ackr_first_seq) ||
+ before(prev_pkt, call->ackr_prev_seq))
goto out;
call->acks_latest_ts = skb->tstamp;
call->acks_latest = sp->hdr.serial;
+ call->ackr_first_seq = first_soft_ack;
+ call->ackr_prev_seq = prev_pkt;
+
/* Parse rwind and mtu sizes if provided. */
if (buf.info.rxMTU)
rxrpc_input_ackinfo(call, skb, &buf.info);
@@ -1155,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
* handle data received on the local endpoint
* - may be called in interrupt context
*
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP
+ * socket from being cleared in the middle of processing this function.
*
* Called with the RCU read lock held from the IP layer via UDP.
*/
int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
struct rxrpc_connection *conn;
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
struct rxrpc_skb_priv *sp;
- struct rxrpc_local *local = udp_sk->sk_user_data;
struct rxrpc_peer *peer = NULL;
struct rxrpc_sock *rx = NULL;
unsigned int channel;
@@ -1175,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
_enter("%p", udp_sk);
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
if (skb->tstamp == 0)
skb->tstamp = ktime_get_real();
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 15cf42d5b53a..01959db51445 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -304,7 +304,8 @@ nomem:
ret = -ENOMEM;
sock_error:
mutex_unlock(&rxnet->local_mutex);
- kfree(local);
+ if (local)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
_leave(" = %d", ret);
return ERR_PTR(ret);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index bc05af89fc38..6e84d878053c 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -157,6 +157,11 @@ void rxrpc_error_report(struct sock *sk)
_enter("%p{%d}", sk, local->debug_id);
+ /* Clear the outstanding error value on the socket so that it doesn't
+ * cause kernel_sendmsg() to return it later.
+ */
+ sock_error(sk);
+
skb = sock_dequeue_err_skb(sk);
if (!skb) {
_leave("UDP socket errqueue empty");
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 46c9312085b1..bec64deb7b0a 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -152,12 +152,13 @@ static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
}
/*
- * Queue a DATA packet for transmission, set the resend timeout and send the
- * packet immediately
+ * Queue a DATA packet for transmission, set the resend timeout and send
+ * the packet immediately. Returns the error from rxrpc_send_data_packet()
+ * in case the caller wants to do something with it.
*/
-static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
- struct sk_buff *skb, bool last,
- rxrpc_notify_end_tx_t notify_end_tx)
+static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct sk_buff *skb, bool last,
+ rxrpc_notify_end_tx_t notify_end_tx)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned long now;
@@ -250,7 +251,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
out:
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
- _leave("");
+ _leave(" = %d", ret);
+ return ret;
}
/*
@@ -423,9 +425,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (ret < 0)
goto out;
- rxrpc_queue_packet(rx, call, skb,
- !msg_data_left(msg) && !more,
- notify_end_tx);
+ ret = rxrpc_queue_packet(rx, call, skb,
+ !msg_data_left(msg) && !more,
+ notify_end_tx);
+ /* Should check for failure here */
skb = NULL;
}
} while (msg_data_left(msg) > 0);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 1d143bc3f73d..4aa03588f87b 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1112,32 +1112,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
}
-/* Sent the next ASCONF packet currently stored in the association.
- * This happens after the ASCONF_ACK was succeffully processed.
- */
-static void sctp_cmd_send_asconf(struct sctp_association *asoc)
-{
- struct net *net = sock_net(asoc->base.sk);
-
- /* Send the next asconf chunk from the addip chunk
- * queue.
- */
- if (!list_empty(&asoc->addip_chunk_list)) {
- struct list_head *entry = asoc->addip_chunk_list.next;
- struct sctp_chunk *asconf = list_entry(entry,
- struct sctp_chunk, list);
- list_del_init(entry);
-
- /* Hold the chunk until an ASCONF_ACK is received. */
- sctp_chunk_hold(asconf);
- if (sctp_primitive_ASCONF(net, asoc, asconf))
- sctp_chunk_free(asconf);
- else
- asoc->addip_last_asconf = asconf;
- }
-}
-
-
/* These three macros allow us to pull the debugging code out of the
* main flow of sctp_do_sm() to keep attention focused on the real
* functionality there.
@@ -1783,9 +1757,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
}
sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
break;
- case SCTP_CMD_SEND_NEXT_ASCONF:
- sctp_cmd_send_asconf(asoc);
- break;
case SCTP_CMD_PURGE_ASCONF_QUEUE:
sctp_asconf_queue_teardown(asoc);
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index c9ae3404b1bb..713a669d2058 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3824,6 +3824,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+static enum sctp_disposition sctp_send_next_asconf(
+ struct net *net,
+ const struct sctp_endpoint *ep,
+ struct sctp_association *asoc,
+ const union sctp_subtype type,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *asconf;
+ struct list_head *entry;
+
+ if (list_empty(&asoc->addip_chunk_list))
+ return SCTP_DISPOSITION_CONSUME;
+
+ entry = asoc->addip_chunk_list.next;
+ asconf = list_entry(entry, struct sctp_chunk, list);
+
+ list_del_init(entry);
+ sctp_chunk_hold(asconf);
+ asoc->addip_last_asconf = asconf;
+
+ return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands);
+}
+
/*
* ADDIP Section 4.3 General rules for address manipulation
* When building TLV parameters for the ASCONF Chunk that will add or
@@ -3915,14 +3938,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
- asconf_ack)) {
- /* Successfully processed ASCONF_ACK. We can
- * release the next asconf if we have one.
- */
- sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
- SCTP_NULL());
- return SCTP_DISPOSITION_CONSUME;
- }
+ asconf_ack))
+ return sctp_send_next_asconf(net, ep,
+ (struct sctp_association *)asoc,
+ type, commands);
abort = sctp_make_abort(asoc, asconf_ack,
sizeof(struct sctp_errhdr));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9874e60c9b0d..4583fa914e62 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4847,7 +4847,8 @@ static int sctp_connect(struct sock *sk, struct sockaddr *addr,
}
/* Validate addr_len before calling common connect/connectx routine. */
- af = sctp_get_af_specific(addr->sa_family);
+ af = addr_len < offsetofend(struct sockaddr, sa_family) ? NULL :
+ sctp_get_af_specific(addr->sa_family);
if (!af || addr_len < af->sockaddr_len) {
err = -EINVAL;
} else {
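[Editorial aside: the added guard makes sure sa_family actually lies within the buffer the caller supplied before it is used to pick an address family, mirroring the rds_connect()/rds_bind() fixes earlier in this series. The check in isolation, user-space style; offsetofend is open-coded here because it is a kernel macro:]

	#include <errno.h>
	#include <stddef.h>
	#include <sys/socket.h>

	/* Kernel's offsetofend(): offset of the first byte past the member. */
	#define offsetofend(type, member) \
		(offsetof(type, member) + sizeof(((type *)0)->member))

	static int check_sockaddr_len(const struct sockaddr *addr, size_t addr_len)
	{
		/* sa_family must be fully inside the supplied buffer before it
		 * is dereferenced to choose an address family.
		 */
		if (addr_len < offsetofend(struct sockaddr, sa_family))
			return -EINVAL;

		return 0;
	}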
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 77ef53596d18..6f869ef49b32 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -167,10 +167,9 @@ static int smc_release(struct socket *sock)
if (sk->sk_state == SMC_CLOSED) {
if (smc->clcsock) {
- mutex_lock(&smc->clcsock_release_lock);
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- mutex_unlock(&smc->clcsock_release_lock);
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
}
if (!smc->use_fallback)
smc_conn_free(&smc->conn);
@@ -446,10 +445,19 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_switch_to_fallback(struct smc_sock *smc)
+{
+ smc->use_fallback = true;
+ if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
+ smc->clcsock->file = smc->sk.sk_socket->file;
+ smc->clcsock->file->private_data = smc->clcsock;
+ }
+}
+
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = reason_code;
smc_copy_sock_settings_to_clc(smc);
if (smc->sk.sk_state == SMC_INIT)
@@ -775,10 +783,14 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_err = -rc;
out:
- if (smc->sk.sk_err)
- smc->sk.sk_state_change(&smc->sk);
- else
- smc->sk.sk_write_space(&smc->sk);
+ if (!sock_flag(&smc->sk, SOCK_DEAD)) {
+ if (smc->sk.sk_err) {
+ smc->sk.sk_state_change(&smc->sk);
+ } else { /* allow polling before and after fallback decision */
+ smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
+ smc->sk.sk_write_space(&smc->sk);
+ }
+ }
kfree(smc->connect_info);
smc->connect_info = NULL;
release_sock(&smc->sk);
@@ -872,11 +884,11 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
if (rc < 0)
lsk->sk_err = -rc;
if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
@@ -927,16 +939,21 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ new_sk->sk_prot->unhash(new_sk);
if (isk->clcsock) {
sock_release(isk->clcsock);
isk->clcsock = NULL;
}
- new_sk->sk_prot->unhash(new_sk);
sock_put(new_sk); /* final */
continue;
}
- if (new_sock)
+ if (new_sock) {
sock_graft(new_sk, new_sock);
+ if (isk->use_fallback) {
+ smc_sk(new_sk)->clcsock->file = new_sock->file;
+ isk->clcsock->file->private_data = isk->clcsock;
+ }
+ }
return new_sk;
}
return NULL;
@@ -956,6 +973,7 @@ void smc_close_non_accepted(struct sock *sk)
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
}
+ sk->sk_prot->unhash(sk);
if (smc->clcsock) {
struct socket *tcp;
@@ -971,7 +989,6 @@ void smc_close_non_accepted(struct sock *sk)
smc_conn_free(&smc->conn);
}
release_sock(sk);
- sk->sk_prot->unhash(sk);
sock_put(sk); /* final sock_put */
}
@@ -1037,13 +1054,13 @@ static void smc_listen_out(struct smc_sock *new_smc)
struct smc_sock *lsmc = new_smc->listen_smc;
struct sock *newsmcsk = &new_smc->sk;
- lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
+ release_sock(&lsmc->sk);
} else { /* no longer listening */
smc_close_non_accepted(newsmcsk);
}
- release_sock(&lsmc->sk);
/* Wake up accept */
lsmc->sk.sk_data_ready(&lsmc->sk);
@@ -1087,7 +1104,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
return;
}
smc_conn_free(&new_smc->conn);
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = reason_code;
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code) < 0) {
@@ -1237,6 +1254,9 @@ static void smc_listen_work(struct work_struct *work)
int rc = 0;
u8 ibport;
+ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
+ return smc_listen_out_err(new_smc);
+
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
return;
@@ -1244,7 +1264,7 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- new_smc->use_fallback = true;
+ smc_switch_to_fallback(new_smc);
new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
smc_listen_out_connected(new_smc);
return;
@@ -1501,7 +1521,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
rc = -EINVAL;
@@ -1703,7 +1723,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT) {
- smc->use_fallback = true;
+ smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
if (!smc->use_fallback)
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 2ad37e998509..fc06720b53c1 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -21,6 +21,22 @@
#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
+/* release the clcsock that is assigned to the smc_sock */
+void smc_clcsock_release(struct smc_sock *smc)
+{
+ struct socket *tcp;
+
+ if (smc->listen_smc && current_work() != &smc->smc_listen_work)
+ cancel_work_sync(&smc->smc_listen_work);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (smc->clcsock) {
+ tcp = smc->clcsock;
+ smc->clcsock = NULL;
+ sock_release(tcp);
+ }
+ mutex_unlock(&smc->clcsock_release_lock);
+}
+
static void smc_close_cleanup_listen(struct sock *parent)
{
struct sock *sk;
@@ -321,6 +337,7 @@ static void smc_close_passive_work(struct work_struct *work)
close_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_cdc_conn_state_flags *rxflags;
+ bool release_clcsock = false;
struct sock *sk = &smc->sk;
int old_state;
@@ -400,13 +417,13 @@ wakeup:
if ((sk->sk_state == SMC_CLOSED) &&
(sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
- if (smc->clcsock) {
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- }
+ if (smc->clcsock)
+ release_clcsock = true;
}
}
release_sock(sk);
+ if (release_clcsock)
+ smc_clcsock_release(smc);
sock_put(sk); /* sock_hold done by schedulers of close_work */
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 19eb6a211c23..e0e3b5df25d2 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
+void smc_clcsock_release(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 2fff79db1a59..e89e918b88e0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -289,6 +289,11 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
INIT_LIST_HEAD(&smcd->vlan);
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
WQ_MEM_RECLAIM, name);
+ if (!smcd->event_wq) {
+ kfree(smcd->conn);
+ kfree(smcd);
+ return NULL;
+ }
return smcd;
}
EXPORT_SYMBOL_GPL(smcd_alloc_dev);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 8d2f6296279c..0285c7f9e79b 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -603,7 +603,8 @@ static int smc_pnet_flush(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- return smc_pnet_remove_by_pnetid(net, NULL);
+ smc_pnet_remove_by_pnetid(net, NULL);
+ return 0;
}
/* SMC_PNETID generic netlink operation definition */
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 860dcfb95ee4..fa6c977b4c41 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -140,13 +140,11 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
/* We are going to append to the frags_list of head.
* Need to unshare the frag_list.
*/
- if (skb_has_frag_list(head)) {
- err = skb_unclone(head, GFP_ATOMIC);
- if (err) {
- STRP_STATS_INCR(strp->stats.mem_fail);
- desc->error = err;
- return 0;
- }
+ err = skb_unclone(head, GFP_ATOMIC);
+ if (err) {
+ STRP_STATS_INCR(strp->stats.mem_fail);
+ desc->error = err;
+ return 0;
}
if (unlikely(skb_shinfo(head)->frag_list)) {
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 12bb23b8e0c5..261131dfa1f1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
+static inline int cache_is_valid(struct cache_head *h);
static void cache_fresh_locked(struct cache_head *head, time_t expiry,
struct cache_detail *detail);
static void cache_fresh_unlocked(struct cache_head *head,
@@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
if (cache_is_expired(detail, tmp)) {
hlist_del_init_rcu(&tmp->cache_list);
detail->entries --;
+ if (cache_is_valid(tmp) == -EAGAIN)
+ set_bit(CACHE_NEGATIVE, &tmp->flags);
cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1d0395ef62c9..8ff11dc98d7f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2081,8 +2081,8 @@ call_transmit_status(struct rpc_task *task)
* test first.
*/
if (rpc_task_transmitted(task)) {
- if (task->tk_status == 0)
- xprt_request_wait_receive(task);
+ task->tk_status = 0;
+ xprt_request_wait_receive(task);
return;
}
@@ -2167,6 +2167,9 @@ call_bc_transmit_status(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
+ if (rpc_task_transmitted(task))
+ task->tk_status = 0;
+
dprint_status(task);
switch (task->tk_status) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 341ecd796aa4..131aa2f0fd27 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -869,6 +869,8 @@ void tipc_link_reset(struct tipc_link *l)
__skb_queue_head_init(&list);
l->in_session = false;
+ /* Force re-synch of peer session number before establishing */
+ l->peer_session--;
l->session++;
l->mtu = l->advertised_mtu;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bff241f03525..89993afe0fbd 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -909,7 +909,8 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
for (; i < TIPC_NAMETBL_SIZE; i++) {
head = &tn->nametbl->services[i];
- if (*last_type) {
+ if (*last_type ||
+ (!i && *last_key && (*last_lower == *last_key))) {
service = tipc_service_find(net, *last_type);
if (!service)
return -EPIPE;
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 3481e4906bd6..9df82a573aa7 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -38,6 +38,8 @@
#include <linux/sysctl.h>
+static int zero;
+static int one = 1;
static struct ctl_table_header *tipc_ctl_hdr;
static struct ctl_table tipc_table[] = {
@@ -46,14 +48,16 @@ static struct ctl_table tipc_table[] = {
.data = &sysctl_tipc_rmem,
.maxlen = sizeof(sysctl_tipc_rmem),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
},
{
.procname = "named_timeout",
.data = &sysctl_tipc_named_timeout,
.maxlen = sizeof(sysctl_tipc_named_timeout),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "sk_filter",
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 135a7ee9db03..14dedb24fa7b 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,8 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock);
static void tls_device_free_ctx(struct tls_context *ctx)
{
- if (ctx->tx_conf == TLS_HW)
+ if (ctx->tx_conf == TLS_HW) {
kfree(tls_offload_ctx_tx(ctx));
+ kfree(ctx->tx.rec_seq);
+ kfree(ctx->tx.iv);
+ }
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
@@ -216,6 +219,13 @@ void tls_device_sk_destruct(struct sock *sk)
}
EXPORT_SYMBOL(tls_device_sk_destruct);
+void tls_device_free_resources_tx(struct sock *sk)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+ tls_free_partial_record(sk, tls_ctx);
+}
+
static void tls_append_frag(struct tls_record_info *record,
struct page_frag *pfrag,
int size)
@@ -587,7 +597,7 @@ void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
{
struct strp_msg *rxm = strp_msg(skb);
- int err = 0, offset = rxm->offset, copy, nsg;
+ int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
struct sk_buff *skb_iter, *unused;
struct scatterlist sg[1];
char *orig_buf, *buf;
@@ -618,25 +628,42 @@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
else
err = 0;
- copy = min_t(int, skb_pagelen(skb) - offset,
- rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- if (skb->decrypted)
- skb_store_bits(skb, offset, buf, copy);
+ if (skb_pagelen(skb) > offset) {
+ copy = min_t(int, skb_pagelen(skb) - offset, data_len);
- offset += copy;
- buf += copy;
+ if (skb->decrypted)
+ skb_store_bits(skb, offset, buf, copy);
+
+ offset += copy;
+ buf += copy;
+ }
+ pos = skb_pagelen(skb);
skb_walk_frags(skb, skb_iter) {
- copy = min_t(int, skb_iter->len,
- rxm->full_len - offset + rxm->offset -
- TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ int frag_pos;
+
+ /* Practically all frags must belong to msg if reencrypt
+ * is needed with current strparser and coalescing logic,
+ * but strparser may "get optimized", so let's be safe.
+ */
+ if (pos + skb_iter->len <= offset)
+ goto done_with_frag;
+ if (pos >= data_len + rxm->offset)
+ break;
+
+ frag_pos = offset - pos;
+ copy = min_t(int, skb_iter->len - frag_pos,
+ data_len + rxm->offset - offset);
if (skb_iter->decrypted)
- skb_store_bits(skb_iter, offset, buf, copy);
+ skb_store_bits(skb_iter, frag_pos, buf, copy);
offset += copy;
buf += copy;
+done_with_frag:
+ pos += skb_iter->len;
}
free_buf:
@@ -894,7 +921,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
goto release_netdev;
free_sw_resources:
+ up_read(&device_offload_lock);
tls_sw_free_resources_rx(sk);
+ down_read(&device_offload_lock);
release_ctx:
ctx->priv_ctx_rx = NULL;
release_netdev:
@@ -929,8 +958,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
}
out:
up_read(&device_offload_lock);
- kfree(tls_ctx->rx.rec_seq);
- kfree(tls_ctx->rx.iv);
tls_sw_release_resources_rx(sk);
}
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 54c3a758f2a7..c3a5fe624b4e 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -194,18 +194,26 @@ static void update_chksum(struct sk_buff *skb, int headln)
static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
{
+ struct sock *sk = skb->sk;
+ int delta;
+
skb_copy_header(nskb, skb);
skb_put(nskb, skb->len);
memcpy(nskb->data, skb->data, headln);
- update_chksum(nskb, headln);
nskb->destructor = skb->destructor;
- nskb->sk = skb->sk;
+ nskb->sk = sk;
skb->destructor = NULL;
skb->sk = NULL;
- refcount_add(nskb->truesize - skb->truesize,
- &nskb->sk->sk_wmem_alloc);
+
+ update_chksum(nskb, headln);
+
+ delta = nskb->truesize - skb->truesize;
+ if (likely(delta < 0))
+ WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+ else if (delta)
+ refcount_add(delta, &sk->sk_wmem_alloc);
}
/* This function may be called after the user socket is already
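The complete_skb() change computes the truesize difference as a signed value because the substitute skb can be smaller than the original; applying a negative difference as an unsigned addition would corrupt the socket's write-memory accounting. A runnable sketch of that signed-delta handling, using a plain atomic counter as a stand-in for sk_wmem_alloc:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint wmem = 1000;	/* toy stand-in for sk_wmem_alloc */

static void account_truesize_change(unsigned int old_size, unsigned int new_size)
{
	int delta = (int)new_size - (int)old_size;

	if (delta < 0)
		atomic_fetch_sub(&wmem, (unsigned int)-delta);
	else if (delta)
		atomic_fetch_add(&wmem, (unsigned int)delta);
}

int main(void)
{
	account_truesize_change(600, 480);	/* replacement got smaller */
	printf("wmem = %u\n", atomic_load(&wmem));	/* 880, not a bogus huge value */
	return 0;
}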
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index df921a2904b9..478603f43964 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -208,6 +208,26 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
return tls_push_sg(sk, ctx, sg, offset, flags);
}
+bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx)
+{
+ struct scatterlist *sg;
+
+ sg = ctx->partially_sent_record;
+ if (!sg)
+ return false;
+
+ while (1) {
+ put_page(sg_page(sg));
+ sk_mem_uncharge(sk, sg->length);
+
+ if (sg_is_last(sg))
+ break;
+ sg++;
+ }
+ ctx->partially_sent_record = NULL;
+ return true;
+}
+
static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
@@ -267,13 +287,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
kfree(ctx->tx.rec_seq);
kfree(ctx->tx.iv);
tls_sw_free_resources_tx(sk);
+#ifdef CONFIG_TLS_DEVICE
+ } else if (ctx->tx_conf == TLS_HW) {
+ tls_device_free_resources_tx(sk);
+#endif
}
- if (ctx->rx_conf == TLS_SW) {
- kfree(ctx->rx.rec_seq);
- kfree(ctx->rx.iv);
+ if (ctx->rx_conf == TLS_SW)
tls_sw_free_resources_rx(sk);
- }
#ifdef CONFIG_TLS_DEVICE
if (ctx->rx_conf == TLS_HW)
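The new tls_free_partial_record() walks the partially sent scatterlist entry by entry, releasing each page and uncharging its length, and stops at the entry marked as last; both the software and device TX teardown paths can then share it. A small userspace sketch of that walk, with a simplified stand-in for a scatterlist entry:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct sg_entry {
	void *page;
	unsigned int length;
	bool is_last;
};

static unsigned int charged;	/* stands in for the socket memory charge */

static bool free_partial_record(struct sg_entry *sg)
{
	if (!sg)
		return false;

	for (;; sg++) {
		free(sg->page);
		charged -= sg->length;
		if (sg->is_last)
			break;
	}
	return true;
}

int main(void)
{
	struct sg_entry rec[2] = {
		{ malloc(4096), 4096, false },
		{ malloc(1024), 1024, true },
	};

	charged = 4096 + 1024;
	free_partial_record(rec);
	printf("still charged: %u\n", charged);	/* 0 */
	return 0;
}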
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 20b191227969..29d6af43dd24 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2052,20 +2052,7 @@ void tls_sw_free_resources_tx(struct sock *sk)
/* Free up un-sent records in tx_list. First, free
* the partially sent record if any at head of tx_list.
*/
- if (tls_ctx->partially_sent_record) {
- struct scatterlist *sg = tls_ctx->partially_sent_record;
-
- while (1) {
- put_page(sg_page(sg));
- sk_mem_uncharge(sk, sg->length);
-
- if (sg_is_last(sg))
- break;
- sg++;
- }
-
- tls_ctx->partially_sent_record = NULL;
-
+ if (tls_free_partial_record(sk, tls_ctx)) {
rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
list_del(&rec->list);
@@ -2091,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ kfree(tls_ctx->rx.rec_seq);
+ kfree(tls_ctx->rx.iv);
+
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 25a9e3b5c154..47e30a58566c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13650,7 +13650,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEAUTHENTICATE,
@@ -13701,7 +13702,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
@@ -13709,7 +13711,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DISCONNECT,
@@ -13738,7 +13741,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMKSA,
@@ -14090,7 +14094,8 @@ static const struct genl_ops nl80211_ops[] = {
.policy = nl80211_policy,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_SET_QOS_MAP,
@@ -14145,7 +14150,8 @@ static const struct genl_ops nl80211_ops[] = {
.doit = nl80211_set_pmk,
.policy = nl80211_policy,
.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_CLEAR_SKB,
},
{
.cmd = NL80211_CMD_DEL_PMK,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2f1bf91eb226..a6fd5ce199da 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1309,6 +1309,16 @@ reg_intersect_dfs_region(const enum nl80211_dfs_regions dfs_region1,
return dfs_region1;
}
+static void reg_wmm_rules_intersect(const struct ieee80211_wmm_ac *wmm_ac1,
+ const struct ieee80211_wmm_ac *wmm_ac2,
+ struct ieee80211_wmm_ac *intersect)
+{
+ intersect->cw_min = max_t(u16, wmm_ac1->cw_min, wmm_ac2->cw_min);
+ intersect->cw_max = max_t(u16, wmm_ac1->cw_max, wmm_ac2->cw_max);
+ intersect->cot = min_t(u16, wmm_ac1->cot, wmm_ac2->cot);
+ intersect->aifsn = max_t(u8, wmm_ac1->aifsn, wmm_ac2->aifsn);
+}
+
/*
* Helper for regdom_intersect(), this does the real
* mathematical intersection fun
@@ -1323,6 +1333,8 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
struct ieee80211_freq_range *freq_range;
const struct ieee80211_power_rule *power_rule1, *power_rule2;
struct ieee80211_power_rule *power_rule;
+ const struct ieee80211_wmm_rule *wmm_rule1, *wmm_rule2;
+ struct ieee80211_wmm_rule *wmm_rule;
u32 freq_diff, max_bandwidth1, max_bandwidth2;
freq_range1 = &rule1->freq_range;
@@ -1333,6 +1345,10 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
power_rule2 = &rule2->power_rule;
power_rule = &intersected_rule->power_rule;
+ wmm_rule1 = &rule1->wmm_rule;
+ wmm_rule2 = &rule2->wmm_rule;
+ wmm_rule = &intersected_rule->wmm_rule;
+
freq_range->start_freq_khz = max(freq_range1->start_freq_khz,
freq_range2->start_freq_khz);
freq_range->end_freq_khz = min(freq_range1->end_freq_khz,
@@ -1376,6 +1392,29 @@ static int reg_rules_intersect(const struct ieee80211_regdomain *rd1,
intersected_rule->dfs_cac_ms = max(rule1->dfs_cac_ms,
rule2->dfs_cac_ms);
+ if (rule1->has_wmm && rule2->has_wmm) {
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ reg_wmm_rules_intersect(&wmm_rule1->client[ac],
+ &wmm_rule2->client[ac],
+ &wmm_rule->client[ac]);
+ reg_wmm_rules_intersect(&wmm_rule1->ap[ac],
+ &wmm_rule2->ap[ac],
+ &wmm_rule->ap[ac]);
+ }
+
+ intersected_rule->has_wmm = true;
+ } else if (rule1->has_wmm) {
+ *wmm_rule = *wmm_rule1;
+ intersected_rule->has_wmm = true;
+ } else if (rule2->has_wmm) {
+ *wmm_rule = *wmm_rule2;
+ intersected_rule->has_wmm = true;
+ } else {
+ intersected_rule->has_wmm = false;
+ }
+
if (!is_valid_reg_rule(intersected_rule))
return -EINVAL;
@@ -3739,10 +3778,9 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
/*
* The last request may have been received before this
* registration call. Call the driver notifier if
- * initiator is USER and user type is CELL_BASE.
+ * initiator is USER.
*/
- if (lr->initiator == NL80211_REGDOM_SET_BY_USER &&
- lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE)
+ if (lr->initiator == NL80211_REGDOM_SET_BY_USER)
reg_call_notifier(wiphy, lr);
}
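The intersection rule reg_wmm_rules_intersect() applies is simply "keep the more restrictive value of each WMM parameter": the larger cw_min, cw_max and aifsn, the smaller cot. A compile-and-run sketch of the same rule, with struct wmm_ac as a simplified stand-in for struct ieee80211_wmm_ac:

#include <stdint.h>
#include <stdio.h>

struct wmm_ac {
	uint16_t cw_min, cw_max, cot;
	uint8_t aifsn;
};

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Keep the more restrictive value of each parameter. */
static void wmm_intersect(const struct wmm_ac *a, const struct wmm_ac *b,
			  struct wmm_ac *out)
{
	out->cw_min = MAX(a->cw_min, b->cw_min);
	out->cw_max = MAX(a->cw_max, b->cw_max);
	out->cot    = MIN(a->cot, b->cot);
	out->aifsn  = MAX(a->aifsn, b->aifsn);
}

int main(void)
{
	struct wmm_ac a = { 3, 7, 2000, 2 }, b = { 4, 15, 1500, 3 }, r;

	wmm_intersect(&a, &b, &r);
	printf("cw_min=%d cw_max=%d cot=%d aifsn=%d\n",
	       r.cw_min, r.cw_max, r.cot, r.aifsn);	/* 4 15 1500 3 */
	return 0;
}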
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 287518c6caa4..04d888628f29 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -190,10 +190,9 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
/* copy subelement as we need to change its content to
* mark an ie after it is processed.
*/
- sub_copy = kmalloc(subie_len, gfp);
+ sub_copy = kmemdup(subelement, subie_len, gfp);
if (!sub_copy)
return 0;
- memcpy(sub_copy, subelement, subie_len);
pos = &new_ie[0];
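The scan.c hunk folds the kmalloc()+memcpy() pair into a single kmemdup() call, so the copy can never be forgotten or sized differently from the allocation. A userspace analogue for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userspace analogue of kmemdup(): allocate and copy in one step. */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const unsigned char subelement[] = { 0xdd, 0x05, 1, 2, 3, 4, 5 };
	unsigned char *sub_copy = memdup(subelement, sizeof(subelement));

	if (!sub_copy)
		return 1;
	printf("copied %zu bytes, first byte 0x%02x\n",
	       sizeof(subelement), (unsigned)sub_copy[0]);
	free(sub_copy);
	return 0;
}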
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e4b8db5e81ec..75899b62bdc9 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1220,9 +1220,11 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
else if (rate->bw == RATE_INFO_BW_HE_RU &&
rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
result = rates_26[rate->he_gi];
- else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
- rate->bw, rate->he_ru_alloc))
+ else {
+ WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+ rate->bw, rate->he_ru_alloc);
return 0;
+ }
/* now scale to the appropriate MCS */
tmp = result;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dbb3c1945b5c..85fec98676d3 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -70,17 +70,28 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
return NULL;
}
-static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+ unsigned short family)
{
struct xfrmi_net *xfrmn;
- int ifindex;
struct xfrm_if *xi;
+ int ifindex = 0;
if (!secpath_exists(skb) || !skb->dev)
return NULL;
+ switch (family) {
+ case AF_INET6:
+ ifindex = inet6_sdif(skb);
+ break;
+ case AF_INET:
+ ifindex = inet_sdif(skb);
+ break;
+ }
+ if (!ifindex)
+ ifindex = skb->dev->ifindex;
+
xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
- ifindex = skb->dev->ifindex;
for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
if (ifindex == xi->dev->ifindex &&
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8d1a898d0ba5..a6b58df7a70f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3313,7 +3313,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
ifcb = xfrm_if_get_cb();
if (ifcb) {
- xi = ifcb->decode_session(skb);
+ xi = ifcb->decode_session(skb, family);
if (xi) {
if_id = xi->p.if_id;
net = xi->net;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1bb971f46fc6..178baaa037e5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2384,7 +2384,7 @@ void xfrm_state_fini(struct net *net)
flush_work(&net->xfrm.state_hash_work);
flush_work(&xfrm_state_gc_work);
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a131f9ff979e..6916931b1de1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1424,7 +1424,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
ret = verify_policy_dir(p->dir);
if (ret)
return ret;
- if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir))
+ if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
return -EINVAL;
return 0;
@@ -1513,20 +1513,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
return -EINVAL;
}
- switch (ut[i].id.proto) {
- case IPPROTO_AH:
- case IPPROTO_ESP:
- case IPPROTO_COMP:
-#if IS_ENABLED(CONFIG_IPV6)
- case IPPROTO_ROUTING:
- case IPPROTO_DSTOPTS:
-#endif
- case IPSEC_PROTO_ANY:
- break;
- default:
+ if (!xfrm_id_proto_valid(ut[i].id.proto))
return -EINVAL;
- }
-
}
return 0;
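The removed switch shows the protocol whitelist that the new xfrm_id_proto_valid() helper centralizes. A standalone sketch of such a predicate, hard-coding the same set the old switch listed; the real helper lives elsewhere in the tree and guards the IPv6-only cases with CONFIG_IPV6, and IPSEC_PROTO_ANY is defined here for the sketch (255, per the IPsec uapi header):

#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>

#define IPSEC_PROTO_ANY 255

static bool id_proto_valid(unsigned char proto)
{
	switch (proto) {
	case IPPROTO_AH:
	case IPPROTO_ESP:
	case IPPROTO_COMP:
	case IPPROTO_ROUTING:	/* IPv6-only in the kernel version */
	case IPPROTO_DSTOPTS:	/* IPv6-only in the kernel version */
	case IPSEC_PROTO_ANY:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	printf("ESP valid: %d, TCP valid: %d\n",
	       id_proto_valid(IPPROTO_ESP), id_proto_valid(IPPROTO_TCP));
	return 0;
}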
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 76ca30cc4791..0c5969fa795f 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -222,6 +222,9 @@ endif
ifdef CONFIG_RETPOLINE
objtool_args += --retpoline
endif
+ifdef CONFIG_X86_SMAP
+ objtool_args += --uaccess
+endif
# 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
# 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 38b2b4818e8e..019771b845c5 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -3,7 +3,6 @@ ifdef CONFIG_UBSAN
CFLAGS_UBSAN += $(call cc-option, -fsanitize=shift)
CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
- CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh
index 27400b0cd732..000dc6437893 100644
--- a/scripts/atomic/gen-atomics.sh
+++ b/scripts/atomic/gen-atomics.sh
@@ -13,7 +13,7 @@ gen-atomic-long.sh asm-generic/atomic-long.h
gen-atomic-fallback.sh linux/atomic-fallback.h
EOF
while read script header; do
- ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
+ /bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
HASH="$(sha1sum ${LINUXDIR}/include/${header})"
HASH="${HASH%% *}"
printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c
index 1ceedea847dd..544ca126a8a8 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -9,7 +9,6 @@
#include <string.h>
#include <errno.h>
#include <ctype.h>
-#include <sys/socket.h>
struct security_class_mapping {
const char *name;
diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c
index 073fe7537f6c..6d51b74bc679 100644
--- a/scripts/selinux/mdp/mdp.c
+++ b/scripts/selinux/mdp/mdp.c
@@ -32,7 +32,6 @@
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
-#include <sys/socket.h>
static void usage(char *name)
{
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index fefee040bf79..b9298d2e8165 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -123,17 +123,22 @@ static int aafs_show_path(struct seq_file *seq, struct dentry *dentry)
return 0;
}
-static void aafs_evict_inode(struct inode *inode)
+static void aafs_i_callback(struct rcu_head *head)
{
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
+ free_inode_nonrcu(inode);
+}
+
+static void aafs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, aafs_i_callback);
}
static const struct super_operations aafs_super_ops = {
.statfs = simple_statfs,
- .evict_inode = aafs_evict_inode,
+ .destroy_inode = aafs_destroy_inode,
.show_path = aafs_show_path,
};
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index cd97929fac66..dc28914fa72e 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -560,7 +560,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root,
devcg->behavior == DEVCG_DEFAULT_ALLOW) {
rc = dev_exception_add(devcg, ex);
if (rc)
- break;
+ return rc;
} else {
/*
* in the other possible cases:
diff --git a/security/inode.c b/security/inode.c
index b7772a9b315e..421dd72b5876 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -27,17 +27,22 @@
static struct vfsmount *mount;
static int mount_count;
-static void securityfs_evict_inode(struct inode *inode)
+static void securityfs_i_callback(struct rcu_head *head)
{
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
+ struct inode *inode = container_of(head, struct inode, i_rcu);
if (S_ISLNK(inode->i_mode))
kfree(inode->i_link);
+ free_inode_nonrcu(inode);
+}
+
+static void securityfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, securityfs_i_callback);
}
static const struct super_operations securityfs_super_operations = {
.statfs = simple_statfs,
- .evict_inode = securityfs_evict_inode,
+ .destroy_inode = securityfs_destroy_inode,
};
static int fill_super(struct super_block *sb, void *data, int silent)
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index bd5fe0d3204a..201f7e588a29 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/capability.h>
+#include <linux/socket.h>
#define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \
"getattr", "setattr", "lock", "relabelfrom", "relabelto", "append", "map"
diff --git a/sound/core/info.c b/sound/core/info.c
index 96a074019c33..0eb169acc850 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -713,8 +713,11 @@ snd_info_create_entry(const char *name, struct snd_info_entry *parent,
INIT_LIST_HEAD(&entry->list);
entry->parent = parent;
entry->module = module;
- if (parent)
+ if (parent) {
+ mutex_lock(&parent->access);
list_add_tail(&entry->list, &parent->children);
+ mutex_unlock(&parent->access);
+ }
return entry;
}
@@ -792,7 +795,12 @@ void snd_info_free_entry(struct snd_info_entry * entry)
list_for_each_entry_safe(p, n, &entry->children, list)
snd_info_free_entry(p);
- list_del(&entry->list);
+ p = entry->parent;
+ if (p) {
+ mutex_lock(&p->access);
+ list_del(&entry->list);
+ mutex_unlock(&p->access);
+ }
kfree(entry->name);
if (entry->private_free)
entry->private_free(entry);
diff --git a/sound/core/init.c b/sound/core/init.c
index 0c4dc40376a7..079c12d64b0e 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -382,14 +382,7 @@ int snd_card_disconnect(struct snd_card *card)
card->shutdown = 1;
spin_unlock(&card->files_lock);
- /* phase 1: disable fops (user space) operations for ALSA API */
- mutex_lock(&snd_card_mutex);
- snd_cards[card->number] = NULL;
- clear_bit(card->number, snd_cards_lock);
- mutex_unlock(&snd_card_mutex);
-
- /* phase 2: replace file->f_op with special dummy operations */
-
+ /* replace file->f_op with special dummy operations */
spin_lock(&card->files_lock);
list_for_each_entry(mfile, &card->files_list, list) {
/* it's critical part, use endless loop */
@@ -405,7 +398,7 @@ int snd_card_disconnect(struct snd_card *card)
}
spin_unlock(&card->files_lock);
- /* phase 3: notify all connected devices about disconnection */
+ /* notify all connected devices about disconnection */
/* at this point, they cannot respond to any calls except release() */
#if IS_ENABLED(CONFIG_SND_MIXER_OSS)
@@ -421,6 +414,13 @@ int snd_card_disconnect(struct snd_card *card)
device_del(&card->card_dev);
card->registered = false;
}
+
+ /* disable fops (user space) operations for ALSA API */
+ mutex_lock(&snd_card_mutex);
+ snd_cards[card->number] = NULL;
+ clear_bit(card->number, snd_cards_lock);
+ mutex_unlock(&snd_card_mutex);
+
#ifdef CONFIG_PM
wake_up(&card->power_sleep);
#endif
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index ec0b8595eb4d..701a69d856f5 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -969,6 +969,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card,
/* power-up all before initialization */
hda_set_power_state(codec, AC_PWRST_D0);
+ codec->core.dev.power.power_state = PMSG_ON;
snd_hda_codec_proc_new(codec);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 810479766090..42cd3945e0de 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5450,6 +5450,8 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec,
return;
spec->gen.preferred_dacs = preferred_pairs;
+ spec->gen.auto_mute_via_amp = 1;
+ codec->power_save_node = 0;
}
/* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */
@@ -7266,6 +7268,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x12, 0x90a60140},
{0x14, 0x90170150},
{0x21, 0x02211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x21, 0x02211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x12, 0x40000000},
+ {0x14, 0x90170110},
+ {0x21, 0x02211020}),
SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
{0x14, 0x90170110},
{0x21, 0x02211020}),
@@ -7376,6 +7384,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x21, 0x0221101f}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC256_STANDARD_PINS),
+ SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170110},
+ {0x1b, 0x01011020},
+ {0x21, 0x0221101f}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC,
{0x14, 0x90170110},
{0x1b, 0x90a70130},
@@ -7535,6 +7547,13 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x12, 0x90a60130},
{0x17, 0x90170110},
{0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1043, "ASUS", ALC294_FIXUP_ASUS_SPK,
+ {0x12, 0x90a60130},
+ {0x17, 0x90170110},
+ {0x21, 0x03211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+ {0x14, 0x90170110},
+ {0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC295_STANDARD_PINS,
{0x17, 0x21014020},
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index 7afe8fae4939..b61f65bed4e4 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -351,12 +351,16 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
{
struct usb_device *usbdev = line6->usbdev;
int ret;
- unsigned char len;
+ unsigned char *len;
unsigned count;
if (address > 0xffff || datalen > 0xff)
return -EINVAL;
+ len = kmalloc(sizeof(*len), GFP_KERNEL);
+ if (!len)
+ return -ENOMEM;
+
/* query the serial number: */
ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67,
USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
@@ -365,7 +369,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
if (ret < 0) {
dev_err(line6->ifcdev, "read request failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
/* Wait for data length. We'll get 0xff until length arrives. */
@@ -375,28 +379,29 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67,
USB_TYPE_VENDOR | USB_RECIP_DEVICE |
USB_DIR_IN,
- 0x0012, 0x0000, &len, 1,
+ 0x0012, 0x0000, len, 1,
LINE6_TIMEOUT * HZ);
if (ret < 0) {
dev_err(line6->ifcdev,
"receive length failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
- if (len != 0xff)
+ if (*len != 0xff)
break;
}
- if (len == 0xff) {
+ ret = -EIO;
+ if (*len == 0xff) {
dev_err(line6->ifcdev, "read failed after %d retries\n",
count);
- return -EIO;
- } else if (len != datalen) {
+ goto exit;
+ } else if (*len != datalen) {
/* should be equal or something went wrong */
dev_err(line6->ifcdev,
"length mismatch (expected %d, got %d)\n",
- (int)datalen, (int)len);
- return -EIO;
+ (int)datalen, (int)*len);
+ goto exit;
}
/* receive the result: */
@@ -405,12 +410,12 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data,
0x0013, 0x0000, data, datalen,
LINE6_TIMEOUT * HZ);
- if (ret < 0) {
+ if (ret < 0)
dev_err(line6->ifcdev, "read failed (error %d)\n", ret);
- return ret;
- }
- return 0;
+exit:
+ kfree(len);
+ return ret;
}
EXPORT_SYMBOL_GPL(line6_read_data);
@@ -422,12 +427,16 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data,
{
struct usb_device *usbdev = line6->usbdev;
int ret;
- unsigned char status;
+ unsigned char *status;
int count;
if (address > 0xffff || datalen > 0xffff)
return -EINVAL;
+ status = kmalloc(sizeof(*status), GFP_KERNEL);
+ if (!status)
+ return -ENOMEM;
+
ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0), 0x67,
USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
0x0022, address, data, datalen,
@@ -436,7 +445,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data,
if (ret < 0) {
dev_err(line6->ifcdev,
"write request failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
for (count = 0; count < LINE6_READ_WRITE_MAX_RETRIES; count++) {
@@ -447,28 +456,29 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data,
USB_TYPE_VENDOR | USB_RECIP_DEVICE |
USB_DIR_IN,
0x0012, 0x0000,
- &status, 1, LINE6_TIMEOUT * HZ);
+ status, 1, LINE6_TIMEOUT * HZ);
if (ret < 0) {
dev_err(line6->ifcdev,
"receiving status failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
- if (status != 0xff)
+ if (*status != 0xff)
break;
}
- if (status == 0xff) {
+ if (*status == 0xff) {
dev_err(line6->ifcdev, "write failed after %d retries\n",
count);
- return -EIO;
- } else if (status != 0) {
+ ret = -EIO;
+ } else if (*status != 0) {
dev_err(line6->ifcdev, "write failed (error %d)\n", ret);
- return -EIO;
+ ret = -EIO;
}
-
- return 0;
+exit:
+ kfree(status);
+ return ret;
}
EXPORT_SYMBOL_GPL(line6_write_data);
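These line6 hunks replace on-stack transfer buffers with kmalloc'd ones, since buffers handed to USB control messages must be suitable for DMA and so cannot live on the stack, and they funnel every return through a single exit label that frees the buffer. A userspace sketch of that allocate, use, goto-exit, free shape, with a fake transfer function standing in for usb_control_msg():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for a transfer that must be given a heap buffer. */
static int fake_control_msg(unsigned char *buf, size_t len)
{
	memset(buf, 0x2a, len);
	return (int)len;		/* bytes transferred */
}

static int read_length(unsigned char *out)
{
	unsigned char *len;
	int ret;

	len = malloc(sizeof(*len));	/* heap, never the stack */
	if (!len)
		return -1;

	ret = fake_control_msg(len, 1);
	if (ret < 0)
		goto exit;

	*out = *len;
	ret = 0;
exit:
	free(len);			/* one exit path frees the buffer */
	return ret;
}

int main(void)
{
	unsigned char v = 0;
	int ret = read_length(&v);

	printf("ret=%d value=0x%02x\n", ret, v);
	return 0;
}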
diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c
index 36ed9c85c0eb..5f3c87264e66 100644
--- a/sound/usb/line6/podhd.c
+++ b/sound/usb/line6/podhd.c
@@ -225,28 +225,32 @@ static void podhd_startup_start_workqueue(struct timer_list *t)
static int podhd_dev_start(struct usb_line6_podhd *pod)
{
int ret;
- u8 init_bytes[8];
+ u8 *init_bytes;
int i;
struct usb_device *usbdev = pod->line6.usbdev;
+ init_bytes = kmalloc(8, GFP_KERNEL);
+ if (!init_bytes)
+ return -ENOMEM;
+
ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0),
0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT,
0x11, 0,
NULL, 0, LINE6_TIMEOUT * HZ);
if (ret < 0) {
dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
/* NOTE: looks like some kind of ping message */
ret = usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0), 0x67,
USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
0x11, 0x0,
- &init_bytes, 3, LINE6_TIMEOUT * HZ);
+ init_bytes, 3, LINE6_TIMEOUT * HZ);
if (ret < 0) {
dev_err(pod->line6.ifcdev,
"receive length failed (error %d)\n", ret);
- return ret;
+ goto exit;
}
pod->firmware_version =
@@ -255,7 +259,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod)
for (i = 0; i <= 16; i++) {
ret = line6_read_data(&pod->line6, 0xf000 + 0x08 * i, init_bytes, 8);
if (ret < 0)
- return ret;
+ goto exit;
}
ret = usb_control_msg(usbdev, usb_sndctrlpipe(usbdev, 0),
@@ -263,10 +267,9 @@ static int podhd_dev_start(struct usb_line6_podhd *pod)
USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT,
1, 0,
NULL, 0, LINE6_TIMEOUT * HZ);
- if (ret < 0)
- return ret;
-
- return 0;
+exit:
+ kfree(init_bytes);
+ return ret;
}
static void podhd_startup_workqueue(struct work_struct *work)
diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c
index f47ba94e6f4a..19bee725de00 100644
--- a/sound/usb/line6/toneport.c
+++ b/sound/usb/line6/toneport.c
@@ -365,16 +365,21 @@ static bool toneport_has_source_select(struct usb_line6_toneport *toneport)
/*
Setup Toneport device.
*/
-static void toneport_setup(struct usb_line6_toneport *toneport)
+static int toneport_setup(struct usb_line6_toneport *toneport)
{
- u32 ticks;
+ u32 *ticks;
struct usb_line6 *line6 = &toneport->line6;
struct usb_device *usbdev = line6->usbdev;
+ ticks = kmalloc(sizeof(*ticks), GFP_KERNEL);
+ if (!ticks)
+ return -ENOMEM;
+
/* sync time on device with host: */
/* note: 32-bit timestamps overflow in year 2106 */
- ticks = (u32)ktime_get_real_seconds();
- line6_write_data(line6, 0x80c6, &ticks, 4);
+ *ticks = (u32)ktime_get_real_seconds();
+ line6_write_data(line6, 0x80c6, ticks, 4);
+ kfree(ticks);
/* enable device: */
toneport_send_cmd(usbdev, 0x0301, 0x0000);
@@ -389,6 +394,7 @@ static void toneport_setup(struct usb_line6_toneport *toneport)
toneport_update_led(toneport);
mod_timer(&toneport->timer, jiffies + TONEPORT_PCM_DELAY * HZ);
+ return 0;
}
/*
@@ -451,7 +457,9 @@ static int toneport_init(struct usb_line6 *line6,
return err;
}
- toneport_setup(toneport);
+ err = toneport_setup(toneport);
+ if (err)
+ return err;
/* register audio system: */
return snd_card_register(line6->card);
@@ -463,7 +471,11 @@ static int toneport_init(struct usb_line6 *line6,
*/
static int toneport_reset_resume(struct usb_interface *interface)
{
- toneport_setup(usb_get_intfdata(interface));
+ int err;
+
+ err = toneport_setup(usb_get_intfdata(interface));
+ if (err)
+ return err;
return line6_resume(interface);
}
#endif
diff --git a/tools/arch/arc/include/uapi/asm/unistd.h b/tools/arch/arc/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..5eafa1115162
--- /dev/null
+++ b/tools/arch/arc/include/uapi/asm/unistd.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/******** no-legacy-syscalls-ABI *******/
+
+/*
+ * Non-typical guard macro to enable inclusion twice in ARCH sys.c
+ * That is how the Generic syscall wrapper generator works
+ */
+#if !defined(_UAPI_ASM_ARC_UNISTD_H) || defined(__SYSCALL)
+#define _UAPI_ASM_ARC_UNISTD_H
+
+#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_TIME32_SYSCALLS
+
+#define sys_mmap2 sys_mmap_pgoff
+
+#include <asm-generic/unistd.h>
+
+#define NR_syscalls __NR_syscalls
+
+/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */
+#define __NR_sysfs (__NR_arch_specific_syscall + 3)
+
+/* ARC specific syscall */
+#define __NR_cacheflush (__NR_arch_specific_syscall + 0)
+#define __NR_arc_settls (__NR_arch_specific_syscall + 1)
+#define __NR_arc_gettls (__NR_arch_specific_syscall + 2)
+#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4)
+
+__SYSCALL(__NR_cacheflush, sys_cacheflush)
+__SYSCALL(__NR_arc_settls, sys_arc_settls)
+__SYSCALL(__NR_arc_gettls, sys_arc_gettls)
+__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg)
+__SYSCALL(__NR_sysfs, sys_sysfs)
+
+#undef __SYSCALL
+
+#endif
diff --git a/tools/arch/hexagon/include/uapi/asm/unistd.h b/tools/arch/hexagon/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..432c4db1b623
--- /dev/null
+++ b/tools/arch/hexagon/include/uapi/asm/unistd.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Syscall support for Hexagon
+ *
+ * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/*
+ * The kernel pulls this unistd.h in three different ways:
+ * 1. the "normal" way which gets all the __NR defines
+ * 2. with __SYSCALL defined to produce function declarations
+ * 3. with __SYSCALL defined to produce syscall table initialization
+ * See also: syscalltab.c
+ */
+
+#define sys_mmap2 sys_mmap_pgoff
+#define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_TIME32_SYSCALLS
+
+#include <asm-generic/unistd.h>
diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..0e2eeeb1fd27
--- /dev/null
+++ b/tools/arch/riscv/include/uapi/asm/unistd.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __LP64__
+#define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SET_GET_RLIMIT
+#endif /* __LP64__ */
+
+#include <asm-generic/unistd.h>
+
+/*
+ * Allows the instruction cache to be flushed from userspace. Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart. There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller. We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+#ifndef __NR_riscv_flush_icache
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+#endif
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..d213ec5c3766 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -146,6 +146,7 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e0c650d91784..994a7e0d16fb 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1151,6 +1151,9 @@ static int do_create(int argc, char **argv)
return -1;
}
NEXT_ARG();
+ } else {
+ p_err("unknown arg %s", *argv);
+ return -1;
}
}
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 404d4b9ffe76..df1153cea0b7 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -32,6 +32,7 @@
#ifndef __KERNEL__
#include <stdlib.h>
+#include <time.h>
#endif
/*
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index 4db74758c674..fecb78afea3f 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,3 +1,4 @@
libbpf_version.h
FEATURE-DUMP.libbpf
test_libbpf
+libbpf.so.*
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 87494c7c619d..981c6ce2da2c 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -2233,7 +2233,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer)
return val & 0xffffffff;
if (strcmp(type, "u64") == 0 ||
- strcmp(type, "s64"))
+ strcmp(type, "s64") == 0)
return val;
if (strcmp(type, "s8") == 0)
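The event-parse.c fix adds the missing "== 0": strcmp() returns 0 on a match, so the bare call made the condition true for every type except "s64". A two-line demonstration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *type = "u32";

	/* Bare strcmp() is non-zero when the strings DIFFER, so this
	 * test was true for every type except "s64". */
	if (strcmp(type, "s64"))
		printf("buggy test matches \"%s\"\n", type);

	/* The corrected form only matches an exact "s64". */
	if (strcmp(type, "s64") == 0)
		printf("fixed test matches \"%s\"\n", type);

	return 0;
}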
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
index 77e4ec6402dd..e99867111387 100644
--- a/tools/lib/traceevent/parse-utils.c
+++ b/tools/lib/traceevent/parse-utils.c
@@ -14,7 +14,7 @@
void __vwarning(const char *fmt, va_list ap)
{
if (errno)
- perror("trace-cmd");
+ perror("libtraceevent");
errno = 0;
fprintf(stderr, " ");
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 35bff92cc773..68caa9a976d0 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model
19. AND THEN THERE WAS ALPHA
20. THE HAPPENS-BEFORE RELATION: hb
21. THE PROPAGATES-BEFORE RELATION: pb
- 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+ 22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb
23. LOCKING
24. ODDS AND ENDS
@@ -1430,8 +1430,8 @@ they execute means that it cannot have cycles. This requirement is
the content of the LKMM's "propagation" axiom.
-RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
-----------------------------------------------------
+RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb
+-------------------------------------------------------------
RCU (Read-Copy-Update) is a powerful synchronization mechanism. It
rests on two concepts: grace periods and read-side critical sections.
@@ -1446,17 +1446,19 @@ As far as memory models are concerned, RCU's main feature is its
Grace-Period Guarantee, which states that a critical section can never
span a full grace period. In more detail, the Guarantee says:
- If a critical section starts before a grace period then it
- must end before the grace period does. In addition, every
- store that propagates to the critical section's CPU before the
- end of the critical section must propagate to every CPU before
- the end of the grace period.
+ For any critical section C and any grace period G, at least
+ one of the following statements must hold:
- If a critical section ends after a grace period ends then it
- must start after the grace period does. In addition, every
- store that propagates to the grace period's CPU before the
- start of the grace period must propagate to every CPU before
- the start of the critical section.
+(1) C ends before G does, and in addition, every store that
+ propagates to C's CPU before the end of C must propagate to
+ every CPU before G ends.
+
+(2) G starts before C does, and in addition, every store that
+ propagates to G's CPU before the start of G must propagate
+ to every CPU before C starts.
+
+In particular, it is not possible for a critical section to both start
+before and end after a grace period.
Here is a simple example of RCU in action:
@@ -1483,10 +1485,11 @@ The Grace Period Guarantee tells us that when this code runs, it will
never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1
means that P0's store to x propagated to P1 before P1 called
synchronize_rcu(), so P0's critical section must have started before
-P1's grace period. On the other hand, r2 = 0 means that P0's store to
-y, which occurs before the end of the critical section, did not
-propagate to P1 before the end of the grace period, violating the
-Guarantee.
+P1's grace period, contrary to part (2) of the Guarantee. On the
+other hand, r2 = 0 means that P0's store to y, which occurs before the
+end of the critical section, did not propagate to P1 before the end of
+the grace period, contrary to part (1). Together the results violate
+the Guarantee.
In the kernel's implementations of RCU, the requirements for stores
to propagate to every CPU are fulfilled by placing strong fences at
@@ -1504,11 +1507,11 @@ before" or "ends after" a grace period? Some aspects of the meaning
are pretty obvious, as in the example above, but the details aren't
entirely clear. The LKMM formalizes this notion by means of the
rcu-link relation. rcu-link encompasses a very general notion of
-"before": Among other things, X ->rcu-link Z includes cases where X
-happens-before or is equal to some event Y which is equal to or comes
-before Z in the coherence order. When Y = Z this says that X ->rfe Z
-implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z
-and X ->co Z each imply X ->rcu-link Z.
+"before": If E and F are RCU fence events (i.e., rcu_read_lock(),
+rcu_read_unlock(), or synchronize_rcu()) then among other things,
+E ->rcu-link F includes cases where E is po-before some memory-access
+event X, F is po-after some memory-access event Y, and we have any of
+X ->rfe Y, X ->co Y, or X ->fr Y.
The formal definition of the rcu-link relation is more than a little
obscure, and we won't give it here. It is closely related to the pb
@@ -1516,171 +1519,173 @@ relation, and the details don't matter unless you want to comb through
a somewhat lengthy formal proof. Pretty much all you need to know
about rcu-link is the information in the preceding paragraph.
-The LKMM also defines the gp and rscs relations. They bring grace
-periods and read-side critical sections into the picture, in the
+The LKMM also defines the rcu-gp and rcu-rscsi relations. They bring
+grace periods and read-side critical sections into the picture, in the
following way:
- E ->gp F means there is a synchronize_rcu() fence event S such
- that E ->po S and either S ->po F or S = F. In simple terms,
- there is a grace period po-between E and F.
+ E ->rcu-gp F means that E and F are in fact the same event,
+ and that event is a synchronize_rcu() fence (i.e., a grace
+ period).
- E ->rscs F means there is a critical section delimited by an
- rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
- that E ->po U and either L ->po F or L = F. You can think of
- this as saying that E and F are in the same critical section
- (in fact, it also allows E to be po-before the start of the
- critical section and F to be po-after the end).
+ E ->rcu-rscsi F means that E and F are the rcu_read_unlock()
+ and rcu_read_lock() fence events delimiting some read-side
+ critical section. (The 'i' at the end of the name emphasizes
+ that this relation is "inverted": It links the end of the
+ critical section to the start.)
If we think of the rcu-link relation as standing for an extended
-"before", then X ->gp Y ->rcu-link Z says that X executes before a
-grace period which ends before Z executes. (In fact it covers more
-than this, because it also includes cases where X executes before a
-grace period and some store propagates to Z's CPU before Z executes
-but doesn't propagate to some other CPU until after the grace period
-ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
-before the start of) a critical section which starts before Z
-executes.
-
-The LKMM goes on to define the rcu-fence relation as a sequence of gp
-and rscs links separated by rcu-link links, in which the number of gp
-links is >= the number of rscs links. For example:
+"before", then X ->rcu-gp Y ->rcu-link Z roughly says that X is a
+grace period which ends before Z begins. (In fact it covers more than
+this, because it also includes cases where some store propagates to
+Z's CPU before Z begins but doesn't propagate to some other CPU until
+after X ends.) Similarly, X ->rcu-rscsi Y ->rcu-link Z says that X is
+the end of a critical section which starts before Z begins.
+
+The LKMM goes on to define the rcu-fence relation as a sequence of
+rcu-gp and rcu-rscsi links separated by rcu-link links, in which the
+number of rcu-gp links is >= the number of rcu-rscsi links. For
+example:
- X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+ X ->rcu-gp Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V
would imply that X ->rcu-fence V, because this sequence contains two
-gp links and only one rscs link. (It also implies that X ->rcu-fence T
-and Z ->rcu-fence V.) On the other hand:
+rcu-gp links and one rcu-rscsi link. (It also implies that
+X ->rcu-fence T and Z ->rcu-fence V.) On the other hand:
- X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+ X ->rcu-rscsi Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V
does not imply X ->rcu-fence V, because the sequence contains only
-one gp link but two rscs links.
+one rcu-gp link but two rcu-rscsi links.
The rcu-fence relation is important because the Grace Period Guarantee
means that rcu-fence acts kind of like a strong fence. In particular,
-if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
-will propagate to every CPU before Z executes.
+E ->rcu-fence F implies not only that E begins before F ends, but also
+that any write po-before E will propagate to every CPU before any
+instruction po-after F can execute. (However, it does not imply that
+E must execute before F; in fact, each synchronize_rcu() fence event
+is linked to itself by rcu-fence as a degenerate case.)
To prove this in full generality requires some intellectual effort.
We'll consider just a very simple case:
- W ->gp X ->rcu-link Y ->rscs Z.
+ G ->rcu-gp W ->rcu-link Z ->rcu-rscsi F.
-This formula means that there is a grace period G and a critical
-section C such that:
+This formula means that G and W are the same event (a grace period),
+and there are events X, Y and a read-side critical section C such that:
- 1. W is po-before G;
+ 1. G = W is po-before or equal to X;
- 2. X is equal to or po-after G;
+ 2. X comes "before" Y in some sense (including rfe, co and fr);
- 3. X comes "before" Y in some sense;
+	3. Y is po-before Z;
- 4. Y is po-before the end of C;
+ 4. Z is the rcu_read_unlock() event marking the end of C;
- 5. Z is equal to or po-after the start of C.
+ 5. F is the rcu_read_lock() event marking the start of C.
-From 2 - 4 we deduce that the grace period G ends before the critical
-section C. Then the second part of the Grace Period Guarantee says
-not only that G starts before C does, but also that W (which executes
-on G's CPU before G starts) must propagate to every CPU before C
-starts. In particular, W propagates to every CPU before Z executes
-(or finishes executing, in the case where Z is equal to the
-rcu_read_lock() fence event which starts C.) This sort of reasoning
-can be expanded to handle all the situations covered by rcu-fence.
+From 1 - 4 we deduce that the grace period G ends before the critical
+section C. Then part (2) of the Grace Period Guarantee says not only
+that G starts before C does, but also that any write which executes on
+G's CPU before G starts must propagate to every CPU before C starts.
+In particular, the write propagates to every CPU before F finishes
+executing and hence before any instruction po-after F can execute.
+This sort of reasoning can be extended to handle all the situations
+covered by rcu-fence.
Finally, the LKMM defines the RCU-before (rb) relation in terms of
rcu-fence. This is done in essentially the same way as the pb
relation was defined in terms of strong-fence. We will omit the
-details; the end result is that E ->rb F implies E must execute before
-F, just as E ->pb F does (and for much the same reasons).
+details; the end result is that E ->rb F implies E must execute
+before F, just as E ->pb F does (and for much the same reasons).
Putting this all together, the LKMM expresses the Grace Period
Guarantee by requiring that the rb relation does not contain a cycle.
-Equivalently, this "rcu" axiom requires that there are no events E and
-F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the
-axiom requires that there are no cycles consisting of gp and rscs
-alternating with rcu-link, where the number of gp links is >= the
-number of rscs links.
+Equivalently, this "rcu" axiom requires that there are no events E
+and F with E ->rcu-link F ->rcu-fence E. Or to put it a third way,
+the axiom requires that there are no cycles consisting of rcu-gp and
+rcu-rscsi alternating with rcu-link, where the number of rcu-gp links
+is >= the number of rcu-rscsi links.
Justifying the axiom isn't easy, but it is in fact a valid
formalization of the Grace Period Guarantee. We won't attempt to go
through the detailed argument, but the following analysis gives a
-taste of what is involved. Suppose we have a violation of the first
-part of the Guarantee: A critical section starts before a grace
-period, and some store propagates to the critical section's CPU before
-the end of the critical section but doesn't propagate to some other
-CPU until after the end of the grace period.
+taste of what is involved. Suppose both parts of the Guarantee are
+violated: A critical section starts before a grace period, and some
+store propagates to the critical section's CPU before the end of the
+critical section but doesn't propagate to some other CPU until after
+the end of the grace period.
Putting symbols to these ideas, let L and U be the rcu_read_lock() and
rcu_read_unlock() fence events delimiting the critical section in
question, and let S be the synchronize_rcu() fence event for the grace
period. Saying that the critical section starts before S means there
-are events E and F where E is po-after L (which marks the start of the
-critical section), E is "before" F in the sense of the rcu-link
-relation, and F is po-before the grace period S:
+are events Q and R where Q is po-after L (which marks the start of the
+critical section), Q is "before" R in the sense used by the rcu-link
+relation, and R is po-before the grace period S. Thus we have:
- L ->po E ->rcu-link F ->po S.
+ L ->rcu-link S.
-Let W be the store mentioned above, let Z come before the end of the
+Let W be the store mentioned above, let Y come before the end of the
critical section and witness that W propagates to the critical
-section's CPU by reading from W, and let Y on some arbitrary CPU be a
-witness that W has not propagated to that CPU, where Y happens after
+section's CPU by reading from W, and let Z on some arbitrary CPU be a
+witness that W has not propagated to that CPU, where Z happens after
some event X which is po-after S. Symbolically, this amounts to:
- S ->po X ->hb* Y ->fr W ->rf Z ->po U.
+ S ->po X ->hb* Z ->fr W ->rf Y ->po U.
-The fr link from Y to W indicates that W has not propagated to Y's CPU
-at the time that Y executes. From this, it can be shown (see the
-discussion of the rcu-link relation earlier) that X and Z are related
-by rcu-link, yielding:
+The fr link from Z to W indicates that W has not propagated to Z's CPU
+at the time that Z executes. From this, it can be shown (see the
+discussion of the rcu-link relation earlier) that S and U are related
+by rcu-link:
- S ->po X ->rcu-link Z ->po U.
+ S ->rcu-link U.
-The formulas say that S is po-between F and X, hence F ->gp X. They
-also say that Z comes before the end of the critical section and E
-comes after its start, hence Z ->rscs E. From all this we obtain:
+Since S is a grace period we have S ->rcu-gp S, and since L and U are
+the start and end of the critical section C we have U ->rcu-rscsi L.
+From this we obtain:
- F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
+ S ->rcu-gp S ->rcu-link U ->rcu-rscsi L ->rcu-link S,
a forbidden cycle. Thus the "rcu" axiom rules out this violation of
the Grace Period Guarantee.
For something a little more down-to-earth, let's see how the axiom
works out in practice. Consider the RCU code example from above, this
-time with statement labels added to the memory access instructions:
+time with statement labels added:
int x, y;
P0()
{
- rcu_read_lock();
- W: WRITE_ONCE(x, 1);
- X: WRITE_ONCE(y, 1);
- rcu_read_unlock();
+ L: rcu_read_lock();
+ X: WRITE_ONCE(x, 1);
+ Y: WRITE_ONCE(y, 1);
+ U: rcu_read_unlock();
}
P1()
{
int r1, r2;
- Y: r1 = READ_ONCE(x);
- synchronize_rcu();
- Z: r2 = READ_ONCE(y);
+ Z: r1 = READ_ONCE(x);
+ S: synchronize_rcu();
+ W: r2 = READ_ONCE(y);
}
-If r2 = 0 at the end then P0's store at X overwrites the value that
-P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
-In addition, there is a synchronize_rcu() between Y and Z, so therefore
-we have Y ->gp Z.
+If r2 = 0 at the end then P0's store at Y overwrites the value that
+P1's load at W reads from, so we have W ->fre Y. Since S ->po W and
+also Y ->po U, we get S ->rcu-link U. In addition, S ->rcu-gp S
+because S is a grace period.
-If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
-so we have W ->rcu-link Y. In addition, W and X are in the same critical
-section, so therefore we have X ->rscs W.
+If r1 = 1 at the end then P1's load at Z reads from P0's store at X,
+so we have X ->rfe Z. Together with L ->po X and Z ->po S, this
+yields L ->rcu-link S. And since L and U are the start and end of a
+critical section, we have U ->rcu-rscsi L.
-Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
-violating the "rcu" axiom. Hence the outcome is not allowed by the
-LKMM, as we would expect.
+Then U ->rcu-rscsi L ->rcu-link S ->rcu-gp S ->rcu-link U is a
+forbidden cycle, violating the "rcu" axiom. Hence the outcome is not
+allowed by the LKMM, as we would expect.
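+
+For readers who would like to check this result mechanically, the
+example can be written as a litmus test and run through herd7. The
+following encoding is only an illustrative sketch (the test name and
+the exact form of the "exists" clause are chosen here for convenience,
+not taken from the kernel's litmus-tests directory); the LKMM should
+report that the condition is never satisfied:
+
+	C RCU+rscs+gp
+
+	{}
+
+	P0(int *x, int *y)
+	{
+		rcu_read_lock();
+		WRITE_ONCE(*x, 1);
+		WRITE_ONCE(*y, 1);
+		rcu_read_unlock();
+	}
+
+	P1(int *x, int *y)
+	{
+		int r1;
+		int r2;
+
+		r1 = READ_ONCE(*x);
+		synchronize_rcu();
+		r2 = READ_ONCE(*y);
+	}
+
+	exists (1:r1=1 /\ 1:r2=0)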
For contrast, let's see what can happen in a more complicated example:
@@ -1690,51 +1695,52 @@ For contrast, let's see what can happen in a more complicated example:
{
int r0;
- rcu_read_lock();
- W: r0 = READ_ONCE(x);
- X: WRITE_ONCE(y, 1);
- rcu_read_unlock();
+ L0: rcu_read_lock();
+ r0 = READ_ONCE(x);
+ WRITE_ONCE(y, 1);
+ U0: rcu_read_unlock();
}
P1()
{
int r1;
- Y: r1 = READ_ONCE(y);
- synchronize_rcu();
- Z: WRITE_ONCE(z, 1);
+ r1 = READ_ONCE(y);
+ S1: synchronize_rcu();
+ WRITE_ONCE(z, 1);
}
P2()
{
int r2;
- rcu_read_lock();
- U: r2 = READ_ONCE(z);
- V: WRITE_ONCE(x, 1);
- rcu_read_unlock();
+ L2: rcu_read_lock();
+ r2 = READ_ONCE(z);
+ WRITE_ONCE(x, 1);
+ U2: rcu_read_unlock();
}
If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
-that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
-However this cycle is not forbidden, because the sequence of relations
-contains fewer instances of gp (one) than of rscs (two). Consequently
-the outcome is allowed by the LKMM. The following instruction timing
-diagram shows how it might actually occur:
+that U0 ->rcu-rscsi L0 ->rcu-link S1 ->rcu-gp S1 ->rcu-link U2 ->rcu-rscsi
+L2 ->rcu-link U0. However this cycle is not forbidden, because the
+sequence of relations contains fewer instances of rcu-gp (one) than of
+rcu-rscsi (two). Consequently the outcome is allowed by the LKMM.
+The following instruction timing diagram shows how it might actually
+occur:
P0 P1 P2
-------------------- -------------------- --------------------
rcu_read_lock()
-X: WRITE_ONCE(y, 1)
- Y: r1 = READ_ONCE(y)
+WRITE_ONCE(y, 1)
+ r1 = READ_ONCE(y)
synchronize_rcu() starts
. rcu_read_lock()
- . V: WRITE_ONCE(x, 1)
-W: r0 = READ_ONCE(x) .
+ . WRITE_ONCE(x, 1)
+r0 = READ_ONCE(x) .
rcu_read_unlock() .
synchronize_rcu() ends
- Z: WRITE_ONCE(z, 1)
- U: r2 = READ_ONCE(z)
+ WRITE_ONCE(z, 1)
+ r2 = READ_ONCE(z)
rcu_read_unlock()
This requires P0 and P2 to execute their loads and stores out of
@@ -1744,6 +1750,15 @@ section in P0 both starts before P1's grace period does and ends
before it does, and the critical section in P2 both starts after P1's
grace period does and ends after it does.
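+
+The three-CPU example can be encoded in the same illustrative way
+(again the test name is made up); here herd7 should report that the
+"exists" condition is sometimes satisfied, confirming that the outcome
+is allowed:
+
+	C RCU+2rscs+1gp
+
+	{}
+
+	P0(int *x, int *y)
+	{
+		int r0;
+
+		rcu_read_lock();
+		r0 = READ_ONCE(*x);
+		WRITE_ONCE(*y, 1);
+		rcu_read_unlock();
+	}
+
+	P1(int *y, int *z)
+	{
+		int r1;
+
+		r1 = READ_ONCE(*y);
+		synchronize_rcu();
+		WRITE_ONCE(*z, 1);
+	}
+
+	P2(int *x, int *z)
+	{
+		int r2;
+
+		rcu_read_lock();
+		r2 = READ_ONCE(*z);
+		WRITE_ONCE(*x, 1);
+		rcu_read_unlock();
+	}
+
+	exists (0:r0=1 /\ 1:r1=1 /\ 2:r2=1)
+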
+Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in
+addition to normal RCU. The ideas involved are much the same as
+above, with new relations srcu-gp and srcu-rscsi added to represent
+SRCU grace periods and read-side critical sections. There is a
+restriction on the srcu-gp and srcu-rscsi links that can appear in an
+rcu-fence sequence (the srcu-rscsi links must be paired with srcu-gp
+links having the same SRCU domain with proper nesting); the details
+are relatively unimportant.
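+
+As a rough kernel-style sketch of what srcu-gp and srcu-rscsi are
+modeling (the names my_srcu, my_lock, gp, and do_something_with() are
+made up for this fragment), an SRCU reader/updater pair looks like:
+
+	/* Reader */
+	idx = srcu_read_lock(&my_srcu);
+	p = srcu_dereference(gp, &my_srcu);
+	if (p)
+		do_something_with(p);
+	srcu_read_unlock(&my_srcu, idx);
+
+	/* Updater */
+	old = rcu_dereference_protected(gp, lockdep_is_held(&my_lock));
+	rcu_assign_pointer(gp, new);
+	synchronize_srcu(&my_srcu);
+	kfree(old);
+
+The pairing requirement mentioned above corresponds to the fact that
+synchronize_srcu(&my_srcu) waits only for critical sections entered by
+srcu_read_lock() on that same srcu_struct.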
+
LOCKING
-------
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 0f2c366518c6..2b87f3971548 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,13 +20,17 @@ that litmus test to be exercised within the Linux kernel.
REQUIREMENTS
============
-Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
-separately:
+Version 7.52 or higher of the "herd7" and "klitmus7" tools must be
+downloaded separately:
https://github.com/herd/herdtools7
See "herdtools7/INSTALL.md" for installation instructions.
+Note that although these tools usually provide backwards compatibility,
+this is not absolutely guaranteed. Therefore, if a later version does
+not work, please try using the exact version called out above.
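+
+A quick way to check which version is installed (assuming herd7 is on
+your PATH) is:
+
+	$ herd7 -version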
+
==================
BASIC USAGE: HERD7
@@ -221,8 +225,29 @@ The Linux-kernel memory model has the following limitations:
additional call_rcu() process to the site of the
emulated rcu-barrier().
- e. Sleepable RCU (SRCU) is not modeled. It can be
- emulated, but perhaps not simply.
+ e. Although sleepable RCU (SRCU) is now modeled, there
+ are some subtle differences between its semantics and
+ those in the Linux kernel. For example, the kernel
+ might interpret the following sequence as two partially
+ overlapping SRCU read-side critical sections:
+
+ 1 r1 = srcu_read_lock(&my_srcu);
+ 2 do_something_1();
+ 3 r2 = srcu_read_lock(&my_srcu);
+ 4 do_something_2();
+ 5 srcu_read_unlock(&my_srcu, r1);
+ 6 do_something_3();
+ 7 srcu_read_unlock(&my_srcu, r2);
+
+ In contrast, LKMM will interpret this as a nested pair of
+ SRCU read-side critical sections, with the outer critical
+ section spanning lines 1-7 and the inner critical section
+ spanning lines 3-5.
+
+ This difference would be more of a concern had anyone
+ identified a reasonable use case for partially overlapping
+ SRCU read-side critical sections. For more information,
+ please see: https://paulmck.livejournal.com/40593.html
f. Reader-writer locking is not modeled. It can be
emulated in litmus tests using atomic read-modify-write
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 796513362c05..def9131d3d8e 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -33,8 +33,14 @@ enum Barriers = 'wmb (*smp_wmb*) ||
'after-unlock-lock (*smp_mb__after_unlock_lock*)
instructions F[Barriers]
+(* SRCU *)
+enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu
+instructions SRCU[SRCU]
+(* All srcu events *)
+let Srcu = Srcu-lock | Srcu-unlock | Sync-srcu
+
(* Compute matching pairs of nested Rcu-lock and Rcu-unlock *)
-let matched = let rec
+let rcu-rscs = let rec
unmatched-locks = Rcu-lock \ domain(matched)
and unmatched-unlocks = Rcu-unlock \ range(matched)
and unmatched = unmatched-locks | unmatched-unlocks
@@ -46,8 +52,27 @@ let matched = let rec
in matched
(* Validate nesting *)
-flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking
-flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking
+flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking
+flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking
+
+(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *)
+let srcu-rscs = let rec
+ unmatched-locks = Srcu-lock \ domain(matched)
+ and unmatched-unlocks = Srcu-unlock \ range(matched)
+ and unmatched = unmatched-locks | unmatched-unlocks
+ and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc
+ and unmatched-locks-to-unlocks =
+ ([unmatched-locks] ; po ; [unmatched-unlocks]) & loc
+ and matched = matched | (unmatched-locks-to-unlocks \
+ (unmatched-po ; unmatched-po))
+ in matched
+
+(* Validate nesting *)
+flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking
+flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking
+
+(* Check for use of synchronize_srcu() inside an RCU critical section *)
+flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
-(* Outermost level of nesting only *)
-let crit = matched \ (po^-1 ; matched ; po^-1)
+(* Validate SRCU dynamic match *)
+flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 8f23c74a96fd..8dcb37835b61 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -33,7 +33,7 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
([M] ; po ; [UL] ; (co | po) ; [LKW] ;
fencerel(After-unlock-lock) ; [M])
-let gp = po ; [Sync-rcu] ; po?
+let gp = po ; [Sync-rcu | Sync-srcu] ; po?
let strong-fence = mb | gp
@@ -91,32 +91,47 @@ acyclic pb as propagation
(*******)
(*
- * Effect of read-side critical section proceeds from the rcu_read_lock()
- * onward on the one hand and from the rcu_read_unlock() backwards on the
- * other hand.
+ * Effects of read-side critical sections proceed from the rcu_read_unlock()
+ * or srcu_read_unlock() backwards on the one hand, and from the
+ * rcu_read_lock() or srcu_read_lock() forwards on the other hand.
+ *
+ * In the definition of rcu-fence below, the po term at the left-hand side
+ * of each disjunct and the po? term at the right-hand end have been factored
+ * out. They have been moved into the definitions of rcu-link and rb.
+ * This was necessary in order to apply the "& loc" tests correctly.
*)
-let rscs = po ; crit^-1 ; po?
+let rcu-gp = [Sync-rcu] (* Compare with gp *)
+let srcu-gp = [Sync-srcu]
+let rcu-rscsi = rcu-rscs^-1
+let srcu-rscsi = srcu-rscs^-1
(*
* The synchronize_rcu() strong fence is special in that it can order not
* one but two non-rf relations, but only in conjunction with an RCU
* read-side critical section.
*)
-let rcu-link = hb* ; pb* ; prop
+let rcu-link = po? ; hb* ; pb* ; prop ; po
(*
* Any sequence containing at least as many grace periods as RCU read-side
* critical sections (joined by rcu-link) acts as a generalized strong fence.
+ * Likewise for SRCU grace periods and read-side critical sections, provided
+ * the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
+ * struct srcu_struct location.
*)
-let rec rcu-fence = gp |
- (gp ; rcu-link ; rscs) |
- (rscs ; rcu-link ; gp) |
- (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
- (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
+let rec rcu-fence = rcu-gp | srcu-gp |
+ (rcu-gp ; rcu-link ; rcu-rscsi) |
+ ((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
+ (rcu-rscsi ; rcu-link ; rcu-gp) |
+ ((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
+ (rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) |
+ ((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) |
+ (rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) |
+ ((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) |
(rcu-fence ; rcu-link ; rcu-fence)
(* rb orders instructions just as pb does *)
-let rb = prop ; rcu-fence ; hb* ; pb*
+let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb*
irreflexive rb as rcu
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index b27911cc087d..551eeaa389d4 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -47,6 +47,12 @@ rcu_read_unlock() { __fence{rcu-unlock}; }
synchronize_rcu() { __fence{sync-rcu}; }
synchronize_rcu_expedited() { __fence{sync-rcu}; }
+// SRCU
+srcu_read_lock(X) __srcu{srcu-lock}(X)
+srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); }
+synchronize_srcu(X) { __srcu{sync-srcu}(X); }
+synchronize_srcu_expedited(X) { __srcu{sync-srcu}(X); }
+
// Atomic
atomic_read(X) READ_ONCE(*X)
atomic_set(X,V) { WRITE_ONCE(*X,V); }
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index 305ded17e741..a059d1a6d8a2 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -6,9 +6,6 @@
(*
* Generate coherence orders and handle lock operations
- *
- * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
- * spin_is_locked() is functional from herd7 version 7.49.
*)
include "cross.cat"
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index b0d7dc3d71b5..7a111a77b7aa 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -33,7 +33,11 @@
#define INSN_STACK 8
#define INSN_BUG 9
#define INSN_NOP 10
-#define INSN_OTHER 11
+#define INSN_STAC 11
+#define INSN_CLAC 12
+#define INSN_STD 13
+#define INSN_CLD 14
+#define INSN_OTHER 15
#define INSN_LAST INSN_OTHER
enum op_dest_type {
@@ -41,6 +45,7 @@ enum op_dest_type {
OP_DEST_REG_INDIRECT,
OP_DEST_MEM,
OP_DEST_PUSH,
+ OP_DEST_PUSHF,
OP_DEST_LEAVE,
};
@@ -55,6 +60,7 @@ enum op_src_type {
OP_SRC_REG_INDIRECT,
OP_SRC_CONST,
OP_SRC_POP,
+ OP_SRC_POPF,
OP_SRC_ADD,
OP_SRC_AND,
};
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 540a209b78ab..472e991f6512 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -357,19 +357,26 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
/* pushf */
*type = INSN_STACK;
op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ op->dest.type = OP_DEST_PUSHF;
break;
case 0x9d:
/* popf */
*type = INSN_STACK;
- op->src.type = OP_SRC_POP;
+ op->src.type = OP_SRC_POPF;
op->dest.type = OP_DEST_MEM;
break;
case 0x0f:
- if (op2 >= 0x80 && op2 <= 0x8f) {
+ if (op2 == 0x01) {
+
+ if (modrm == 0xca)
+ *type = INSN_CLAC;
+ else if (modrm == 0xcb)
+ *type = INSN_STAC;
+
+ } else if (op2 >= 0x80 && op2 <= 0x8f) {
*type = INSN_JUMP_CONDITIONAL;
@@ -444,6 +451,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
*type = INSN_CALL;
break;
+ case 0xfc:
+ *type = INSN_CLD;
+ break;
+
+ case 0xfd:
+ *type = INSN_STD;
+ break;
+
case 0xff:
if (modrm_reg == 2 || modrm_reg == 3)
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 694abc628e9b..f3b378126011 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -29,7 +29,7 @@
#include "builtin.h"
#include "check.h"
-bool no_fp, no_unreachable, retpoline, module;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -41,6 +41,8 @@ const struct option check_options[] = {
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+ OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
+ OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
OPT_END(),
};
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index 28ff40e19a14..69762f9c5602 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -20,7 +20,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 479196aeb409..ac743a1d53ab 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -31,6 +31,7 @@
struct alternative {
struct list_head list;
struct instruction *insn;
+ bool skip_orig;
};
const char *objname;
@@ -105,29 +106,6 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
insn = next_insn_same_sec(file, insn))
/*
- * Check if the function has been manually whitelisted with the
- * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
- * due to its use of a context switching instruction.
- */
-static bool ignore_func(struct objtool_file *file, struct symbol *func)
-{
- struct rela *rela;
-
- /* check for STACK_FRAME_NON_STANDARD */
- if (file->whitelist && file->whitelist->rela)
- list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
- if (rela->sym->type == STT_SECTION &&
- rela->sym->sec == func->sec &&
- rela->addend == func->offset)
- return true;
- if (rela->sym->type == STT_FUNC && rela->sym == func)
- return true;
- }
-
- return false;
-}
-
-/*
* This checks to see if the given function is a "noreturn" function.
*
* For global functions which are outside the scope of this object file, we
@@ -437,18 +415,107 @@ static void add_ignores(struct objtool_file *file)
struct instruction *insn;
struct section *sec;
struct symbol *func;
+ struct rela *rela;
- for_each_sec(file, sec) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
+ sec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
+ if (!sec)
+ return;
- if (!ignore_func(file, func))
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ switch (rela->sym->type) {
+ case STT_FUNC:
+ func = rela->sym;
+ break;
+
+ case STT_SECTION:
+ func = find_symbol_by_offset(rela->sym->sec, rela->addend);
+ if (!func || func->type != STT_FUNC)
continue;
+ break;
- func_for_each_insn_all(file, func, insn)
- insn->ignore = true;
+ default:
+ WARN("unexpected relocation symbol type in %s: %d", sec->name, rela->sym->type);
+ continue;
}
+
+ func_for_each_insn_all(file, func, insn)
+ insn->ignore = true;
+ }
+}
+
+/*
+ * This is a whitelist of functions that are allowed to be called with AC set.
+ * The list is meant to be minimal and only contains compiler instrumentation
+ * ABI and a few functions used to implement *_{to,from}_user() functions.
+ *
+ * These functions must not directly change AC, but may PUSHF/POPF.
+ */
+static const char *uaccess_safe_builtin[] = {
+ /* KASAN */
+ "kasan_report",
+ "check_memory_region",
+ /* KASAN out-of-line */
+ "__asan_loadN_noabort",
+ "__asan_load1_noabort",
+ "__asan_load2_noabort",
+ "__asan_load4_noabort",
+ "__asan_load8_noabort",
+ "__asan_load16_noabort",
+ "__asan_storeN_noabort",
+ "__asan_store1_noabort",
+ "__asan_store2_noabort",
+ "__asan_store4_noabort",
+ "__asan_store8_noabort",
+ "__asan_store16_noabort",
+ /* KASAN in-line */
+ "__asan_report_load_n_noabort",
+ "__asan_report_load1_noabort",
+ "__asan_report_load2_noabort",
+ "__asan_report_load4_noabort",
+ "__asan_report_load8_noabort",
+ "__asan_report_load16_noabort",
+ "__asan_report_store_n_noabort",
+ "__asan_report_store1_noabort",
+ "__asan_report_store2_noabort",
+ "__asan_report_store4_noabort",
+ "__asan_report_store8_noabort",
+ "__asan_report_store16_noabort",
+ /* KCOV */
+ "write_comp_data",
+ "__sanitizer_cov_trace_pc",
+ "__sanitizer_cov_trace_const_cmp1",
+ "__sanitizer_cov_trace_const_cmp2",
+ "__sanitizer_cov_trace_const_cmp4",
+ "__sanitizer_cov_trace_const_cmp8",
+ "__sanitizer_cov_trace_cmp1",
+ "__sanitizer_cov_trace_cmp2",
+ "__sanitizer_cov_trace_cmp4",
+ "__sanitizer_cov_trace_cmp8",
+ /* UBSAN */
+ "ubsan_type_mismatch_common",
+ "__ubsan_handle_type_mismatch",
+ "__ubsan_handle_type_mismatch_v1",
+ /* misc */
+ "csum_partial_copy_generic",
+ "__memcpy_mcsafe",
+ "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
+ NULL
+};
+
+static void add_uaccess_safe(struct objtool_file *file)
+{
+ struct symbol *func;
+ const char **name;
+
+ if (!uaccess)
+ return;
+
+ for (name = uaccess_safe_builtin; *name; name++) {
+ func = find_symbol_by_name(file->elf, *name);
+ if (!func)
+ continue;
+
+ func->alias->uaccess_safe = true;
}
}
@@ -458,13 +525,13 @@ static void add_ignores(struct objtool_file *file)
* But it at least allows objtool to understand the control flow *around* the
* retpoline.
*/
-static int add_nospec_ignores(struct objtool_file *file)
+static int add_ignore_alternatives(struct objtool_file *file)
{
struct section *sec;
struct rela *rela;
struct instruction *insn;
- sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+ sec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
if (!sec)
return 0;
@@ -476,7 +543,7 @@ static int add_nospec_ignores(struct objtool_file *file)
insn = find_insn(file, rela->sym->sec, rela->addend);
if (!insn) {
- WARN("bad .discard.nospec entry");
+ WARN("bad .discard.ignore_alts entry");
return -1;
}
@@ -525,7 +592,8 @@ static int add_jump_destinations(struct objtool_file *file)
continue;
} else {
/* sibling call */
- insn->jump_dest = 0;
+ insn->call_dest = rela->sym;
+ insn->jump_dest = NULL;
continue;
}
@@ -547,25 +615,38 @@ static int add_jump_destinations(struct objtool_file *file)
}
/*
- * For GCC 8+, create parent/child links for any cold
- * subfunctions. This is _mostly_ redundant with a similar
- * initialization in read_symbols().
- *
- * If a function has aliases, we want the *first* such function
- * in the symbol table to be the subfunction's parent. In that
- * case we overwrite the initialization done in read_symbols().
- *
- * However this code can't completely replace the
- * read_symbols() code because this doesn't detect the case
- * where the parent function's only reference to a subfunction
- * is through a switch table.
+ * Cross-function jump.
*/
if (insn->func && insn->jump_dest->func &&
- insn->func != insn->jump_dest->func &&
- !strstr(insn->func->name, ".cold.") &&
- strstr(insn->jump_dest->func->name, ".cold.")) {
- insn->func->cfunc = insn->jump_dest->func;
- insn->jump_dest->func->pfunc = insn->func;
+ insn->func != insn->jump_dest->func) {
+
+ /*
+ * For GCC 8+, create parent/child links for any cold
+ * subfunctions. This is _mostly_ redundant with a
+ * similar initialization in read_symbols().
+ *
+ * If a function has aliases, we want the *first* such
+ * function in the symbol table to be the subfunction's
+ * parent. In that case we overwrite the
+ * initialization done in read_symbols().
+ *
+ * However this code can't completely replace the
+ * read_symbols() code because this doesn't detect the
+ * case where the parent function's only reference to a
+ * subfunction is through a switch table.
+ */
+ if (!strstr(insn->func->name, ".cold.") &&
+ strstr(insn->jump_dest->func->name, ".cold.")) {
+ insn->func->cfunc = insn->jump_dest->func;
+ insn->jump_dest->func->pfunc = insn->func;
+
+ } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
+ insn->jump_dest->offset == insn->jump_dest->func->offset) {
+
+				/* sibling call */
+ insn->call_dest = insn->jump_dest->func;
+ insn->jump_dest = NULL;
+ }
}
}
@@ -634,9 +715,6 @@ static int add_call_destinations(struct objtool_file *file)
* conditionally jumps to the _end_ of the entry. We have to modify these
* jumps' destinations to point back to .text rather than the end of the
* entry in .altinstr_replacement.
- *
- * 4. It has been requested that we don't validate the !POPCNT feature path
- * which is a "very very small percentage of machines".
*/
static int handle_group_alt(struct objtool_file *file,
struct special_alt *special_alt,
@@ -652,9 +730,6 @@ static int handle_group_alt(struct objtool_file *file,
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- if (special_alt->skip_orig)
- insn->type = INSN_NOP;
-
insn->alt_group = true;
last_orig_insn = insn;
}
@@ -696,6 +771,7 @@ static int handle_group_alt(struct objtool_file *file,
last_new_insn = insn;
insn->ignore = orig_insn->ignore_alts;
+ insn->func = orig_insn->func;
if (insn->type != INSN_JUMP_CONDITIONAL &&
insn->type != INSN_JUMP_UNCONDITIONAL)
@@ -818,6 +894,8 @@ static int add_special_section_alts(struct objtool_file *file)
}
alt->insn = new_insn;
+ alt->skip_orig = special_alt->skip_orig;
+ orig_insn->ignore_alts |= special_alt->skip_alt;
list_add_tail(&alt->list, &orig_insn->alts);
list_del(&special_alt->list);
@@ -1239,8 +1317,9 @@ static int decode_sections(struct objtool_file *file)
return ret;
add_ignores(file);
+ add_uaccess_safe(file);
- ret = add_nospec_ignores(file);
+ ret = add_ignore_alternatives(file);
if (ret)
return ret;
@@ -1320,11 +1399,11 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
return 0;
/* push */
- if (op->dest.type == OP_DEST_PUSH)
+ if (op->dest.type == OP_DEST_PUSH || op->dest.type == OP_DEST_PUSHF)
cfa->offset += 8;
/* pop */
- if (op->src.type == OP_SRC_POP)
+ if (op->src.type == OP_SRC_POP || op->src.type == OP_SRC_POPF)
cfa->offset -= 8;
/* add immediate to sp */
@@ -1581,6 +1660,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
break;
case OP_SRC_POP:
+ case OP_SRC_POPF:
if (!state->drap && op->dest.type == OP_DEST_REG &&
op->dest.reg == cfa->base) {
@@ -1645,6 +1725,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
break;
case OP_DEST_PUSH:
+ case OP_DEST_PUSHF:
state->stack_size += 8;
if (cfa->base == CFI_SP)
cfa->offset += 8;
@@ -1735,7 +1816,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
break;
case OP_DEST_MEM:
- if (op->src.type != OP_SRC_POP) {
+ if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
WARN_FUNC("unknown stack-related memory operation",
insn->sec, insn->offset);
return -1;
@@ -1799,6 +1880,50 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state)
return false;
}
+static inline bool func_uaccess_safe(struct symbol *func)
+{
+ if (func)
+ return func->alias->uaccess_safe;
+
+ return false;
+}
+
+static inline const char *insn_dest_name(struct instruction *insn)
+{
+ if (insn->call_dest)
+ return insn->call_dest->name;
+
+ return "{dynamic}";
+}
+
+static int validate_call(struct instruction *insn, struct insn_state *state)
+{
+ if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
+ WARN_FUNC("call to %s() with UACCESS enabled",
+ insn->sec, insn->offset, insn_dest_name(insn));
+ return 1;
+ }
+
+ if (state->df) {
+ WARN_FUNC("call to %s() with DF set",
+ insn->sec, insn->offset, insn_dest_name(insn));
+ return 1;
+ }
+
+ return 0;
+}
+
+static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
+{
+ if (has_modified_stack_frame(state)) {
+ WARN_FUNC("sibling call from callable instruction with modified stack frame",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ return validate_call(insn, state);
+}
+
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -1844,7 +1969,9 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (!insn->hint && !insn_state_match(insn, &state))
return 1;
- return 0;
+ /* If we were here with AC=0, but now have AC=1, go again */
+ if (insn->state.uaccess || !state.uaccess)
+ return 0;
}
if (insn->hint) {
@@ -1893,16 +2020,42 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
insn->visited = true;
if (!insn->ignore_alts) {
+ bool skip_orig = false;
+
list_for_each_entry(alt, &insn->alts, list) {
+ if (alt->skip_orig)
+ skip_orig = true;
+
ret = validate_branch(file, alt->insn, state);
- if (ret)
- return 1;
+ if (ret) {
+ if (backtrace)
+ BT_FUNC("(alt)", insn);
+ return ret;
+ }
}
+
+ if (skip_orig)
+ return 0;
}
switch (insn->type) {
case INSN_RETURN:
+ if (state.uaccess && !func_uaccess_safe(func)) {
+ WARN_FUNC("return with UACCESS enabled", sec, insn->offset);
+ return 1;
+ }
+
+ if (!state.uaccess && func_uaccess_safe(func)) {
+ WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", sec, insn->offset);
+ return 1;
+ }
+
+ if (state.df) {
+ WARN_FUNC("return with DF set", sec, insn->offset);
+ return 1;
+ }
+
if (func && has_modified_stack_frame(&state)) {
WARN_FUNC("return with modified stack frame",
sec, insn->offset);
@@ -1918,17 +2071,22 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
return 0;
case INSN_CALL:
- if (is_fentry_call(insn))
- break;
+ case INSN_CALL_DYNAMIC:
+ ret = validate_call(insn, &state);
+ if (ret)
+ return ret;
- ret = dead_end_function(file, insn->call_dest);
- if (ret == 1)
- return 0;
- if (ret == -1)
- return 1;
+ if (insn->type == INSN_CALL) {
+ if (is_fentry_call(insn))
+ break;
+
+ ret = dead_end_function(file, insn->call_dest);
+ if (ret == 1)
+ return 0;
+ if (ret == -1)
+ return 1;
+ }
- /* fallthrough */
- case INSN_CALL_DYNAMIC:
if (!no_fp && func && !has_valid_stack_frame(&state)) {
WARN_FUNC("call without frame pointer save/setup",
sec, insn->offset);
@@ -1938,18 +2096,21 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
case INSN_JUMP_CONDITIONAL:
case INSN_JUMP_UNCONDITIONAL:
- if (insn->jump_dest &&
- (!func || !insn->jump_dest->func ||
- insn->jump_dest->func->pfunc == func)) {
- ret = validate_branch(file, insn->jump_dest,
- state);
+ if (func && !insn->jump_dest) {
+ ret = validate_sibling_call(insn, &state);
if (ret)
- return 1;
+ return ret;
- } else if (func && has_modified_stack_frame(&state)) {
- WARN_FUNC("sibling call from callable instruction with modified stack frame",
- sec, insn->offset);
- return 1;
+ } else if (insn->jump_dest &&
+ (!func || !insn->jump_dest->func ||
+ insn->jump_dest->func->pfunc == func)) {
+ ret = validate_branch(file, insn->jump_dest,
+ state);
+ if (ret) {
+ if (backtrace)
+ BT_FUNC("(branch)", insn);
+ return ret;
+ }
}
if (insn->type == INSN_JUMP_UNCONDITIONAL)
@@ -1958,11 +2119,10 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
break;
case INSN_JUMP_DYNAMIC:
- if (func && list_empty(&insn->alts) &&
- has_modified_stack_frame(&state)) {
- WARN_FUNC("sibling call from callable instruction with modified stack frame",
- sec, insn->offset);
- return 1;
+ if (func && list_empty(&insn->alts)) {
+ ret = validate_sibling_call(insn, &state);
+ if (ret)
+ return ret;
}
return 0;
@@ -1979,6 +2139,63 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (update_insn_state(insn, &state))
return 1;
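+			/*
+			 * uaccess_stack acts as a shift register: each PUSHF
+			 * shifts in the current AC state, with an initial 1 bit
+			 * serving as a sentinel for the empty register; each
+			 * POPF pops one bit and clears the register once only
+			 * the sentinel remains.
+			 */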
+ if (insn->stack_op.dest.type == OP_DEST_PUSHF) {
+ if (!state.uaccess_stack) {
+ state.uaccess_stack = 1;
+ } else if (state.uaccess_stack >> 31) {
+ WARN_FUNC("PUSHF stack exhausted", sec, insn->offset);
+ return 1;
+ }
+ state.uaccess_stack <<= 1;
+ state.uaccess_stack |= state.uaccess;
+ }
+
+ if (insn->stack_op.src.type == OP_SRC_POPF) {
+ if (state.uaccess_stack) {
+ state.uaccess = state.uaccess_stack & 1;
+ state.uaccess_stack >>= 1;
+ if (state.uaccess_stack == 1)
+ state.uaccess_stack = 0;
+ }
+ }
+
+ break;
+
+ case INSN_STAC:
+ if (state.uaccess) {
+ WARN_FUNC("recursive UACCESS enable", sec, insn->offset);
+ return 1;
+ }
+
+ state.uaccess = true;
+ break;
+
+ case INSN_CLAC:
+ if (!state.uaccess && insn->func) {
+ WARN_FUNC("redundant UACCESS disable", sec, insn->offset);
+ return 1;
+ }
+
+ if (func_uaccess_safe(func) && !state.uaccess_stack) {
+ WARN_FUNC("UACCESS-safe disables UACCESS", sec, insn->offset);
+ return 1;
+ }
+
+ state.uaccess = false;
+ break;
+
+ case INSN_STD:
+ if (state.df)
+ WARN_FUNC("recursive STD", sec, insn->offset);
+
+ state.df = true;
+ break;
+
+ case INSN_CLD:
+ if (!state.df && insn->func)
+ WARN_FUNC("redundant CLD", sec, insn->offset);
+
+ state.df = false;
break;
default:
@@ -2015,6 +2232,8 @@ static int validate_unwind_hints(struct objtool_file *file)
for_each_insn(file, insn) {
if (insn->hint && !insn->visited) {
ret = validate_branch(file, insn, state);
+ if (ret && backtrace)
+ BT_FUNC("<=== (hint)", insn);
warnings += ret;
}
}
@@ -2142,7 +2361,11 @@ static int validate_functions(struct objtool_file *file)
if (!insn || insn->ignore)
continue;
+ state.uaccess = func->alias->uaccess_safe;
+
ret = validate_branch(file, insn, state);
+ if (ret && backtrace)
+ BT_FUNC("<=== (func)", insn);
warnings += ret;
}
}
@@ -2199,7 +2422,6 @@ int check(const char *_objname, bool orc)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
- file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
file.c_file = find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index e6e8a655b556..71e54f97dbcd 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,7 +31,8 @@ struct insn_state {
int stack_size;
unsigned char type;
bool bp_scratch;
- bool drap, end;
+ bool drap, end, uaccess, df;
+ unsigned int uaccess_stack;
int drap_reg, drap_offset;
struct cfi_reg vals[CFI_NUM_REGS];
};
@@ -60,7 +61,6 @@ struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 16);
- struct section *whitelist;
bool ignore_unreachables, c_file, hints, rodata;
};
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index b8f3cca8e58b..dd198d53387d 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -219,7 +219,7 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
struct section *symtab, *sec;
- struct symbol *sym, *pfunc;
+ struct symbol *sym, *pfunc, *alias;
struct list_head *entry, *tmp;
int symbols_nr, i;
char *coldstr;
@@ -239,6 +239,7 @@ static int read_symbols(struct elf *elf)
return -1;
}
memset(sym, 0, sizeof(*sym));
+ alias = sym;
sym->idx = i;
@@ -288,11 +289,17 @@ static int read_symbols(struct elf *elf)
break;
}
- if (sym->offset == s->offset && sym->len >= s->len) {
- entry = tmp;
- break;
+ if (sym->offset == s->offset) {
+ if (sym->len == s->len && alias == sym)
+ alias = s;
+
+ if (sym->len >= s->len) {
+ entry = tmp;
+ break;
+ }
}
}
+ sym->alias = alias;
list_add(&sym->list, entry);
hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
}
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index bc97ed86b9cd..2cc2ed49322d 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -61,7 +61,8 @@ struct symbol {
unsigned char bind, type;
unsigned long offset;
unsigned int len;
- struct symbol *pfunc, *cfunc;
+ struct symbol *pfunc, *cfunc, *alias;
+ bool uaccess_safe;
};
struct rela {
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 50af4e1274b3..4e50563d87c6 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <string.h>
+#include "builtin.h"
#include "special.h"
#include "warn.h"
@@ -42,6 +43,7 @@
#define ALT_NEW_LEN_OFFSET 11
#define X86_FEATURE_POPCNT (4*32+23)
+#define X86_FEATURE_SMAP (9*32+20)
struct special_entry {
const char *sec;
@@ -110,6 +112,22 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
*/
if (feature == X86_FEATURE_POPCNT)
alt->skip_orig = true;
+
+ /*
+	 * If UACCESS validation is enabled, force that alternative;
+	 * otherwise force it the other way.
+	 *
+	 * What we want to avoid is having both the original and the
+	 * alternative code flow at the same time; in that case we can
+	 * find paths that see the STAC but take the NOP instead of
+	 * CLAC, and the other way around.
+ */
+ if (feature == X86_FEATURE_SMAP) {
+ if (uaccess)
+ alt->skip_orig = true;
+ else
+ alt->skip_alt = true;
+ }
}
orig_rela = find_rela_by_dest(sec, offset + entry->orig);
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
index fad1d092f679..d5c062e718ef 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/special.h
@@ -26,6 +26,7 @@ struct special_alt {
bool group;
bool skip_orig;
+ bool skip_alt;
bool jump_or_nop;
struct section *orig_sec;
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index afd9f7a05f6d..f4fbb972b611 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -64,6 +64,14 @@ static inline char *offstr(struct section *sec, unsigned long offset)
free(_str); \
})
+#define BT_FUNC(format, insn, ...) \
+({ \
+ struct instruction *_insn = (insn); \
+ char *_str = offstr(_insn->sec, _insn->offset); \
+ WARN(" %s: " format, _str, ##__VA_ARGS__); \
+ free(_str); \
+})
+
#define WARN_ELF(format, ...) \
WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index fe3f97e342fa..6d65874e16c3 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -227,7 +227,7 @@ FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
-FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes
+FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
CFLAGS += -fno-omit-frame-pointer
CFLAGS += -ggdb3
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 98ad783efc69..a7784554a80d 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -39,6 +39,10 @@
#include <numa.h>
#include <numaif.h>
+#ifndef RUSAGE_THREAD
+# define RUSAGE_THREAD 1
+#endif
+
/*
 * Regular printout to the terminal, suppressed if -q is specified:
*/
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 49ee3c2033ec..c3625ec374e0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1308,6 +1308,7 @@ static void init_features(struct perf_session *session)
for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
perf_header__set_feat(&session->header, feat);
+ perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1999d6533d12..fbbb0da43abb 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1377,6 +1377,7 @@ int cmd_top(int argc, const char **argv)
* */
.overwrite = 0,
.sample_time = true,
+ .sample_time_set = true,
},
.max_stack = sysctl__max_stack(),
.annotation_opts = annotation__default_options,
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index 3b71902a5a21..bf271fbc3a88 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -331,7 +331,7 @@ if perf_db_export_calls:
'return_id,'
'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
'parent_call_path_id,'
- 'parent_id'
+ 'calls.parent_id'
' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
do_query(query, 'CREATE VIEW samples_view AS '
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index c8b01176c9e1..09762985c713 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1714,8 +1714,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
return -1;
- pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__,
- sym->name, sym->start, sym->end - sym->start);
+ pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
memset(tpath, 0, sizeof(tpath));
perf_exe(tpath, sizeof(tpath));
@@ -1740,7 +1740,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
info_linear = info_node->info_linear;
sub_id = dso->bpf_prog.sub_id;
- info.buffer = (void *)(info_linear->info.jited_prog_insns);
+ info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
info.buffer_length = info_linear->info.jited_prog_len;
if (info_linear->info.nr_line_info)
@@ -1776,7 +1776,7 @@ static int symbol__disassemble_bpf(struct symbol *sym,
const char *srcline;
u64 addr;
- addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id];
+ addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
count = disassemble(pc, &info);
if (prog_linfo)
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index ca0fff6272be..06f48312c5ed 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -7,7 +7,6 @@
#include "asm/bug.h"
#include "debug.h"
#include <unistd.h>
-#include <asm/unistd.h>
#include <sys/syscall.h>
static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 110804936fc3..de488b43f440 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -422,11 +422,9 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
if (!etmq->packet)
goto out_free;
- if (etm->synth_opts.last_branch || etm->sample_branches) {
- etmq->prev_packet = zalloc(szp);
- if (!etmq->prev_packet)
- goto out_free;
- }
+ etmq->prev_packet = zalloc(szp);
+ if (!etmq->prev_packet)
+ goto out_free;
if (etm->synth_opts.last_branch) {
size_t sz = sizeof(struct branch_stack);
@@ -981,7 +979,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* PREV_PACKET is a branch.
*/
if (etm->synth_opts.last_branch &&
- etmq->prev_packet &&
etmq->prev_packet->sample_type == CS_ETM_RANGE &&
etmq->prev_packet->last_instr_taken_branch)
cs_etm__update_last_branch_rb(etmq);
@@ -1014,7 +1011,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
etmq->period_instructions = instrs_over;
}
- if (etm->sample_branches && etmq->prev_packet) {
+ if (etm->sample_branches) {
bool generate_sample = false;
/* Generate sample for tracing on packet */
@@ -1071,9 +1068,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
- if (!etmq->prev_packet)
- return 0;
-
/* Handle start tracing packet */
if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
goto swap_packet;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index c6351b557bb0..6a3eaf7d9353 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -57,9 +57,11 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
else if (prog_id > node->info_linear->info.id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
+out:
up_read(&env->bpf_progs.lock);
return node;
}
@@ -109,9 +111,11 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
else if (btf_id > node->id)
n = n->rb_right;
else
- break;
+ goto out;
}
+ node = NULL;
+out:
up_read(&env->bpf_progs.lock);
return node;
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6689378ee577..51ead577533f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1868,12 +1868,12 @@ static void *perf_evlist__poll_thread(void *arg)
{
struct perf_evlist *evlist = arg;
bool draining = false;
- int i;
+ int i, done = 0;
+
+ while (!done) {
+ bool got_data = false;
- while (draining || !(evlist->thread.done)) {
- if (draining)
- draining = false;
- else if (evlist->thread.done)
+ if (evlist->thread.done)
draining = true;
if (!draining)
@@ -1894,9 +1894,13 @@ static void *perf_evlist__poll_thread(void *arg)
pr_warning("cannot locate proper evsel for the side band event\n");
perf_mmap__consume(map);
+ got_data = true;
}
perf_mmap__read_done(map);
}
+
+ if (draining && !got_data)
+ break;
}
return NULL;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 66d066f18b5b..966360844fff 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2368,7 +2368,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
if (data->user_regs.abi) {
u64 mask = evsel->attr.sample_regs_user;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->user_regs.mask = mask;
data->user_regs.regs = (u64 *)array;
@@ -2424,7 +2424,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
u64 mask = evsel->attr.sample_regs_intr;
- sz = hweight_long(mask) * sizeof(u64);
+ sz = hweight64(mask) * sizeof(u64);
OVERFLOW_CHECK(array, sz, max_size);
data->intr_regs.mask = mask;
data->intr_regs.regs = (u64 *)array;
@@ -2552,7 +2552,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -2580,7 +2580,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
result += sizeof(u64);
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
result += sz;
} else {
result += sizeof(u64);
@@ -2710,7 +2710,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
if (type & PERF_SAMPLE_REGS_USER) {
if (sample->user_regs.abi) {
*array++ = sample->user_regs.abi;
- sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->user_regs.mask) * sizeof(u64);
memcpy(array, sample->user_regs.regs, sz);
array = (void *)array + sz;
} else {
@@ -2746,7 +2746,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
if (type & PERF_SAMPLE_REGS_INTR) {
if (sample->intr_regs.abi) {
*array++ = sample->intr_regs.abi;
- sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ sz = hweight64(sample->intr_regs.mask) * sizeof(u64);
memcpy(array, sample->intr_regs.regs, sz);
array = (void *)array + sz;
} else {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b9e693825873..2d2af2ac2b1e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2606,6 +2606,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
perf_env__insert_bpf_prog_info(env, info_node);
}
+ up_write(&env->bpf_progs.lock);
return 0;
out:
free(info_linear);
@@ -2623,7 +2624,9 @@ static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data _
static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
{
struct perf_env *env = &ff->ph->env;
+ struct btf_node *node = NULL;
u32 count, i;
+ int err = -1;
if (ff->ph->needs_swap) {
pr_warning("interpreting btf from systems with endianity is not yet supported\n");
@@ -2636,31 +2639,32 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
down_write(&env->bpf_progs.lock);
for (i = 0; i < count; ++i) {
- struct btf_node *node;
u32 id, data_size;
if (do_read_u32(ff, &id))
- return -1;
+ goto out;
if (do_read_u32(ff, &data_size))
- return -1;
+ goto out;
node = malloc(sizeof(struct btf_node) + data_size);
if (!node)
- return -1;
+ goto out;
node->id = id;
node->data_size = data_size;
- if (__do_read(ff, node->data, data_size)) {
- free(node);
- return -1;
- }
+ if (__do_read(ff, node->data, data_size))
+ goto out;
perf_env__insert_btf(env, node);
+ node = NULL;
}
+ err = 0;
+out:
up_write(&env->bpf_progs.lock);
- return 0;
+ free(node);
+ return err;
}
struct feature_ops {
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e32628cd20a7..ee71efb9db62 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -261,6 +261,22 @@ bool __map__is_extra_kernel_map(const struct map *map)
return kmap && kmap->name[0];
}
+bool __map__is_bpf_prog(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return true;
+
+ /*
+ * If PERF_RECORD_BPF_EVENT is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can
+ * guess the type based on name.
+ */
+ name = map->dso->short_name;
+ return name && (strstr(name, "bpf_prog_") == name);
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
@@ -910,10 +926,8 @@ static void __maps__insert_name(struct maps *maps, struct map *map)
rc = strcmp(m->dso->short_name, map->dso->short_name);
if (rc < 0)
p = &(*p)->rb_left;
- else if (rc > 0)
- p = &(*p)->rb_right;
else
- return;
+ p = &(*p)->rb_right;
}
rb_link_node(&map->rb_node_name, parent, p);
rb_insert_color(&map->rb_node_name, &maps->names);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e20749f2c55..dc93787c74f0 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -159,10 +159,12 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
+bool __map__is_bpf_prog(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
- return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
+ !__map__is_bpf_prog(map);
}
bool map__has_symbols(const struct map *map);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b17f1c9bc965..bad5f87ae001 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1928,12 +1928,14 @@ more:
size = event->header.size;
+ skip = -EINVAL;
+
if (size < sizeof(struct perf_event_header) ||
(skip = rd->process(session, event, file_pos)) < 0) {
- pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n",
file_offset + head, event->header.size,
- event->header.type);
- err = -EINVAL;
+ event->header.type, strerror(-skip));
+ err = skip;
goto out;
}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index b579f962451d..85ffdcfa596b 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -146,6 +146,7 @@ static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
struct nfit_test_sec {
u8 state;
u8 ext_state;
+ u8 old_state;
u8 passphrase[32];
u8 master_passphrase[32];
u64 overwrite_end_time;
@@ -225,6 +226,8 @@ static struct workqueue_struct *nfit_wq;
static struct gen_pool *nfit_pool;
+static const char zero_key[NVDIMM_PASSPHRASE_LEN];
+
static struct nfit_test *to_nfit_test(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -1059,8 +1062,7 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t,
struct device *dev = &t->pdev.dev;
struct nfit_test_sec *sec = &dimm_sec_info[dimm];
- if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) ||
- (sec->state & ND_INTEL_SEC_STATE_FROZEN)) {
+ if (sec->state & ND_INTEL_SEC_STATE_FROZEN) {
nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE;
dev_dbg(dev, "secure erase: wrong security state\n");
} else if (memcmp(nd_cmd->passphrase, sec->passphrase,
@@ -1068,6 +1070,12 @@ static int nd_intel_test_cmd_secure_erase(struct nfit_test *t,
nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS;
dev_dbg(dev, "secure erase: wrong passphrase\n");
} else {
+ if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED)
+ && (memcmp(nd_cmd->passphrase, zero_key,
+ ND_INTEL_PASSPHRASE_SIZE) != 0)) {
+ dev_dbg(dev, "invalid zero key\n");
+ return 0;
+ }
memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
sec->state = 0;
@@ -1093,7 +1101,7 @@ static int nd_intel_test_cmd_overwrite(struct nfit_test *t,
return 0;
}
- memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE);
+ sec->old_state = sec->state;
sec->state = ND_INTEL_SEC_STATE_OVERWRITE;
dev_dbg(dev, "overwrite progressing.\n");
sec->overwrite_end_time = get_jiffies_64() + 5 * HZ;
@@ -1115,7 +1123,8 @@ static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t,
if (time_is_before_jiffies64(sec->overwrite_end_time)) {
sec->overwrite_end_time = 0;
- sec->state = 0;
+ sec->state = sec->old_state;
+ sec->old_state = 0;
sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED;
dev_dbg(dev, "overwrite is complete\n");
} else
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index fb11240b758b..9093a8f64dc6 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -375,6 +375,31 @@
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
+ "calls: ptr null check in subprog",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .fixup_map_hash_48b = { 3 },
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 0,
+},
+{
"calls: two calls with args",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index e3fc22e672c2..d5c596fdc4b9 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -631,3 +631,25 @@
.errstr = "invalid access to packet",
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
+{
+ "direct packet access: test29 (reg > pkt_end in subprog)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index c4cf6e6d800e..a6c196c8534c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -11,6 +11,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
rif_set_addr_test
+ rif_vrf_set_addr_test
rif_inherit_bridge_addr_test
rif_non_inherit_bridge_addr_test
vlan_interface_deletion_test
@@ -98,6 +99,25 @@ rif_set_addr_test()
ip link set dev $swp1 addr $swp1_mac
}
+rif_vrf_set_addr_test()
+{
+ # Test that it is possible to set an IP address on a VRF upper despite
+ # its random MAC address.
+ RET=0
+
+ ip link add name vrf-test type vrf table 10
+ ip link set dev $swp1 master vrf-test
+
+ ip -4 address add 192.0.2.1/24 dev vrf-test
+ check_err $? "failed to set IPv4 address on VRF"
+ ip -6 address add 2001:db8:1::1/64 dev vrf-test
+ check_err $? "failed to set IPv6 address on VRF"
+
+ log_test "RIF - setting IP address on VRF"
+
+ ip link del dev vrf-test
+}
+
rif_inherit_bridge_addr_test()
{
RET=0
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 7514fcea91a7..f8588cca2bef 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,3 +1,5 @@
+include ../../../../scripts/Kbuild.include
+
all:
top_srcdir = ../../../..
@@ -17,6 +19,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
@@ -30,7 +33,11 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I..
-LDFLAGS += -pthread -no-pie
+
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += -pthread $(no-pie-option)
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
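The -no-pie probe added here leans on the try-run helper that the new include of scripts/Kbuild.include provides: compile a throwaway program with the candidate flag and emit the flag only if the compiler accepts it. A rough stand-alone sketch of the same idea in plain shell (the file name and variable below are illustrative, not part of the Makefile):

	# Probe whether the toolchain accepts -no-pie; an empty result means "unsupported".
	echo 'int main(void) { return 0; }' > /tmp/nopie-probe.c
	no_pie_option=
	if cc -Werror -no-pie /tmp/nopie-probe.c -o /tmp/nopie-probe 2>/dev/null; then
		no_pie_option=-no-pie
	fi
	rm -f /tmp/nopie-probe.c /tmp/nopie-probe
	# The Makefile then appends "-pthread $(no-pie-option)" to LDFLAGS, as in the hunk above.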
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 4715cfba20dc..93f99c6b7d79 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -288,8 +288,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
#endif
max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
guest_page_size = (1ul << guest_page_shift);
- /* 1G of guest page sized pages */
- guest_num_pages = (1ul << (30 - guest_page_shift));
+ /*
+ * A little more than 1G of guest page sized pages. Cover the
+ * case where the size is not aligned to 64 pages.
+ */
+ guest_num_pages = (1ul << (30 - guest_page_shift)) + 3;
host_page_size = getpagesize();
host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
!!((guest_num_pages * guest_page_size) % host_page_size);
@@ -359,7 +362,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
#ifdef USE_CLEAR_DIRTY_LOG
kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
- DIV_ROUND_UP(host_num_pages, 64) * 64);
+ host_num_pages);
#endif
vm_dirty_log_verify(bmap);
iteration++;
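The "+ 3" above deliberately pushes the page count off a 64-page boundary, so the KVM_CLEAR_DIRTY_LOG call that now passes the exact host_num_pages exercises the unaligned tail. A quick sanity check of the arithmetic, assuming 4 KiB guest and host pages (guest_page_shift = 12):

	guest_page_shift=12
	guest_num_pages=$(( (1 << (30 - guest_page_shift)) + 3 ))	# 262147 pages
	echo $(( guest_num_pages % 64 ))				# 3: no longer a multiple of 64
	echo $(( (guest_num_pages + 63) / 64 * 64 ))			# 262208: what the old rounded-up count covered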
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e2884c2b81ff..6063d5b2f356 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -778,6 +778,33 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+
#define MSR_IA32_TSCDEADLINE 0x000006e0
#define MSR_IA32_UCODE_WRITE 0x00000079
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index efa0aad8b3c6..4ca96b228e46 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -91,6 +91,11 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
if (vm->kvm_fd < 0)
exit(KSFT_SKIP);
+ if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
+ fprintf(stderr, "immediate_exit not available, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
"rc: %i errno: %i", vm->fd, errno);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f28127f4a3af..dc7fae9fa424 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1030,6 +1030,14 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
nested_size, sizeof(state->nested_));
}
+ /*
+ * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
+ * guest state is consistent only after userspace re-enters the
+ * kernel with KVM_RUN. Complete IO prior to migrating state
+ * to a new VM.
+ */
+ vcpu_run_complete_io(vm, vcpuid);
+
nmsrs = kvm_get_num_msrs(vm);
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
@@ -1093,12 +1101,6 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
- }
-
r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
@@ -1130,4 +1132,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
r);
+
+ if (state->nested.size) {
+ r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+ r);
+ }
}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index c49c2a28b0eb..36669684eca5 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -123,8 +123,6 @@ int main(int argc, char *argv[])
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
@@ -144,6 +142,9 @@ int main(int argc, char *argv[])
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
kvm_vm_release(vm);
/* Restore state in a new VM. */
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 264425f75806..9a21e912097c 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -141,7 +141,13 @@ int main(int argc, char *argv[])
free(hv_cpuid_entries);
- vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
+
+ if (rv) {
+ fprintf(stderr,
+			"Enlightened VMCS is unsupported, skipping related test\n");
+ goto vm_free;
+ }
hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
if (!hv_cpuid_entries)
@@ -151,6 +157,7 @@ int main(int argc, char *argv[])
free(hv_cpuid_entries);
+vm_free:
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
new file mode 100644
index 000000000000..fb8086964d83
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for SMM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "vmx.h"
+
+#define VCPU_ID 1
+
+#define PAGE_SIZE 4096
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+#define SYNC_PORT 0xe
+#define DONE 0xff
+
+/*
+ * This is compiled as normal 64-bit code, however, the SMI handler is
+ * executed in real-address mode. To keep things simple we limit ourselves
+ * to a mode-independent subset of asm here.
+ * The SMI handler always reports back the fixed stage SMRAM_STAGE.
+ */
+uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+void sync_with_host(uint64_t phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
+ : : "a" (phase));
+}
+
+void self_smi(void)
+{
+ wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+
+ sync_with_host(1);
+
+ wrmsr(MSR_IA32_APICBASE, apicbase | X2APIC_ENABLE);
+
+ sync_with_host(2);
+
+ self_smi();
+
+ sync_with_host(4);
+
+ if (vmx_pages) {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+ sync_with_host(5);
+
+ self_smi();
+
+ sync_with_host(7);
+ }
+
+ sync_with_host(DONE);
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages = NULL;
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int stage, stage_reported;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
+ == SMRAM_GPA, "could not allocate guest physical addresses?");
+
+ memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
+ sizeof(smi_handler));
+
+ vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ printf("will skip SMM test with VMX enabled\n");
+ vcpu_args_set(vm, VCPU_ID, 1, 0);
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(&regs, 0, sizeof(regs));
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ stage_reported = regs.rax & 0xff;
+
+ if (stage_reported == DONE)
+ goto done;
+
+ TEST_ASSERT(stage_reported == stage ||
+ stage_reported == SMRAM_STAGE,
+ "Unexpected stage: #%x, got %x",
+ stage, stage_reported);
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ kvm_vm_release(vm);
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID, 0, 0);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 30f75856cf39..e0a3c0204b7c 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -134,11 +134,6 @@ int main(int argc, char *argv[])
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- fprintf(stderr, "immediate_exit not available, skipping test\n");
- exit(KSFT_SKIP);
- }
-
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
@@ -179,18 +174,10 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
stage, (ulong)uc.args[1]);
- /*
- * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
- * guest state is consistent only after userspace re-enters the
- * kernel with KVM_RUN. Complete IO prior to migrating state
- * to a new VM.
- */
- vcpu_run_complete_io(vm, VCPU_ID);
-
+ state = vcpu_save_state(vm, VCPU_ID);
memset(&regs1, 0, sizeof(regs1));
vcpu_regs_get(vm, VCPU_ID, &regs1);
- state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
/* Restore state in a new VM. */
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index d4cfb6a7a086..4b7e107865bf 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -27,6 +27,7 @@ log_test()
nsuccess=$((nsuccess+1))
printf "\n TEST: %-50s [ OK ]\n" "${msg}"
else
+ ret=1
nfail=$((nfail+1))
printf "\n TEST: %-50s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
@@ -147,8 +148,8 @@ fib_rule6_test()
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
- match="ipproto icmp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ match="ipproto ipv6-icmp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
fi
}
@@ -245,4 +246,9 @@ setup
run_fibrule_tests
cleanup
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 1080ff55a788..0d2a5f4f1e63 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -605,6 +605,39 @@ run_cmd()
return $rc
}
+check_expected()
+{
+ local out="$1"
+ local expected="$2"
+ local rc=0
+
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo route entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+ # tricky way to convert output to 1-line without ip's
+ # messy '\'; this drops all extra white space
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected route entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route6()
@@ -652,31 +685,7 @@ check_route6()
pfx=$1
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
- [ "${out}" = "${expected}" ] && return 0
-
- if [ -z "${out}" ]; then
- if [ "$VERBOSE" = "1" ]; then
- printf "\nNo route entry found\n"
- printf "Expected:\n"
- printf " ${expected}\n"
- fi
- return 1
- fi
-
- # tricky way to convert output to 1-line without ip's
- # messy '\'; this drops all extra white space
- out=$(echo ${out})
- if [ "${out}" != "${expected}" ]; then
- rc=1
- if [ "${VERBOSE}" = "1" ]; then
- printf " Unexpected route entry. Have:\n"
- printf " ${out}\n"
- printf " Expected:\n"
- printf " ${expected}\n\n"
- fi
- fi
-
- return $rc
+ check_expected "${out}" "${expected}"
}
route_cleanup()
@@ -725,7 +734,7 @@ route_setup()
ip -netns ns2 addr add 172.16.103.2/24 dev veth4
ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
- set +ex
+ set +e
}
# assumption is that basic add of a single path route works
@@ -960,7 +969,8 @@ ipv6_addr_metric_test()
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
- check_route6 ""
+ out=$($IP -6 ro ls match 2001:db8:104::/64)
+ check_expected "${out}" ""
rc=$?
fi
log_test $rc 0 "Prefix route removed on link down"
@@ -1091,38 +1101,13 @@ check_route()
local pfx
local expected="$1"
local out
- local rc=0
set -- $expected
pfx=$1
[ "${pfx}" = "unreachable" ] && pfx=$2
out=$($IP ro ls match ${pfx})
- [ "${out}" = "${expected}" ] && return 0
-
- if [ -z "${out}" ]; then
- if [ "$VERBOSE" = "1" ]; then
- printf "\nNo route entry found\n"
- printf "Expected:\n"
- printf " ${expected}\n"
- fi
- return 1
- fi
-
- # tricky way to convert output to 1-line without ip's
- # messy '\'; this drops all extra white space
- out=$(echo ${out})
- if [ "${out}" != "${expected}" ]; then
- rc=1
- if [ "${VERBOSE}" = "1" ]; then
- printf " Unexpected route entry. Have:\n"
- printf " ${out}\n"
- printf " Expected:\n"
- printf " ${expected}\n\n"
- fi
- fi
-
- return $rc
+ check_expected "${out}" "${expected}"
}
# assumption is that basic add of a single path route works
@@ -1387,7 +1372,8 @@ ipv4_addr_metric_test()
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
- check_route ""
+ out=$($IP ro ls match 172.16.104.0/24)
+ check_expected "${out}" ""
rc=$?
fi
log_test $rc 0 "Prefix route removed on link down"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 2dc95fda7ef7..ea5938ec009a 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then
exit 0
fi
+ret=0
echo "--------------------"
echo "running psock_fanout test"
echo "--------------------"
./in_netns.sh ./psock_fanout
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
@@ -22,6 +24,7 @@ echo "--------------------"
./in_netns.sh ./psock_tpacket
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
@@ -32,6 +35,8 @@ echo "--------------------"
./in_netns.sh ./txring_overwrite
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ ret=1
else
echo "[PASS]"
fi
+exit $ret
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
index b093f39c298c..14e41faf2c57 100755
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -7,7 +7,7 @@ echo "--------------------"
./socket
if [ $? -ne 0 ]; then
echo "[FAIL]"
+ exit 1
else
echo "[PASS]"
fi
-
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index c9ff2b47bd1c..a37cb1192c6a 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 000000000000..b48e1833bc89
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig errors are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack marks the
+# 'fragmentation needed' icmp packet as RELATED.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ for i in 1 2;do ip netns del nsclient$i;done
+ for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+ipv6 () {
+ echo -n dead:$1::2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+	ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect"
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ check_counter $n "unknown" "$expect"
+ if [ $? -ne 0 ] ;then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+ ip -net nsclient$i link set $DEV up
+ ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+ ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+ ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+
+# make sure NAT core rewrites address of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1 mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (it's in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
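Once the namespaces above are up, the behaviour the script asserts can also be poked at by hand; a sketch using the addresses and counter names defined earlier in the script:

	# Trigger a 'fragmentation needed' error on the 1280-MTU hop.
	ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2
	# The error should show up in the RELATED counters on nsrouter1 and nsclient1 ...
	ip netns exec nsrouter1 nft list counter inet filter related
	ip netns exec nsclient1 nft list counter inet filter related
	# ... and the catch-all drop counter should stay at zero.
	ip netns exec nsclient1 nft list counter inet filter unknown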
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 8ec76681605c..3194007cf8d1 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -321,6 +321,7 @@ EOF
test_masquerade6()
{
+ local natflags=$1
local lret=0
ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - <<EOF
table ip6 nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oif veth0 masquerade
+ meta oif veth0 masquerade $natflags
}
}
EOF
ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags"
lret=1
fi
@@ -397,19 +398,26 @@ EOF
fi
done
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
ip netns exec ns0 nft flush chain ip6 nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+ test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2"
return $lret
}
test_masquerade()
{
+ local natflags=$1
local lret=0
ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
@@ -417,7 +425,7 @@ test_masquerade()
ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: canot ping ns1 from ns2"
+ echo "ERROR: cannot ping ns1 from ns2 $natflags"
lret=1
fi
@@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - <<EOF
table ip nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oif veth0 masquerade
+ meta oif veth0 masquerade $natflags
}
}
EOF
ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+		echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags"
lret=1
fi
@@ -485,13 +493,19 @@ EOF
fi
done
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
ip netns exec ns0 nft flush chain ip nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+ test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2"
return $lret
}
@@ -750,8 +764,12 @@ test_local_dnat
test_local_dnat6
reset_counters
-test_masquerade
-test_masquerade6
+test_masquerade ""
+test_masquerade6 ""
+
+reset_counters
+test_masquerade "fully-random"
+test_masquerade6 "fully-random"
reset_counters
test_redirect
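The new natflags argument simply lands on the masquerade statement itself, so the "fully-random" runs exercise fully randomized source-port allocation. Outside the harness, the equivalent one-off IPv4 rule in ns0 would look roughly like this (table and chain names match what the test creates):

	ip netns exec ns0 nft add table ip nat
	ip netns exec ns0 nft 'add chain ip nat postrouting { type nat hook postrouting priority 0 ; policy accept ; }'
	ip netns exec ns0 nft add rule ip nat postrouting meta oif veth0 masquerade fully-random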
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 7202bbac976e..853aa164a401 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -187,8 +187,8 @@ static int make_exe(const uint8_t *payload, size_t len)
ph.p_offset = 0;
ph.p_vaddr = VADDR;
ph.p_paddr = 0;
- ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload);
- ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + sizeof(payload);
+ ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
+ ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
ph.p_align = 4096;
fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
index 762cb01f2ca7..47b7473dedef 100644
--- a/tools/testing/selftests/proc/proc-self-map-files-002.c
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -46,12 +46,9 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
int main(void)
{
- const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
-#ifdef __arm__
- unsigned long va = 2 * PAGE_SIZE;
-#else
- unsigned long va = 0;
-#endif
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ const unsigned long va_max = 1UL << 32;
+ unsigned long va;
void *p;
int fd;
unsigned long a, b;
@@ -60,10 +57,13 @@ int main(void)
if (fd == -1)
return 1;
- p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
- if (p == MAP_FAILED) {
- if (errno == EPERM)
- return 4;
+ for (va = 0; va < va_max; va += PAGE_SIZE) {
+ p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == (void *)va)
+ break;
+ }
+ if (va == va_max) {
+ fprintf(stderr, "error: mmap doesn't like you\n");
return 1;
}
diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
index 43540f1828cc..2deea2169fc2 100755
--- a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
+++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Extract the number of CPUs expected from the specified Kconfig-file
# fragment by checking CONFIG_SMP and CONFIG_NR_CPUS. If the specified
@@ -7,23 +8,9 @@
#
# Usage: configNR_CPUS.sh config-frag
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
cf=$1
if test ! -r $cf
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
index ef7fcbac3d42..90016c359e83 100755
--- a/tools/testing/selftests/rcutorture/bin/config_override.sh
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# config_override.sh base override
#
@@ -6,23 +7,9 @@
# that conflict with any in override, concatenating what remains and
# sending the result to standard output.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2017
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
base=$1
if test -r $base
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index 197deece7c7c..31584cee84d7 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -1,23 +1,11 @@
#!/bin/bash
-# Usage: configcheck.sh .config .config-template
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
+# SPDX-License-Identifier: GPL-2.0+
#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
+# Usage: configcheck.sh .config .config-template
#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
trap 'rm -rf $T' 0
@@ -26,6 +14,7 @@ mkdir $T
cat $1 > $T/.config
cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
+grep -v '^CONFIG_INITRAMFS_SOURCE' |
awk '
{
print "if grep -q \"" $0 "\" < '"$T/.config"'";
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 65541c21a544..40359486b3a8 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Usage: configinit.sh config-spec-file build-output-dir results-dir
#
@@ -14,23 +15,9 @@
# for example, "O=/tmp/foo". If this argument is omitted, the .config
# file will be generated directly in the current directory.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/configinit.sh.$$
trap 'rm -rf $T' 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index bb99cde3f5f9..ff7102212703 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -1,26 +1,13 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Get an estimate of how CPU-hoggy to be.
#
# Usage: cpus2use.sh
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
idlecpus=`mpstat | tail -1 | \
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 65f6655026f0..6bcb8b5b2ff2 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -1,24 +1,11 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Shell functions for the rest of the scripts.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
# bootparam_hotplug_cpu bootparam-string
#
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 3633828375e3..435b60933985 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Alternate sleeping and spinning on randomly selected CPUs. The purpose
# of this script is to inflict random OS jitter on a concurrently running
@@ -11,23 +12,9 @@
# sleepmax: Maximum microseconds to sleep, defaults to one second.
# spinmax: Maximum microseconds to spin, defaults to one millisecond.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2016
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
me=$(($1 * 1000))
duration=$2
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 9115fcdb5617..c27a0bbb9c02 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -1,26 +1,13 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Build a kvm-ready Linux kernel from the tree in the current directory.
#
# Usage: kvm-build.sh config-template build-dir resdir
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
config_template=${1}
if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 98f650c9bf54..8426fe1f15ee 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
#
# Invoke a text editor on all console.log files for all runs with diagnostics,
# that is, on all such files having a console.log.diags counterpart.
@@ -10,6 +11,10 @@
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+#
+# Copyright (C) IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.ibm.com>
rundir="${1}"
if test -z "$rundir" -o ! -d "$rundir"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index 2de92f43ee8c..f3a7a5e2b89d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -1,26 +1,13 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Analyze a given results directory for locktorture progress.
#
# Usage: kvm-recheck-lock.sh resdir
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2014
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
i="$1"
if test -d "$i" -a -r "$i"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 0fa8a61ccb7b..2a7f3f4756a7 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -1,26 +1,13 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Analyze a given results directory for rcutorture progress.
#
# Usage: kvm-recheck-rcu.sh resdir
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2014
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
i="$1"
if test -d "$i" -a -r "$i"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
index 8948f7926b21..7d3c2be66c64 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Analyze a given results directory for rcuperf performance measurements,
# looking for ftrace data. Exits with 0 if data was found, analyzed, and
@@ -7,23 +8,9 @@
#
# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2016
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
i="$1"
. functions.sh
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
index ccebf772fa1e..db0375a57f28 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -1,26 +1,13 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Analyze a given results directory for rcuperf performance measurements.
#
# Usage: kvm-recheck-rcuperf.sh resdir
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2016
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
i="$1"
if test -d "$i" -a -r "$i"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index c9bab57a77eb..2adde6aaafdb 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Given the results directories for previous KVM-based torture runs,
# check the build and console output for errors. Given a directory
@@ -6,23 +7,9 @@
#
# Usage: kvm-recheck.sh resdir ...
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 58ca758a5786..0eb1ec16d78a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Run a kvm-based test of the specified tree on the specified configs.
# Fully automated run and error checking, no graphics console.
@@ -20,23 +21,9 @@
#
# More sophisticated argument parsing is clearly needed.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
trap 'rm -rf $T' 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 19864f1cb27a..8f1e337b9b54 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests under KVM. By default, this series is specified
# by the relevant CFLIST file, but can be overridden by the --configs
@@ -6,23 +7,9 @@
#
# Usage: kvm.sh [ options ]
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
scriptname=$0
args="$*"
diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 83552bb007b4..6fa9bd1ddc09 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -1,21 +1,8 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Create an initrd directory if one does not already exist.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
# Author: Connor Shu <Connor.Shu@ibm.com>
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 24fe5f822b28..0701b3bf6ade 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Check the build output from an rcutorture run for goodness.
# The "file" is a pathname on the local system, and "title" is
@@ -8,23 +9,9 @@
#
# Usage: parse-build.sh file title
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
F=$1
title=$2
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 84933f6aed77..4508373a922f 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Check the console output from an rcutorture run for oopses.
# The "file" is a pathname on the local system, and "title" is
@@ -6,23 +7,9 @@
#
# Usage: parse-console.sh file title
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2011
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
T=${TMPDIR-/tmp}/parse-console.sh.$$
file="$1"
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
index 80eb646e1319..d3e4b2971f92 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -1,24 +1,11 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Kernel-version-dependent shell functions for the rest of the scripts.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2014
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
# locktorture_param_onoff bootparam-string config-file
#
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index 7bab8246392b..effa415f9b92 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -1,24 +1,11 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Kernel-version-dependent shell functions for the rest of the scripts.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2013
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
# rcutorture_param_n_barrier_cbs bootparam-string
#
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
index d36b8fd6f0fc..777d5b0c190f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -1,24 +1,11 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
#
# Torture-suite-dependent shell functions for the rest of the scripts.
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
# Copyright (C) IBM Corporation, 2015
#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
# per_version_boot_params bootparam-string config-file seconds
#
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index f69d2ee29742..5019cdae5d0b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2166,11 +2166,14 @@ TEST(detect_seccomp_filter_flags)
SECCOMP_FILTER_FLAG_LOG,
SECCOMP_FILTER_FLAG_SPEC_ALLOW,
SECCOMP_FILTER_FLAG_NEW_LISTENER };
- unsigned int flag, all_flags;
+ unsigned int exclusive[] = {
+ SECCOMP_FILTER_FLAG_TSYNC,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER };
+ unsigned int flag, all_flags, exclusive_mask;
int i;
long ret;
- /* Test detection of known-good filter flags */
+ /* Test detection of individual known-good filter flags */
for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
int bits = 0;
@@ -2197,16 +2200,29 @@ TEST(detect_seccomp_filter_flags)
all_flags |= flag;
}
- /* Test detection of all known-good filter flags */
- ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
- EXPECT_EQ(-1, ret);
- EXPECT_EQ(EFAULT, errno) {
- TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
- all_flags);
+ /*
+ * Test detection of all known-good filter flags combined. But
+ * for the exclusive flags we need to mask them out and try them
+ * individually for the "all flags" testing.
+ */
+ exclusive_mask = 0;
+ for (i = 0; i < ARRAY_SIZE(exclusive); i++)
+ exclusive_mask |= exclusive[i];
+ for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
+ flag = all_flags & ~exclusive_mask;
+ flag |= exclusive[i];
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+ flag);
+ }
}
- /* Test detection of an unknown filter flag */
+	/* Test detection of unknown filter flags, without exclusives. */
flag = -1;
+ flag &= ~exclusive_mask;
ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
EXPECT_EQ(-1, ret);
EXPECT_EQ(EINVAL, errno) {
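
The hunk above replaces the single all-flags call with a loop: the exclusive flags (TSYNC and NEW_LISTENER, which the kernel refuses to accept together) are masked out of the combined set and then re-added one at a time. A minimal userspace sketch of that masking arithmetic, using made-up bit values rather than the real SECCOMP_FILTER_FLAG_* constants:

#include <stdio.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

int main(void)
{
	/* Illustrative bit values only, not the real seccomp flag constants. */
	unsigned int flags[] = { 0x1, 0x2, 0x4, 0x8, 0x10 };
	unsigned int exclusive[] = { 0x1, 0x10 };	/* e.g. TSYNC and NEW_LISTENER */
	unsigned int all_flags = 0, exclusive_mask = 0, flag;
	size_t i;

	for (i = 0; i < ARRAY_SIZE(flags); i++)
		all_flags |= flags[i];
	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
		exclusive_mask |= exclusive[i];

	/* Combine every non-exclusive flag with one exclusive flag at a time. */
	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
		flag = (all_flags & ~exclusive_mask) | exclusive[i];
		printf("combination %zu: 0x%X\n", i, flag);
	}
	return 0;
}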
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 3417f2dbc366..7fc272ecae16 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -508,6 +508,14 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
/*
+ * Update the timer output so that it is likely to match the
+ * state we're about to restore. If the timer expires between
+ * this point and the register restoration, we'll take the
+ * interrupt anyway.
+ */
+ kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
+
+ /*
* When using a userspace irqchip with the architected timers and a
* host interrupt controller that doesn't support an active state, we
* must still prevent continuously exiting from the guest, and
@@ -730,7 +738,6 @@ static void kvm_timer_init_interrupt(void *info)
int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
struct arch_timer_context *timer;
- bool level;
switch (regid) {
case KVM_REG_ARM_TIMER_CTL:
@@ -758,10 +765,6 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
return -1;
}
- level = kvm_timer_should_fire(timer);
- kvm_timer_update_irq(vcpu, level, timer);
- timer_emulate(timer);
-
return 0;
}
@@ -812,7 +815,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
switch (treg) {
case TIMER_REG_TVAL:
- val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
+ val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff;
break;
case TIMER_REG_CTL:
@@ -858,7 +861,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
{
switch (treg) {
case TIMER_REG_TVAL:
- timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
+ timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val;
break;
case TIMER_REG_CTL:
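
The two TVAL hunks above fix the sign of the emulated timer value: architecturally TVAL is the signed distance to the compare value, TVAL = CVAL - (CNT - CNTVOFF), so the write path must compute CVAL = (CNT - CNTVOFF) + TVAL. A standalone sketch of that arithmetic (plain C with hypothetical helper names, ignoring the 32-bit signedness of the real register):

#include <stdint.h>
#include <stdio.h>

/* Read side: derive TVAL from CVAL, the raw counter, and the virtual offset. */
static uint64_t tval_read(uint64_t cnt_cval, uint64_t phys_cnt, uint64_t cntvoff)
{
	return cnt_cval - phys_cnt + cntvoff;	/* CVAL - (CNT - CNTVOFF) */
}

/* Write side: derive CVAL from the TVAL value the guest wrote. */
static uint64_t tval_write(uint64_t val, uint64_t phys_cnt, uint64_t cntvoff)
{
	return phys_cnt - cntvoff + val;	/* (CNT - CNTVOFF) + TVAL */
}

int main(void)
{
	uint64_t cnt = 1000, off = 100, tval = 50;
	uint64_t cval = tval_write(tval, cnt, off);

	/* Reading back immediately returns the value just written. */
	printf("cval=%llu tval=%llu\n",
	       (unsigned long long)cval,
	       (unsigned long long)tval_read(cval, cnt, off));
	return 0;
}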
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 99c37384ba7b..f412ebc90610 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -934,7 +934,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init)
{
- unsigned int i;
+ unsigned int i, ret;
int phys_target = kvm_target_cpu();
if (init->target != phys_target)
@@ -969,9 +969,14 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
vcpu->arch.target = phys_target;
/* Now we know what it is, we can reset it. */
- return kvm_reset_vcpu(vcpu);
-}
+ ret = kvm_reset_vcpu(vcpu);
+ if (ret) {
+ vcpu->arch.target = -1;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+ }
+ return ret;
+}
static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
struct kvm_vcpu_init *init)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 27c958306449..a39dcfdbcc65 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1781,8 +1781,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* Only PMD_SIZE transparent hugepages(THP) are
* currently supported. This code will need to be
* updated to support other THP sizes.
+ *
+ * Make sure the host VA and the guest IPA are sufficiently
+ * aligned and that the block is contained within the memslot.
*/
- if (transparent_hugepage_adjust(&pfn, &fault_ipa))
+ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
+ transparent_hugepage_adjust(&pfn, &fault_ipa))
vma_pagesize = PMD_SIZE;
}
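
The new fault_supports_stage2_huge_mapping() guard only permits a PMD-sized block mapping when the host VA and the guest IPA share the same offset inside the block (and when the block lies entirely within the memslot, which the sketch below omits). A minimal illustration of the alignment part of that test, assuming a 2M block size:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE (2UL * 1024 * 1024)	/* PMD_SIZE on a 4K-page host */

/*
 * A block mapping is only possible if the host VA and the guest IPA are
 * congruent modulo the block size, i.e. they have the same offset within
 * a 2M block.
 */
static bool aligned_for_block(uint64_t hva, uint64_t ipa)
{
	return ((hva ^ ipa) & (BLOCK_SIZE - 1)) == 0;
}

int main(void)
{
	printf("%d\n", aligned_for_block(0x40200000, 0x80200000));	/* 1: same offset */
	printf("%d\n", aligned_for_block(0x40201000, 0x80200000));	/* 0: offsets differ */
	return 0;
}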
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 4a12322bf7df..9f4843fe9cda 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -200,6 +200,9 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS;
+ if (was_enabled && !vgic_cpu->lpis_enabled)
+ vgic_flush_pending_lpis(vcpu);
+
if (!was_enabled && vgic_cpu->lpis_enabled)
vgic_enable_lpis(vcpu);
}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 3af69f2a3866..191deccf60bf 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -151,6 +151,27 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
kfree(irq);
}
+void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_irq *irq, *tmp;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
+
+ list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
+ if (irq->intid >= VGIC_MIN_LPI) {
+ raw_spin_lock(&irq->irq_lock);
+ list_del(&irq->ap_list);
+ irq->vcpu = NULL;
+ raw_spin_unlock(&irq->irq_lock);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+ }
+
+ raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+}
+
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
WARN_ON(irq_set_irqchip_state(irq->host_irq,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index a90024718ca4..abeeffabc456 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -238,6 +238,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
+void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);
int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 3547b0d8c91e..79e59e4fa3dc 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -144,18 +144,19 @@ static int setup_routing_entry(struct kvm *kvm,
{
struct kvm_kernel_irq_routing_entry *ei;
int r;
+ u32 gsi = array_index_nospec(ue->gsi, KVM_MAX_IRQ_ROUTES);
/*
* Do not allow GSI to be mapped to the same irqchip more than once.
* Allow only one to one mapping between GSI and non-irqchip routing.
*/
- hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
+ hlist_for_each_entry(ei, &rt->map[gsi], link)
if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
ue->u.irqchip.irqchip == ei->irqchip.irqchip)
return -EINVAL;
- e->gsi = ue->gsi;
+ e->gsi = gsi;
e->type = ue->type;
r = kvm_set_routing_entry(kvm, e, ue);
if (r)
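
This hunk, like the kvm_ioctl_create_device() change further down, clamps a userspace-controlled index with array_index_nospec() before it is used, so that a mispredicted bounds check cannot be exploited to read out of bounds speculatively (Spectre v1). A rough userspace sketch of the clamping idea; the branch-free mask below only mimics the effect of the kernel helper and is not its actual implementation:

#include <stdio.h>

/*
 * Return index when index < size, and 0 otherwise, without relying on the
 * result of a conditional branch: the mask is all-ones for in-range indices
 * and zero for out-of-range ones, so even a speculative load stays inside
 * the array.
 */
static size_t index_nospec(size_t index, size_t size)
{
	size_t mask = (size_t)0 - (size_t)(index < size);

	return index & mask;
}

int main(void)
{
	int table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };

	printf("%d\n", table[index_nospec(3, 8)]);	/* in range: reads table[3] */
	printf("%d\n", table[index_nospec(12, 8)]);	/* out of range: clamped to table[0] */
	return 0;
}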
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 55fe8e20d8fd..a704d1f9bd96 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1240,7 +1240,7 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
return -EINVAL;
- if ((log->first_page & 63) || (log->num_pages & 63))
+ if (log->first_page & 63)
return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
@@ -1253,8 +1253,9 @@ int kvm_clear_dirty_log_protect(struct kvm *kvm,
n = kvm_dirty_bitmap_bytes(memslot);
if (log->first_page > memslot->npages ||
- log->num_pages > memslot->npages - log->first_page)
- return -EINVAL;
+ log->num_pages > memslot->npages - log->first_page ||
+ (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))
+ return -EINVAL;
*flush = false;
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
@@ -2977,12 +2978,14 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+ int type;
int ret;
if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENODEV;
- ops = kvm_device_ops_table[cd->type];
+ type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
+ ops = kvm_device_ops_table[type];
if (ops == NULL)
return -ENODEV;
@@ -2997,7 +3000,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
dev->kvm = kvm;
mutex_lock(&kvm->lock);
- ret = ops->create(dev, cd->type);
+ ret = ops->create(dev, type);
if (ret < 0) {
mutex_unlock(&kvm->lock);
kfree(dev);
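
The kvm_clear_dirty_log_protect() hunks relax the earlier requirement that both first_page and num_pages be multiples of 64: first_page must still be 64-aligned, but num_pages may now be an arbitrary length provided the range runs to the end of the memslot. A small sketch of the resulting validity check (userspace C, field names chosen to mirror the kvm_clear_dirty_log arguments):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the validation in kvm_clear_dirty_log_protect(); names are illustrative. */
static bool clear_range_valid(uint64_t first_page, uint64_t num_pages,
			      uint64_t slot_npages)
{
	if (first_page & 63)
		return false;
	if (first_page > slot_npages ||
	    num_pages > slot_npages - first_page)
		return false;
	/* A partial chunk is only allowed if it runs to the end of the slot. */
	if (num_pages < slot_npages - first_page && (num_pages & 63))
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", clear_range_valid(0, 64, 100));	/* 1: full 64-page chunk */
	printf("%d\n", clear_range_valid(64, 36, 100));	/* 1: partial tail of the slot */
	printf("%d\n", clear_range_valid(0, 36, 100));	/* 0: partial chunk mid-slot */
	return 0;
}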