summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.get_maintainer.ignore2
-rw-r--r--Documentation/admin-guide/sysctl/net.rst2
-rw-r--r--Documentation/atomic_bitops.txt10
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/parisc/Kconfig21
-rw-r--r--arch/parisc/include/asm/bitops.h8
-rw-r--r--arch/parisc/kernel/head.S43
-rw-r--r--arch/parisc/kernel/unaligned.c2
-rw-r--r--arch/x86/include/asm/bitops.h21
-rw-r--r--block/blk-mq.c5
-rw-r--r--drivers/block/loop.c5
-rw-r--r--drivers/block/zram/zram_drv.c42
-rw-r--r--drivers/block/zram/zram_drv.h1
-rw-r--r--drivers/firmware/dmi_scan.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c78
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c22
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c5
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c1
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h2
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h13
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h31
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h2
-rw-r--r--drivers/gpu/drm/drm_gem.c17
-rw-r--r--drivers/gpu/drm/drm_internal.h4
-rw-r--r--drivers/gpu/drm/drm_prime.c20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c9
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c3
-rw-r--r--drivers/gpu/drm/vc4/Kconfig1
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c17
-rw-r--r--drivers/md/md.c4
-rw-r--r--drivers/md/raid10.c13
-rw-r--r--drivers/mmc/host/Kconfig1
-rw-r--r--drivers/net/bonding/bond_3ad.c41
-rw-r--r--drivers/net/bonding/bond_main.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c53
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c5
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c1
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c10
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c6
-rw-r--r--drivers/net/ethernet/freescale/fec.h10
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c42
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c29
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h36
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c25
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c18
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c59
-rw-r--r--drivers/net/ethernet/lantiq_xrx200.c9
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c22
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c2
-rw-r--r--drivers/net/ethernet/moxa/moxart_ether.c11
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c95
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c9
-rw-r--r--drivers/net/ipa/ipa_mem.c2
-rw-r--r--drivers/net/ipvlan/ipvtap.c4
-rw-r--r--drivers/net/macsec.c11
-rw-r--r--drivers/net/phy/phy_device.c8
-rw-r--r--drivers/net/usb/r8152.c27
-rw-r--r--drivers/nfc/pn533/uart.c1
-rw-r--r--drivers/parisc/ccio-dma.c13
-rw-r--r--drivers/parisc/led.c2
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c8
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_fusion.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c10
-rw-r--r--drivers/scsi/scsi_lib.c47
-rw-r--r--drivers/scsi/sd.c84
-rw-r--r--drivers/scsi/sd.h5
-rw-r--r--drivers/scsi/storvsc_drv.c2
-rw-r--r--drivers/ufs/core/ufshcd.c9
-rw-r--r--drivers/ufs/host/ufs-exynos.c2
-rw-r--r--fs/inode.c14
-rw-r--r--fs/locks.c1
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/nfs/dir.c7
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/pnfs.c1
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ntfs3/xattr.c16
-rw-r--r--fs/overlayfs/inode.c11
-rw-r--r--fs/posix_acl.c15
-rw-r--r--fs/proc/task_mmu.c7
-rw-r--r--fs/userfaultfd.c4
-rw-r--r--include/asm-generic/bitops/generic-non-atomic.h14
-rw-r--r--include/asm-generic/bitops/instrumented-non-atomic.h12
-rw-r--r--include/asm-generic/bitops/non-atomic.h1
-rw-r--r--include/asm-generic/bitops/non-instrumented-non-atomic.h1
-rw-r--r--include/linux/bitops.h1
-rw-r--r--include/linux/buffer_head.h2
-rw-r--r--include/linux/cgroup.h5
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/linux/netdevice.h20
-rw-r--r--include/linux/netfilter_bridge/ebtables.h4
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/psi.h2
-rw-r--r--include/linux/shmem_fs.h13
-rw-r--r--include/linux/userfaultfd_k.h2
-rw-r--r--include/linux/vm_event_item.h15
-rw-r--r--include/linux/wait_bit.h8
-rw-r--r--include/net/bond_3ad.h2
-rw-r--r--include/net/busy_poll.h2
-rw-r--r--include/net/gro.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h3
-rw-r--r--include/net/netfilter/nf_tables.h1
-rw-r--r--include/uapi/linux/io_uring.h8
-rw-r--r--include/uapi/linux/xfrm.h2
-rw-r--r--include/ufs/ufshci.h6
-rw-r--r--io_uring/cancel.c2
-rw-r--r--io_uring/io_uring.c7
-rw-r--r--io_uring/net.c56
-rw-r--r--io_uring/net.h1
-rw-r--r--io_uring/notif.c8
-rw-r--r--io_uring/opdef.c4
-rw-r--r--io_uring/opdef.h2
-rw-r--r--io_uring/uring_cmd.c2
-rw-r--r--kernel/audit_fsnotify.c1
-rw-r--r--kernel/cgroup/cgroup-v1.c2
-rw-r--r--kernel/cgroup/cgroup.c80
-rw-r--r--kernel/cgroup/cpuset.c3
-rw-r--r--kernel/kprobes.c9
-rw-r--r--kernel/module/main.c2
-rw-r--r--kernel/sched/psi.c10
-rw-r--r--kernel/sched/wait_bit.c2
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/trace/ftrace.c79
-rw-r--r--lib/ratelimit.c12
-rw-r--r--mm/gup.c68
-rw-r--r--mm/huge_memory.c64
-rw-r--r--mm/hugetlb.c26
-rw-r--r--mm/mmap.c7
-rw-r--r--mm/shmem.c59
-rw-r--r--mm/userfaultfd.c29
-rw-r--r--mm/vmstat.c9
-rw-r--r--net/bridge/netfilter/ebtable_broute.c8
-rw-r--r--net/bridge/netfilter/ebtable_filter.c8
-rw-r--r--net/bridge/netfilter/ebtable_nat.c8
-rw-r--r--net/bridge/netfilter/ebtables.c8
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/filter.c13
-rw-r--r--net/core/gro_cells.c2
-rw-r--r--net/core/neighbour.c12
-rw-r--r--net/core/skbuff.c7
-rw-r--r--net/core/sock.c18
-rw-r--r--net/core/sysctl_net_core.c15
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/addrconf.c5
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/key/af_key.c3
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c31
-rw-r--r--net/netfilter/nf_flow_table_core.c15
-rw-r--r--net/netfilter/nf_flow_table_offload.c8
-rw-r--r--net/netfilter/nf_tables_api.c14
-rw-r--r--net/netfilter/nft_osf.c18
-rw-r--r--net/netfilter/nft_payload.c29
-rw-r--r--net/netfilter/nft_tproxy.c8
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/rose/rose_loopback.c3
-rw-r--r--net/rxrpc/call_object.c4
-rw-r--r--net/rxrpc/sendmsg.c92
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/clnt.c2
-rw-r--r--net/xfrm/espintcp.c2
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_output.c1
-rw-r--r--net/xfrm/xfrm_policy.c3
-rw-r--r--net/xfrm/xfrm_state.c1
-rw-r--r--tools/include/linux/compiler_types.h4
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile6
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh81
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config1
-rw-r--r--tools/testing/selftests/drivers/net/bonding/settings1
-rw-r--r--tools/testing/selftests/lib.mk1
-rw-r--r--tools/testing/selftests/sgx/sigstruct.c6
211 files changed, 1756 insertions, 871 deletions
diff --git a/.get_maintainer.ignore b/.get_maintainer.ignore
index a64d21913745..c298bab3d320 100644
--- a/.get_maintainer.ignore
+++ b/.get_maintainer.ignore
@@ -1,2 +1,4 @@
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+Alan Cox <root@hraefn.swansea.linux.org.uk>
Christoph Hellwig <hch@lst.de>
Marc Gonzalez <marc.w.gonzalez@free.fr>
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 805f2281e000..60d44165fba7 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget.
netdev_max_backlog
------------------
-Maximum number of packets, queued on the INPUT side, when the interface
+Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.
netdev_rss_key
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index d8b101c97031..edea4656c5c0 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is:
- RMW operations that have a return value are fully ordered.
- - RMW operations that are conditional are unordered on FAILURE,
- otherwise the above rules apply. In the case of test_and_set_bit_lock(),
- if the bit in memory is unchanged by the operation then it is deemed to have
- failed.
+ - RMW operations that are conditional are fully ordered.
-Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
-clear_bit_unlock() which has RELEASE semantics.
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics,
+clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has
+ACQUIRE semantics.
Since a platform only has a single means of achieving atomic operations
the same barriers as for atomic_t are used, see atomic_t.txt.
diff --git a/MAINTAINERS b/MAINTAINERS
index 9d7f64dc0efe..07403024f3f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3679,6 +3679,7 @@ F: Documentation/networking/bonding.rst
F: drivers/net/bonding/
F: include/net/bond*
F: include/uapi/linux/if_bonding.h
+F: tools/testing/selftests/drivers/net/bonding/
BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER
M: Dan Robertson <dan@dlrobertson.com>
@@ -9781,7 +9782,7 @@ M: Christian Brauner <brauner@kernel.org>
M: Seth Forshee <sforshee@kernel.org>
L: linux-fsdevel@vger.kernel.org
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git
+T: git://git.kernel.org/pub/scm/linux/kernel/git/vfs/idmapping.git
F: Documentation/filesystems/idmappings.rst
F: tools/testing/selftests/mount_setattr/
F: include/linux/mnt_idmapping.h
@@ -10658,6 +10659,7 @@ T: git git://git.kernel.dk/linux-block
T: git git://git.kernel.dk/liburing
F: io_uring/
F: include/linux/io_uring.h
+F: include/linux/io_uring_types.h
F: include/uapi/linux/io_uring.h
F: tools/io_uring/
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7f059cd1196a..9aede2447011 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -146,10 +146,10 @@ menu "Processor type and features"
choice
prompt "Processor type"
- default PA7000
+ default PA7000 if "$(ARCH)" = "parisc"
config PA7000
- bool "PA7000/PA7100"
+ bool "PA7000/PA7100" if "$(ARCH)" = "parisc"
help
This is the processor type of your CPU. This information is
used for optimizing purposes. In order to compile a kernel
@@ -160,21 +160,21 @@ config PA7000
which is required on some machines.
config PA7100LC
- bool "PA7100LC"
+ bool "PA7100LC" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-L processor, as used in the
712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748,
D200, D210, D300, D310 and E-class
config PA7200
- bool "PA7200"
+ bool "PA7200" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-T' processor, as used in the
C100, C110, J100, J110, J210XC, D250, D260, D350, D360,
K100, K200, K210, K220, K400, K410 and K420
config PA7300LC
- bool "PA7300LC"
+ bool "PA7300LC" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-L2 processor, as used in the
744, A180, B132L, B160L, B180L, C132L, C160L, C180L,
@@ -224,17 +224,8 @@ config MLONGCALLS
Enabling this option will probably slow down your kernel.
config 64BIT
- bool "64-bit kernel"
+ def_bool "$(ARCH)" = "parisc64"
depends on PA8X00
- help
- Enable this if you want to support 64bit kernel on PA-RISC platform.
-
- At the moment, only people willing to use more than 2GB of RAM,
- or having a 64bit-only capable PA-RISC machine should say Y here.
-
- Since there is no 64bit userland on PA-RISC, there is no point to
- enable this option otherwise. The 64bit kernel is significantly bigger
- and slower than the 32bit one.
choice
prompt "Kernel page size"
diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h
index 56ffd260c669..0ec9cfc5131f 100644
--- a/arch/parisc/include/asm/bitops.h
+++ b/arch/parisc/include/asm/bitops.h
@@ -12,14 +12,6 @@
#include <asm/barrier.h>
#include <linux/atomic.h>
-/* compiler build environment sanity checks: */
-#if !defined(CONFIG_64BIT) && defined(__LP64__)
-#error "Please use 'ARCH=parisc' to build the 32-bit kernel."
-#endif
-#if defined(CONFIG_64BIT) && !defined(__LP64__)
-#error "Please use 'ARCH=parisc64' to build the 64-bit kernel."
-#endif
-
/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion
* on use of volatile and __*_bit() (set/clear/change):
* *_bit() want use of volatile.
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index e0a9e9657622..fd15fd4bbb61 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -22,7 +22,7 @@
#include <linux/init.h>
#include <linux/pgtable.h>
- .level PA_ASM_LEVEL
+ .level 1.1
__INITDATA
ENTRY(boot_args)
@@ -70,6 +70,47 @@ $bss_loop:
stw,ma %arg2,4(%r1)
stw,ma %arg3,4(%r1)
+#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20)
+ /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU
+ * and halt kernel if we detect a PA1.x CPU. */
+ ldi 32,%r10
+ mtctl %r10,%cr11
+ .level 2.0
+ mfctl,w %cr11,%r10
+ .level 1.1
+ comib,<>,n 0,%r10,$cpu_ok
+
+ load32 PA(msg1),%arg0
+ ldi msg1_end-msg1,%arg1
+$iodc_panic:
+ copy %arg0, %r10
+ copy %arg1, %r11
+ load32 PA(init_stack),%sp
+#define MEM_CONS 0x3A0
+ ldw MEM_CONS+32(%r0),%arg0 // HPA
+ ldi ENTRY_IO_COUT,%arg1
+ ldw MEM_CONS+36(%r0),%arg2 // SPA
+ ldw MEM_CONS+8(%r0),%arg3 // layers
+ load32 PA(__bss_start),%r1
+ stw %r1,-52(%sp) // arg4
+ stw %r0,-56(%sp) // arg5
+ stw %r10,-60(%sp) // arg6 = ptr to text
+ stw %r11,-64(%sp) // arg7 = len
+ stw %r0,-68(%sp) // arg8
+ load32 PA(.iodc_panic_ret), %rp
+ ldw MEM_CONS+40(%r0),%r1 // ENTRY_IODC
+ bv,n (%r1)
+.iodc_panic_ret:
+ b . /* wait endless with ... */
+ or %r10,%r10,%r10 /* qemu idle sleep */
+msg1: .ascii "Can't boot kernel which was built for PA8x00 CPUs on this machine.\r\n"
+msg1_end:
+
+$cpu_ok:
+#endif
+
+ .level PA_ASM_LEVEL
+
/* Initialize startup VM. Just map first 16/32 MB of memory */
load32 PA(swapper_pg_dir),%r4
mtctl %r4,%cr24 /* Initialize kernel root pointer */
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index bac581b5ecfc..e8a4d77cff53 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -93,7 +93,7 @@
#define R1(i) (((i)>>21)&0x1f)
#define R2(i) (((i)>>16)&0x1f)
#define R3(i) ((i)&0x1f)
-#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1))
+#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1))
#define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
#define IM5_2(i) IM((i)>>16,5)
#define IM5_3(i) IM((i),5)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 973c6bd17f98..0fe9de58af31 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
+static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("testb %2,%1"
+ CC_SET(nz)
+ : CC_OUT(nz) (oldbit)
+ : "m" (((unsigned char *)addr)[nr >> 3]),
+ "i" (1 << (nr & 7))
+ :"memory");
+
+ return oldbit;
+}
+
static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
bool oldbit;
@@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
variable_test_bit(nr, addr);
}
+static __always_inline bool
+arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
+ variable_test_bit(nr, addr);
+}
+
/**
* __ffs - find first set bit in word
* @word: The word to search
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3c1e6b6d991d..c96c8c4f751b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1931,7 +1931,8 @@ out:
/* If we didn't flush the entire list, we could have told the driver
* there was more coming, but that turned out to be a lie.
*/
- if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
+ if ((!list_empty(list) || errors || needs_resource ||
+ ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
q->mq_ops->commit_rqs(hctx);
/*
* Any items that need requeuing? Stuff them into hctx->dispatch,
@@ -2660,6 +2661,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
list_del_init(&rq->queuelist);
ret = blk_mq_request_issue_directly(rq, list_empty(list));
if (ret != BLK_STS_OK) {
+ errors++;
if (ret == BLK_STS_RESOURCE ||
ret == BLK_STS_DEV_RESOURCE) {
blk_mq_request_bypass_insert(rq, false,
@@ -2667,7 +2669,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
break;
}
blk_mq_end_request(rq, ret);
- errors++;
} else
queued++;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e3c0ba93c1a3..ad92192c7d61 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_flags = info->lo_flags;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 92cb929a45b7..226ea76cc819 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1146,14 +1146,15 @@ static ssize_t bd_stat_show(struct device *dev,
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int version = 2;
+ int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu\n",
+ "version: %d\n%8llu %8llu\n",
version,
+ (u64)atomic64_read(&zram->stats.writestall),
(u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);
@@ -1351,7 +1352,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
{
int ret = 0;
unsigned long alloced_pages;
- unsigned long handle = 0;
+ unsigned long handle = -ENOMEM;
unsigned int comp_len = 0;
void *src, *dst, *mem;
struct zcomp_strm *zstrm;
@@ -1369,6 +1370,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
kunmap_atomic(mem);
+compress_again:
zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
@@ -1377,20 +1379,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (unlikely(ret)) {
zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
+ zs_free(zram->mem_pool, handle);
return ret;
}
if (comp_len >= huge_class_size)
comp_len = PAGE_SIZE;
-
- handle = zs_malloc(zram->mem_pool, comp_len,
- __GFP_KSWAPD_RECLAIM |
- __GFP_NOWARN |
- __GFP_HIGHMEM |
- __GFP_MOVABLE);
-
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (IS_ERR((void *)handle))
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
if (IS_ERR((void *)handle)) {
zcomp_stream_put(zram->comp);
+ atomic64_inc(&zram->stats.writestall);
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (!IS_ERR((void *)handle))
+ goto compress_again;
return PTR_ERR((void *)handle);
}
@@ -1948,6 +1969,7 @@ static int zram_add(void)
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 158c91e54850..80c3b43b4828 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -81,6 +81,7 @@ struct zram_stats {
atomic64_t huge_pages_since; /* no. of huge pages since zram set up */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
atomic64_t miss_free; /* no. of missed free */
#ifdef CONFIG_ZRAM_WRITEBACK
atomic64_t bd_count; /* no. of pages in backing device */
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index f191a1f901ac..0eb6b617f709 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -630,7 +630,7 @@ static int __init dmi_smbios3_present(const u8 *buf)
{
if (memcmp(buf, "_SM3_", 5) == 0 &&
buf[6] < 32 && dmi_checksum(buf, buf[6])) {
- dmi_ver = get_unaligned_be32(buf + 6) & 0xFFFFFF;
+ dmi_ver = get_unaligned_be24(buf + 7);
dmi_num = 0; /* No longer specified */
dmi_len = get_unaligned_le32(buf + 12);
dmi_base = get_unaligned_le64(buf + 16);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e8a0b19b7398..f095a2513aff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2456,12 +2456,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
goto init_failed;
}
/* Drop the early temporary reset domain we created for device */
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
}
}
@@ -4413,8 +4415,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
retry:
amdgpu_amdkfd_pre_reset(adev);
- amdgpu_amdkfd_pre_reset(adev);
-
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index b067ce45d226..1036446abc30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2641,6 +2641,9 @@ static int psp_hw_fini(void *handle)
psp_rap_terminate(psp);
psp_dtm_terminate(psp);
psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
}
psp_asd_terminate(psp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 1b108d03e785..f2aebbf3fbe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
- return psp_xgmi_terminate(&adev->psp);
+ return 0;
}
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 158d87e6805d..f6b1bb40e503 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -131,6 +131,8 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -1139,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
@@ -5182,9 +5185,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
} else {
/* Program RLC_CGCG_CGLS_CTRL */
def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
@@ -5213,9 +5219,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
}
}
@@ -5328,8 +5337,7 @@ static int gfx_v11_0_set_powergating_state(void *handle,
break;
case IP_VERSION(11, 0, 1):
gfx_v11_cntl_pg(adev, enable);
- /* TODO: Enable this when GFXOFF is ready */
- // amdgpu_gfx_off_ctrl(adev, enable);
+ amdgpu_gfx_off_ctrl(adev, enable);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c6e0f9313a7f..fc9c1043244c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
gfx_v9_0_tiling_mode_table_init(adev);
- gfx_v9_0_setup_rb(adev);
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 3f44a099c52a..3e51e773f92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+ tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 6e0145b2b408..445cb06b9d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
@@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
- adev->vm_manager.num_level);
+ num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
- adev->vm_manager.block_size - 9);
+ block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index 01e8288d09a8..1dc95ef21da6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -247,6 +247,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
}
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data);
+}
+
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
@@ -262,6 +337,9 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 1ff7fc7bb340..55284b24f113 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -494,6 +494,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev)
{
}
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
static const struct amdgpu_asic_funcs soc21_asic_funcs =
{
.read_disabled_bios = &soc21_read_disabled_bios,
@@ -513,6 +527,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs =
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &soc21_pre_asic_init,
.query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
static int soc21_common_early_init(void *handle)
@@ -603,6 +618,8 @@ static int soc21_common_early_init(void *handle)
AMD_CG_SUPPORT_ATHUB_MGCG |
AMD_CG_SUPPORT_ATHUB_LS |
AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags =
@@ -702,6 +719,7 @@ static int soc21_common_set_clockgating_state(void *handle,
switch (adev->ip_versions[NBIO_HWIP][0]) {
case IP_VERSION(4, 3, 0):
case IP_VERSION(4, 3, 1):
+ case IP_VERSION(7, 7, 0):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
@@ -709,10 +727,6 @@ static int soc21_common_set_clockgating_state(void *handle,
adev->hdp.funcs->update_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
- case IP_VERSION(7, 7, 0):
- adev->hdp.funcs->update_clock_gating(adev,
- state == AMD_CG_STATE_GATE);
- break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 357298e69495..22c0929d410b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -382,12 +382,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 6):
- gfx_target_version = 100306;
- if (!vf)
- f2g = &gfx_v10_3_kfd2kgd;
- break;
case IP_VERSION(10, 3, 7):
- gfx_target_version = 100307;
+ gfx_target_version = 100306;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index fca7cf9dbaee..987bde4dca3d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -34,6 +34,7 @@
#include "dal_asic_id.h"
#include "amdgpu_display.h"
#include "amdgpu_dm_trace.h"
+#include "amdgpu_dm_plane.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
@@ -149,12 +150,12 @@ static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_
*size += 1;
}
-bool modifier_has_dcc(uint64_t modifier)
+static bool modifier_has_dcc(uint64_t modifier)
{
return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
}
-unsigned modifier_gfx9_swizzle_mode(uint64_t modifier)
+static unsigned modifier_gfx9_swizzle_mode(uint64_t modifier)
{
if (modifier == DRM_FORMAT_MOD_LINEAR)
return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
index 95168c2cfa6f..286981a2dd40 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h
@@ -36,17 +36,9 @@ int fill_dc_scaling_info(struct amdgpu_device *adev,
const struct drm_plane_state *state,
struct dc_scaling_info *scaling_info);
-void get_min_max_dc_plane_scaling(struct drm_device *dev,
- struct drm_framebuffer *fb,
- int *min_downscale, int *max_upscale);
-
int dm_plane_helper_check_state(struct drm_plane_state *state,
struct drm_crtc_state *new_crtc_state);
-bool modifier_has_dcc(uint64_t modifier);
-
-unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier);
-
int fill_plane_buffer_attributes(struct amdgpu_device *adev,
const struct amdgpu_framebuffer *afb,
const enum surface_pixel_format format,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index 85f32206a766..3a9e3870b3a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -1750,6 +1750,7 @@ static bool dcn314_resource_construct(
dc->caps.post_blend_color_processing = true;
dc->caps.force_dp_tps4_for_cp2520 = true;
dc->caps.dp_hpo = true;
+ dc->caps.dp_hdmi21_pcon_support = true;
dc->caps.edp_dsc_support = true;
dc->caps.extended_aux_timeout_support = true;
dc->caps.dmcub_support = true;
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
index 2ed95790a600..cf8d60c4df1b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_offset.h
@@ -15243,6 +15243,8 @@
#define regBIF0_PCIE_TX_TRACKING_ADDR_HI_BASE_IDX 5
#define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS 0x420186
#define regBIF0_PCIE_TX_TRACKING_CTRL_STATUS_BASE_IDX 5
+#define regBIF0_PCIE_TX_POWER_CTRL_1 0x420187
+#define regBIF0_PCIE_TX_POWER_CTRL_1_BASE_IDX 5
#define regBIF0_PCIE_TX_CTRL_4 0x42018b
#define regBIF0_PCIE_TX_CTRL_4_BASE_IDX 5
#define regBIF0_PCIE_TX_STATUS 0x420194
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
index eb62a18fcc48..3d60c9e92548 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_7_0_sh_mask.h
@@ -85627,6 +85627,19 @@
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_PORT_MASK 0x0000000EL
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_UNIT_ID_MASK 0x00007F00L
#define BIF0_PCIE_TX_TRACKING_CTRL_STATUS__TX_TRACKING_STATUS_VALID_MASK 0x00008000L
+//BIF0_PCIE_TX_POWER_CTRL_1
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN__SHIFT 0x0
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN__SHIFT 0x1
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN__SHIFT 0x2
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN__SHIFT 0x3
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN__SHIFT 0x4
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN__SHIFT 0x5
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK 0x00000001L
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_DS_EN_MASK 0x00000002L
+#define BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_SD_EN_MASK 0x00000004L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK 0x00000008L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_DS_EN_MASK 0x00000010L
+#define BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_SD_EN_MASK 0x00000020L
//BIF0_PCIE_TX_CTRL_4
#define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW__SHIFT 0x0
#define BIF0_PCIE_TX_CTRL_4__TX_PORT_ACCESS_TIMER_SKEW_MASK 0x0000000FL
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 78620b0bd279..f745cd8f1ab7 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -24,12 +24,8 @@
#ifndef SMU13_DRIVER_IF_V13_0_0_H
#define SMU13_DRIVER_IF_V13_0_0_H
-// *** IMPORTANT ***
-// PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION 0x23
-
//Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x1D
+#define PPTABLE_VERSION 0x22
#define NUM_GFXCLK_DPM_LEVELS 16
#define NUM_SOCCLK_DPM_LEVELS 8
@@ -1193,8 +1189,17 @@ typedef struct {
// SECTION: Advanced Options
uint32_t DebugOverrides;
+ // Section: Total Board Power idle vs active coefficients
+ uint8_t TotalBoardPowerSupport;
+ uint8_t TotalBoardPowerPadding[3];
+
+ int16_t TotalIdleBoardPowerM;
+ int16_t TotalIdleBoardPowerB;
+ int16_t TotalBoardPowerM;
+ int16_t TotalBoardPowerB;
+
// SECTION: Sku Reserved
- uint32_t Spare[64];
+ uint32_t Spare[61];
// Padding for MMHUB - do not modify this
uint32_t MmHubPadding[8];
@@ -1259,7 +1264,8 @@ typedef struct {
// SECTION: Clock Spread Spectrum
// UCLK Spread Spectrum
- uint16_t UclkSpreadPadding;
+ uint8_t UclkTrainingModeSpreadPercent;
+ uint8_t UclkSpreadPadding;
uint16_t UclkSpreadFreq; // kHz
// UCLK Spread Spectrum
@@ -1272,11 +1278,7 @@ typedef struct {
// Section: Memory Config
uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e
- uint8_t PaddingMem1[3];
-
- // Section: Total Board Power
- uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power
- uint16_t BoardPowerPadding;
+ uint8_t PaddingMem1[7];
// SECTION: UMC feature flags
uint8_t HsrEnabled;
@@ -1375,8 +1377,11 @@ typedef struct {
uint16_t Vcn1ActivityPercentage ;
uint32_t EnergyAccumulator;
- uint16_t AverageSocketPower ;
+ uint16_t AverageSocketPower;
+ uint16_t AverageTotalBoardPower;
+
uint16_t AvgTemperature[TEMP_COUNT];
+ uint16_t TempPadding;
uint8_t PcieRate ;
uint8_t PcieWidth ;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 6fe2fe92ebd7..ac308e72241a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -30,7 +30,7 @@
#define SMU13_DRIVER_IF_VERSION_ALDE 0x08
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2E
#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
#define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 86d670c71286..ad068865ba20 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev,
}
EXPORT_SYMBOL(drm_gem_private_object_init);
-static void
-drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp)
-{
- /*
- * Note: obj->dma_buf can't disappear as long as we still hold a
- * handle reference in obj->handle_count.
- */
- mutex_lock(&filp->prime.lock);
- if (obj->dma_buf) {
- drm_prime_remove_buf_handle_locked(&filp->prime,
- obj->dma_buf);
- }
- mutex_unlock(&filp->prime.lock);
-}
-
/**
* drm_gem_object_handle_free - release resources bound to userspace handles
* @obj: GEM object to clean up.
@@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
if (obj->funcs->close)
obj->funcs->close(obj, file_priv);
- drm_gem_remove_prime_handles(obj, file_priv);
+ drm_prime_remove_buf_handle(&file_priv->prime, id);
drm_vma_node_revoke(&obj->vma_node, file_priv);
drm_gem_object_handle_put_unlocked(obj);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 1fbbc19f1ac0..7bb98e6a446d 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
- struct dma_buf *dma_buf);
+void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
+ uint32_t handle);
/* drm_drv.c */
struct drm_minor *drm_minor_acquire(unsigned int minor_id);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index a3f180653b8b..eb09e86044c6 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
return -ENOENT;
}
-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
- struct dma_buf *dma_buf)
+void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv,
+ uint32_t handle)
{
struct rb_node *rb;
- rb = prime_fpriv->dmabufs.rb_node;
+ mutex_lock(&prime_fpriv->lock);
+
+ rb = prime_fpriv->handles.rb_node;
while (rb) {
struct drm_prime_member *member;
- member = rb_entry(rb, struct drm_prime_member, dmabuf_rb);
- if (member->dma_buf == dma_buf) {
+ member = rb_entry(rb, struct drm_prime_member, handle_rb);
+ if (member->handle == handle) {
rb_erase(&member->handle_rb, &prime_fpriv->handles);
rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs);
- dma_buf_put(dma_buf);
+ dma_buf_put(member->dma_buf);
kfree(member);
- return;
- } else if (member->dma_buf < dma_buf) {
+ break;
+ } else if (member->handle < handle) {
rb = rb->rb_right;
} else {
rb = rb->rb_left;
}
}
+
+ mutex_unlock(&prime_fpriv->lock);
}
void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 05076e530e7d..e29175e4b44c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
if (ret == 0) {
ret = nouveau_fence_new(chan, false, &fence);
if (ret == 0) {
+ /* TODO: figure out a better solution here
+ *
+ * wait on the fence here explicitly as going through
+ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+ *
+ * Without this the operation can timeout and we'll fallback to a
+ * software copy, which might take several minutes to finish.
+ */
+ nouveau_fence_wait(fence, false, false);
ret = ttm_bo_move_accel_cleanup(bo,
&fence->base,
evict, false,
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 2b12389f841a..ee0165687239 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1605,6 +1605,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
if (r) {
/* delay GPU reset to resume */
radeon_fence_driver_force_completion(rdev, i);
+ } else {
+ /* finish executing delayed work */
+ flush_delayed_work(&rdev->fence_drv[i].lockup_work);
}
}
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
index 061be9a6619d..b0f3117102ca 100644
--- a/drivers/gpu/drm/vc4/Kconfig
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -8,6 +8,7 @@ config DRM_VC4
depends on DRM
depends on SND && SND_SOC
depends on COMMON_CLK
+ depends on PM
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HELPER
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 592c3b5d03e6..1e5f68704d7d 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -2855,7 +2855,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
return 0;
}
-static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev)
+static int vc4_hdmi_runtime_suspend(struct device *dev)
{
struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
@@ -2972,17 +2972,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
vc4_hdmi->disable_4kp60 = true;
}
+ pm_runtime_enable(dev);
+
/*
- * We need to have the device powered up at this point to call
- * our reset hook and for the CEC init.
+ * We need to have the device powered up at this point to call
+ * our reset hook and for the CEC init.
*/
- ret = vc4_hdmi_runtime_resume(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret)
- goto err_put_ddc;
-
- pm_runtime_get_noresume(dev);
- pm_runtime_set_active(dev);
- pm_runtime_enable(dev);
+ goto err_disable_runtime_pm;
if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") ||
of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) &&
@@ -3028,6 +3026,7 @@ err_destroy_conn:
err_destroy_encoder:
drm_encoder_cleanup(encoder);
pm_runtime_put_sync(dev);
+err_disable_runtime_pm:
pm_runtime_disable(dev);
err_put_ddc:
put_device(&vc4_hdmi->ddc->dev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index afaf36b2f6ab..729be2c5296c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5620,6 +5620,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
* removed (mddev_delayed_delete).
*/
flush_workqueue(md_misc_wq);
+ flush_workqueue(md_rdev_misc_wq);
mutex_lock(&disks_mutex);
mddev = mddev_alloc(dev);
@@ -6238,11 +6239,11 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
+ md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
- md_bitmap_destroy(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -6260,6 +6261,7 @@ void md_stop(struct mddev *mddev)
/* stop the array and free an attached data structures.
* This is called from dm-raid
*/
+ __md_stop_writes(mddev);
__md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9117fcdee1be..64d6e4cd8a3a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2639,18 +2639,18 @@ static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
}
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
- int sectors, struct page *page, int rw)
+ int sectors, struct page *page, enum req_op op)
{
sector_t first_bad;
int bad_sectors;
if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
- && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
return -1;
- if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ if (sync_page_io(rdev, sector, sectors << 9, page, op, false))
/* success */
return 1;
- if (rw == WRITE) {
+ if (op == REQ_OP_WRITE) {
set_bit(WriteErrorSeen, &rdev->flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
set_bit(MD_RECOVERY_NEEDED,
@@ -2780,7 +2780,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s, conf->tmppage, WRITE)
+ s, conf->tmppage, REQ_OP_WRITE)
== 0) {
/* Well, this device is dead */
pr_notice("md/raid10:%s: read correction write failed (%d sectors at %llu on %pg)\n",
@@ -2814,8 +2814,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s, conf->tmppage,
- READ)) {
+ s, conf->tmppage, REQ_OP_READ)) {
case 0:
/* Well, this device is dead */
pr_notice("md/raid10:%s: unable to read back corrected sectors (%d sectors at %llu on %pg)\n",
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 10c563999d3d..e63608834411 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -171,6 +171,7 @@ config MMC_SDHCI_OF_ASPEED
config MMC_SDHCI_OF_ASPEED_TEST
bool "Tests for the ASPEED SDHCI driver" if !KUNIT_ALL_TESTS
depends on MMC_SDHCI_OF_ASPEED && KUNIT
+ depends on (MMC_SDHCI_OF_ASPEED=m || KUNIT=y)
default KUNIT_ALL_TESTS
help
Enable KUnit tests for the ASPEED SDHCI driver. Select this
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d7fb33c078e8..184608bd8999 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -84,7 +84,8 @@ enum ad_link_speed_type {
static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = {
0, 0, 0, 0, 0, 0
};
-static u16 ad_ticks_per_sec;
+
+static const u16 ad_ticks_per_sec = 1000 / AD_TIMER_INTERVAL;
static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000;
static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned =
@@ -2001,36 +2002,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
/**
* bond_3ad_initialize - initialize a bond's 802.3ad parameters and structures
* @bond: bonding struct to work on
- * @tick_resolution: tick duration (millisecond resolution)
*
* Can be called only after the mac address of the bond is set.
*/
-void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)
+void bond_3ad_initialize(struct bonding *bond)
{
- /* check that the bond is not initialized yet */
- if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),
- bond->dev->dev_addr)) {
-
- BOND_AD_INFO(bond).aggregator_identifier = 0;
-
- BOND_AD_INFO(bond).system.sys_priority =
- bond->params.ad_actor_sys_prio;
- if (is_zero_ether_addr(bond->params.ad_actor_system))
- BOND_AD_INFO(bond).system.sys_mac_addr =
- *((struct mac_addr *)bond->dev->dev_addr);
- else
- BOND_AD_INFO(bond).system.sys_mac_addr =
- *((struct mac_addr *)bond->params.ad_actor_system);
-
- /* initialize how many times this module is called in one
- * second (should be about every 100ms)
- */
- ad_ticks_per_sec = tick_resolution;
+ BOND_AD_INFO(bond).aggregator_identifier = 0;
+ BOND_AD_INFO(bond).system.sys_priority =
+ bond->params.ad_actor_sys_prio;
+ if (is_zero_ether_addr(bond->params.ad_actor_system))
+ BOND_AD_INFO(bond).system.sys_mac_addr =
+ *((struct mac_addr *)bond->dev->dev_addr);
+ else
+ BOND_AD_INFO(bond).system.sys_mac_addr =
+ *((struct mac_addr *)bond->params.ad_actor_system);
- bond_3ad_initiate_agg_selection(bond,
- AD_AGGREGATOR_SELECTION_TIMER *
- ad_ticks_per_sec);
- }
+ bond_3ad_initiate_agg_selection(bond,
+ AD_AGGREGATOR_SELECTION_TIMER *
+ ad_ticks_per_sec);
}
/**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 50e60843020c..2f4da2c13c0a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2081,7 +2081,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
/* Initialize AD with the number of times that the AD timer is called in 1 second
* can be called only after the mac address of the bond is set
*/
- bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);
+ bond_3ad_initialize(bond);
} else {
SLAVE_AD_INFO(new_slave)->id =
SLAVE_AD_INFO(prev_slave)->id + 1;
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index ed7d137cba99..6bd69a7e6809 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -803,9 +803,15 @@ static void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
if (dev->info->supports_rgmii[port])
phy_interface_set_rgmii(config->supported_interfaces);
- if (dev->info->internal_phy[port])
+ if (dev->info->internal_phy[port]) {
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
+ /* Compatibility for phylib's default interface type when the
+ * phy-mode property is absent
+ */
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
+ }
if (dev->dev_ops->get_caps)
dev->dev_ops->get_caps(dev, port, config);
@@ -962,6 +968,7 @@ static void ksz_update_port_member(struct ksz_device *dev, int port)
static int ksz_setup(struct dsa_switch *ds)
{
struct ksz_device *dev = ds->priv;
+ struct ksz_port *p;
const u16 *regs;
int ret;
@@ -1001,6 +1008,14 @@ static int ksz_setup(struct dsa_switch *ds)
return ret;
}
+ /* Start with learning disabled on standalone user ports, and enabled
+ * on the CPU port. In lack of other finer mechanisms, learning on the
+ * CPU port will avoid flooding bridge local addresses on the network
+ * in some cases.
+ */
+ p = &dev->ports[dev->cpu_port];
+ p->learning = true;
+
/* start switch */
regmap_update_bits(dev->regmap[0], regs[S_START_CTRL],
SW_START, SW_START);
@@ -1277,6 +1292,8 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
ksz_pread8(dev, port, regs[P_STP_CTRL], &data);
data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
+ p = &dev->ports[port];
+
switch (state) {
case BR_STATE_DISABLED:
data |= PORT_LEARN_DISABLE;
@@ -1286,9 +1303,13 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
break;
case BR_STATE_LEARNING:
data |= PORT_RX_ENABLE;
+ if (!p->learning)
+ data |= PORT_LEARN_DISABLE;
break;
case BR_STATE_FORWARDING:
data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
+ if (!p->learning)
+ data |= PORT_LEARN_DISABLE;
break;
case BR_STATE_BLOCKING:
data |= PORT_LEARN_DISABLE;
@@ -1300,12 +1321,38 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
ksz_pwrite8(dev, port, regs[P_STP_CTRL], data);
- p = &dev->ports[port];
p->stp_state = state;
ksz_update_port_member(dev, port);
}
+static int ksz_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ if (flags.mask & ~BR_LEARNING)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ksz_port_bridge_flags(struct dsa_switch *ds, int port,
+ struct switchdev_brport_flags flags,
+ struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+ struct ksz_port *p = &dev->ports[port];
+
+ if (flags.mask & BR_LEARNING) {
+ p->learning = !!(flags.val & BR_LEARNING);
+
+ /* Make the change take effect immediately */
+ ksz_port_stp_state_set(ds, port, p->stp_state);
+ }
+
+ return 0;
+}
+
static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
int port,
enum dsa_tag_protocol mp)
@@ -1719,6 +1766,8 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.port_bridge_join = ksz_port_bridge_join,
.port_bridge_leave = ksz_port_bridge_leave,
.port_stp_state_set = ksz_port_stp_state_set,
+ .port_pre_bridge_flags = ksz_port_pre_bridge_flags,
+ .port_bridge_flags = ksz_port_bridge_flags,
.port_fast_age = ksz_port_fast_age,
.port_vlan_filtering = ksz_port_vlan_filtering,
.port_vlan_add = ksz_port_vlan_add,
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 764ada3a0f42..0d9520dc6d2d 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -65,6 +65,7 @@ struct ksz_chip_data {
struct ksz_port {
bool remove_tag; /* Remove Tag flag set, for ksz8795 only */
+ bool learning;
int stp_state;
struct phy_device phydev;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ba0f1ffac507..f46eefb5a029 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11178,10 +11178,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp))
features &= ~NETIF_F_NTUPLE;
- if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
- features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
-
- if (!(bp->flags & BNXT_FLAG_TPA))
+ if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
if (!(features & NETIF_F_GRO))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 075c6206325c..b1b17f911300 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2130,6 +2130,7 @@ struct bnxt {
#define BNXT_DUMP_CRASH 1
struct bpf_prog *xdp_prog;
+ u8 xdp_has_frags;
struct bnxt_ptp_cfg *ptp_cfg;
u8 ptp_all_rx_tstamp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 059f96f7a96f..a36803e79e92 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -1306,6 +1306,7 @@ int bnxt_dl_register(struct bnxt *bp)
if (rc)
goto err_dl_port_unreg;
+ devlink_set_features(dl, DEVLINK_F_RELOAD);
out:
devlink_register(dl);
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 730febd19330..a4cba7cb2783 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
if (bp->flags & BNXT_FLAG_CHIP_P5)
- hw_resc->max_irqs -= vf_msix * n;
+ hw_resc->max_nqs -= vf_msix;
rc = pf->active_vfs;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f53387ed0167..c3065ec0a479 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
struct xdp_buff *xdp)
{
struct bnxt_sw_rx_bd *rx_buf;
+ u32 buflen = PAGE_SIZE;
struct pci_dev *pdev;
dma_addr_t mapping;
u32 offset;
@@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
- xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq);
+ if (bp->xdp_has_frags)
+ buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+
+ xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
}
@@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
}
- if (prog)
+ if (prog) {
tx_xdp = bp->rx_nr_rings;
+ bp->xdp_has_frags = prog->aux->xdp_has_frags;
+ }
tc = netdev_get_num_tc(dev);
if (!tc)
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 45634579adb6..a770bab4d1ed 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2886,6 +2886,7 @@ static void dpaa_adjust_link(struct net_device *net_dev)
/* The Aquantia PHYs are capable of performing rate adaptation */
#define PHY_VEND_AQUANTIA 0x03a1b400
+#define PHY_VEND_AQUANTIA2 0x31c31c00
static int dpaa_phy_init(struct net_device *net_dev)
{
@@ -2893,6 +2894,7 @@ static int dpaa_phy_init(struct net_device *net_dev)
struct mac_device *mac_dev;
struct phy_device *phy_dev;
struct dpaa_priv *priv;
+ u32 phy_vendor;
priv = netdev_priv(net_dev);
mac_dev = priv->mac_dev;
@@ -2905,9 +2907,11 @@ static int dpaa_phy_init(struct net_device *net_dev)
return -ENODEV;
}
+ phy_vendor = phy_dev->drv->phy_id & GENMASK(31, 10);
/* Unless the PHY is capable of rate adaptation */
if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII ||
- ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) {
+ (phy_vendor != PHY_VEND_AQUANTIA &&
+ phy_vendor != PHY_VEND_AQUANTIA2)) {
/* remove any features not supported by the controller */
ethtool_convert_legacy_u32_to_link_mode(mask,
mac_dev->if_support);
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index ed7301b69169..0cebe4b63adb 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -634,6 +634,13 @@ struct fec_enet_private {
int pps_enable;
unsigned int next_counter;
+ struct {
+ struct timespec64 ts_phc;
+ u64 ns_sys;
+ u32 at_corr;
+ u8 at_inc_corr;
+ } ptp_saved_state;
+
u64 ethtool_stats[];
};
@@ -644,5 +651,8 @@ void fec_ptp_disable_hwts(struct net_device *ndev);
int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr);
int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr);
+void fec_ptp_save_state(struct fec_enet_private *fep);
+int fec_ptp_restore_state(struct fec_enet_private *fep);
+
/****************************************************************************/
#endif /* FEC_H */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index e8e2aa1e7f01..b0d60f898249 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -285,8 +285,11 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_MMFR_TA (2 << 16)
#define FEC_MMFR_DATA(v) (v & 0xffff)
/* FEC ECR bits definition */
-#define FEC_ECR_MAGICEN (1 << 2)
-#define FEC_ECR_SLEEP (1 << 3)
+#define FEC_ECR_RESET BIT(0)
+#define FEC_ECR_ETHEREN BIT(1)
+#define FEC_ECR_MAGICEN BIT(2)
+#define FEC_ECR_SLEEP BIT(3)
+#define FEC_ECR_EN1588 BIT(4)
#define FEC_MII_TIMEOUT 30000 /* us */
@@ -982,6 +985,9 @@ fec_restart(struct net_device *ndev)
u32 temp_mac[2];
u32 rcntl = OPT_FRAME_SIZE | 0x04;
u32 ecntl = 0x2; /* ETHEREN */
+ struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS };
+
+ fec_ptp_save_state(fep);
/* Whack a reset. We should wait for this.
* For i.MX6SX SOC, enet use AXI bus, we use disable MAC
@@ -1135,7 +1141,7 @@ fec_restart(struct net_device *ndev)
}
if (fep->bufdesc_ex)
- ecntl |= (1 << 4);
+ ecntl |= FEC_ECR_EN1588;
if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
fep->rgmii_txc_dly)
@@ -1156,6 +1162,14 @@ fec_restart(struct net_device *ndev)
if (fep->bufdesc_ex)
fec_ptp_start_cyclecounter(ndev);
+ /* Restart PPS if needed */
+ if (fep->pps_enable) {
+ /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */
+ fep->pps_enable = 0;
+ fec_ptp_restore_state(fep);
+ fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1);
+ }
+
/* Enable interrupts we wish to service */
if (fep->link)
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
@@ -1206,6 +1220,8 @@ fec_stop(struct net_device *ndev)
struct fec_enet_private *fep = netdev_priv(ndev);
u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8);
u32 val;
+ struct ptp_clock_request ptp_rq = { .type = PTP_CLK_REQ_PPS };
+ u32 ecntl = 0;
/* We cannot expect a graceful transmit stop without link !!! */
if (fep->link) {
@@ -1215,6 +1231,8 @@ fec_stop(struct net_device *ndev)
netdev_err(ndev, "Graceful transmit stop did not complete!\n");
}
+ fec_ptp_save_state(fep);
+
/* Whack a reset. We should wait for this.
* For i.MX6SX SOC, enet use AXI bus, we use disable MAC
* instead of reset MAC itself.
@@ -1234,12 +1252,28 @@ fec_stop(struct net_device *ndev)
writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
+ if (fep->bufdesc_ex)
+ ecntl |= FEC_ECR_EN1588;
+
/* We have to keep ENET enabled to have MII interrupt stay working */
if (fep->quirks & FEC_QUIRK_ENET_MAC &&
!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) {
- writel(2, fep->hwp + FEC_ECNTRL);
+ ecntl |= FEC_ECR_ETHEREN;
writel(rmii_mode, fep->hwp + FEC_R_CNTRL);
}
+
+ writel(ecntl, fep->hwp + FEC_ECNTRL);
+
+ if (fep->bufdesc_ex)
+ fec_ptp_start_cyclecounter(ndev);
+
+ /* Restart PPS if needed */
+ if (fep->pps_enable) {
+ /* Clear flag so fec_ptp_enable_pps() doesn't return immediately */
+ fep->pps_enable = 0;
+ fec_ptp_restore_state(fep);
+ fep->ptp_caps.enable(&fep->ptp_caps, &ptp_rq, 1);
+ }
}
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 3dc3c0b626c2..c74d04f4b2fd 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -633,7 +633,36 @@ void fec_ptp_stop(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
+ if (fep->pps_enable)
+ fec_ptp_enable_pps(fep, 0);
+
cancel_delayed_work_sync(&fep->time_keep);
if (fep->ptp_clock)
ptp_clock_unregister(fep->ptp_clock);
}
+
+void fec_ptp_save_state(struct fec_enet_private *fep)
+{
+ u32 atime_inc_corr;
+
+ fec_ptp_gettime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc);
+ fep->ptp_saved_state.ns_sys = ktime_get_ns();
+
+ fep->ptp_saved_state.at_corr = readl(fep->hwp + FEC_ATIME_CORR);
+ atime_inc_corr = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_CORR_MASK;
+ fep->ptp_saved_state.at_inc_corr = (u8)(atime_inc_corr >> FEC_T_INC_CORR_OFFSET);
+}
+
+int fec_ptp_restore_state(struct fec_enet_private *fep)
+{
+ u32 atime_inc = readl(fep->hwp + FEC_ATIME_INC) & FEC_T_INC_MASK;
+ u64 ns_sys;
+
+ writel(fep->ptp_saved_state.at_corr, fep->hwp + FEC_ATIME_CORR);
+ atime_inc |= ((u32)fep->ptp_saved_state.at_inc_corr) << FEC_T_INC_CORR_OFFSET;
+ writel(atime_inc, fep->hwp + FEC_ATIME_INC);
+
+ ns_sys = ktime_get_ns() - fep->ptp_saved_state.ns_sys;
+ timespec64_add_ns(&fep->ptp_saved_state.ts_phc, ns_sys);
+ return fec_ptp_settime(&fep->ptp_caps, &fep->ptp_saved_state.ts_phc);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 156e92c43780..e9cd0fa6a0d2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4485,7 +4485,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
(struct in6_addr *)&ipv6_full_mask))
new_mask |= I40E_L3_V6_DST_MASK;
else if (ipv6_addr_any((struct in6_addr *)
- &usr_ip6_spec->ip6src))
+ &usr_ip6_spec->ip6dst))
new_mask &= ~I40E_L3_V6_DST_MASK;
else
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index cc5b85afd437..841fa149c407 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
* ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring: Rx ring to use
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise.
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * present, NULL otherwise.
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
{
@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
}
/**
- * ice_tx_xsk_pool - get XSK buffer pool bound to a ring
- * @ring: Tx ring to use
+ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise. Tx equivalent of ice_xsk_pool.
+ * Sets XSK buff pool pointer on XDP ring.
+ *
+ * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided
+ * queue id. Reason for doing so is that queue vectors might have assigned more
+ * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring
+ * carries a pointer to one of these XDP rings for its own purposes, such as
+ * handling XDP_TX action, therefore we can piggyback here on the
+ * rx_ring->xdp_ring assignment that was done during XDP rings initialization.
*/
-static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring)
+static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
{
- struct ice_vsi *vsi = ring->vsi;
- u16 qid;
+ struct ice_tx_ring *ring;
- qid = ring->q_index - vsi->alloc_txq;
+ ring = vsi->rx_rings[qid]->xdp_ring;
+ if (!ring)
+ return;
- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
- return NULL;
+ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+ ring->xsk_pool = NULL;
+ return;
+ }
- return xsk_get_pool_from_qid(vsi->netdev, qid);
+ ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 733c455f6574..0c4ec9264071 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
if (ret)
return ret;
- ice_for_each_xdp_txq(vsi, i)
- vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]);
+ ice_for_each_rxq(vsi, i)
+ ice_tx_xsk_pool(vsi, i);
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4ecaf40cf946..173fe6c31341 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
spin_lock_init(&xdp_ring->tx_lock);
for (j = 0; j < xdp_ring->count; j++) {
tx_desc = ICE_TX_DESC(xdp_ring, j);
@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
}
}
- ice_for_each_rxq(vsi, i) {
- if (static_key_enabled(&ice_xdp_locking_key))
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
- else
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i];
- }
-
return 0;
free_xdp_rings:
@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
xdp_rings_rem -= xdp_rings_per_v;
}
+ ice_for_each_rxq(vsi, i) {
+ if (static_key_enabled(&ice_xdp_locking_key)) {
+ vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+ } else {
+ struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx) {
+ if (ice_ring_is_xdp(ring)) {
+ vsi->rx_rings[i]->xdp_ring = ring;
+ break;
+ }
+ }
+ }
+ ice_tx_xsk_pool(vsi, i);
+ }
+
/* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 49ba8bfdbf04..e48e29258450 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
if (err)
goto free_buf;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
+ ice_tx_xsk_pool(vsi, q_idx);
}
err = ice_vsi_cfg_rxq(rx_ring);
@@ -329,6 +329,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
+ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
!is_power_of_2(vsi->tx_rings[qid]->count)) {
netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
@@ -353,7 +359,7 @@ xsk_pool_if_up:
if (if_running) {
ret = ice_qp_ena(vsi, qid);
if (!ret && pool_present)
- napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
else if (ret)
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
@@ -944,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
if (!ice_is_xdp_ena_vsi(vsi))
return -EINVAL;
- if (queue_id >= vsi->num_txq)
+ if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
return -EINVAL;
- if (!vsi->xdp_rings[queue_id]->xsk_pool)
- return -EINVAL;
+ ring = vsi->rx_rings[queue_id]->xdp_ring;
- ring = vsi->xdp_rings[queue_id];
+ if (!ring->xsk_pool)
+ return -EINVAL;
/* The idea here is that if NAPI is running, mark a miss, so
* it will run again. If not, trigger an interrupt and
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 9f06896a049b..f8605f57bd06 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1214,7 +1214,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
struct cyclecounter cc;
unsigned long flags;
u32 incval = 0;
- u32 tsauxc = 0;
u32 fuse0 = 0;
/* For some of the boards below this mask is technically incorrect.
@@ -1249,18 +1248,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
case ixgbe_mac_x550em_a:
case ixgbe_mac_X550:
cc.read = ixgbe_ptp_read_X550;
-
- /* enable SYSTIME counter */
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
- tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
- tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
- IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
- IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
-
- IXGBE_WRITE_FLUSH(hw);
break;
case ixgbe_mac_X540:
cc.read = ixgbe_ptp_read_82599;
@@ -1293,6 +1280,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
}
/**
+ * ixgbe_ptp_init_systime - Initialize SYSTIME registers
+ * @adapter: the ixgbe private board structure
+ *
+ * Initialize and start the SYSTIME registers.
+ */
+static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 tsauxc;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
+ case ixgbe_mac_X550:
+ tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+
+ /* Reset SYSTIME registers to 0 */
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+
+ /* Reset interrupt settings */
+ IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+
+ /* Activate the SYSTIME counter */
+ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+ tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+ break;
+ case ixgbe_mac_X540:
+ case ixgbe_mac_82599EB:
+ /* Reset SYSTIME registers to 0 */
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+ break;
+ default:
+ /* Other devices aren't supported */
+ return;
+ };
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
* ixgbe_ptp_reset
* @adapter: the ixgbe private board structure
*
@@ -1318,6 +1349,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
ixgbe_ptp_start_cyclecounter(adapter);
+ ixgbe_ptp_init_systime(adapter);
+
spin_lock_irqsave(&adapter->tmreg_lock, flags);
timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
ktime_to_ns(ktime_get_real()));
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 5edb68a8aab1..57f27cc7724e 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -193,6 +193,7 @@ static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int
ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
if (!ch->rx_buff[ch->dma.desc]) {
+ ch->rx_buff[ch->dma.desc] = buf;
ret = -ENOMEM;
goto skip;
}
@@ -239,6 +240,12 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
}
skb = build_skb(buf, priv->rx_skb_size);
+ if (!skb) {
+ skb_free_frag(buf);
+ net_dev->stats.rx_dropped++;
+ return -ENOMEM;
+ }
+
skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len);
@@ -288,7 +295,7 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget)
if (ret == XRX200_DMA_PACKET_IN_PROGRESS)
continue;
if (ret != XRX200_DMA_PACKET_COMPLETE)
- return ret;
+ break;
rx++;
} else {
break;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8aff4c0c28bd..5ace4609de47 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1891,10 +1891,19 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
skb->dev = netdev;
bytes += skb->len;
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY;
+ if (hash != MTK_RXD5_FOE_ENTRY)
+ skb_set_hash(skb, jhash_1word(hash, 0),
+ PKT_HASH_TYPE_L4);
rxdcsum = &trxd.rxd3;
- else
+ } else {
+ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
+ if (hash != MTK_RXD4_FOE_ENTRY)
+ skb_set_hash(skb, jhash_1word(hash, 0),
+ PKT_HASH_TYPE_L4);
rxdcsum = &trxd.rxd4;
+ }
if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid)
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1902,16 +1911,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
skb_checksum_none_assert(skb);
skb->protocol = eth_type_trans(skb, netdev);
- hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
- if (hash != MTK_RXD4_FOE_ENTRY) {
- hash = jhash_1word(hash, 0);
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
- }
-
reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
- mtk_ppe_check_skb(eth->ppe, skb,
- trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
+ mtk_ppe_check_skb(eth->ppe, skb, hash);
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 7405c97cda66..ecf85e9ed824 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -314,6 +314,11 @@
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
#define RX_DMA_SPECIAL_TAG BIT(22)
+/* PDMA descriptor rxd5 */
+#define MTK_RXD5_FOE_ENTRY GENMASK(14, 0)
+#define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18)
+#define MTK_RXD5_SRC_PORT GENMASK(29, 26)
+
#define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0xf)
#define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0x7)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
index 37522352e4b2..c8e5ca65bb6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -79,6 +79,10 @@ tc_act_police_offload(struct mlx5e_priv *priv,
struct mlx5e_flow_meter_handle *meter;
int err = 0;
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
err = fill_meter_params_from_act(act, &params);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 0aef69527226..3a1f76eac542 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -246,7 +246,7 @@ static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv
static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
struct list_head *list, int size)
{
- struct mlx5e_ktls_offload_context_tx *obj;
+ struct mlx5e_ktls_offload_context_tx *obj, *n;
struct mlx5e_async_ctx *bulk_async;
int i;
@@ -255,7 +255,7 @@ static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev,
return;
i = 0;
- list_for_each_entry(obj, list, list_node) {
+ list_for_each_entry_safe(obj, n, list, list_node) {
mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]);
i++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index e2a9b9be5c1f..e0ce5a233d0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1395,10 +1395,11 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
}
return fs;
-err_free_fs:
- kvfree(fs);
+
err_free_vlan:
mlx5e_fs_vlan_free(fs);
+err_free_fs:
+ kvfree(fs);
err:
return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d858667736a3..02eb2f0fa2ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3682,7 +3682,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable)
int err = 0;
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -4769,14 +4771,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* RQ */
mlx5e_build_rq_params(mdev, params);
- /* HW LRO */
- if (MLX5_CAP_ETH(mdev, lro_cap) &&
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- /* No XSK params: checking the availability of striding RQ in general. */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->packet_merge.type = slow_pci_heuristic(mdev) ?
- MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
- }
params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0c66774a1720..759f7d3c2cfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -662,6 +662,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ed73132129aa..a9f4c652f859 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -427,7 +427,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
dest[dest_idx].vport.vhca_id =
MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (mlx5_lag_mpesw_is_activated(esw->dev))
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
@@ -3115,8 +3116,10 @@ esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out)
err = mlx5_eswitch_load_vf_vports(esw, new_num_vfs,
MLX5_VPORT_UC_ADDR_CHANGE);
- if (err)
+ if (err) {
+ devl_unlock(devlink);
return;
+ }
}
esw->esw_funcs.num_vfs = new_num_vfs;
devl_unlock(devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 0f34e3c80d1f..065102278cb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1067,30 +1067,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
if (fn >= ldev->ports)
return;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
for (i = 0; i < ldev->ports; i++) {
if (ldev->pf[i].netdev == netdev) {
ldev->pf[i].netdev = NULL;
break;
}
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
@@ -1234,7 +1236,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
mlx5_ldev_add_netdev(ldev, dev, netdev);
for (i = 0; i < ldev->ports; i++)
- if (!ldev->pf[i].dev)
+ if (!ldev->pf[i].netdev)
break;
if (i >= ldev->ports)
@@ -1246,12 +1248,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_roce(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1260,12 +1263,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1274,13 +1278,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev) &&
dev == ldev->pf[MLX5_LAG_P1].dev;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1289,12 +1294,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1303,13 +1309,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev) &&
test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1352,9 +1359,10 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
@@ -1373,7 +1381,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev);
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return ndev;
}
@@ -1383,10 +1391,11 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
u8 port = 0;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
@@ -1401,7 +1410,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
port = ldev->v2p_map[port * ldev->buckets];
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
@@ -1422,8 +1431,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
@@ -1433,7 +1443,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
ldev->pf[MLX5_LAG_P1].dev;
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
@@ -1446,6 +1456,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
struct mlx5_core_dev **mdev;
struct mlx5_lag *ldev;
+ unsigned long flags;
int num_ports;
int ret, i, j;
void *out;
@@ -1462,7 +1473,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters);
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
num_ports = ldev->ports;
@@ -1472,7 +1483,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index bec8d6d0b5f6..c085b031abfc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1530,7 +1530,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1597,6 +1599,7 @@ err_timeout_init:
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
return err;
}
@@ -1618,6 +1621,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
}
static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index ec76a8b1acc1..60596357bfc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -376,8 +376,8 @@ retry:
goto out_dropped;
}
}
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
func_id, npages, err);
goto out_dropped;
@@ -524,10 +524,13 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
dev->priv.reclaim_pages_discard += npages;
}
/* if triggered by FW event and failed by FW then ignore */
- if (event && err == -EREMOTEIO)
+ if (event && err == -EREMOTEIO) {
err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index ee2e1b7c1310..c0e6c487c63c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -159,11 +159,11 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
devl_lock(devlink);
err = mlx5_device_enable_sriov(dev, num_vfs);
+ devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
}
- devl_unlock(devlink);
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 19009a6bd33a..9e57d23e57bf 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -71,11 +71,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr)
static void moxart_mac_free_memory(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
- int i;
-
- for (i = 0; i < RX_DESC_NUM; i++)
- dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
- priv->rx_buf_size, DMA_FROM_DEVICE);
if (priv->tx_desc_base)
dma_free_coherent(&priv->pdev->dev,
@@ -187,6 +182,7 @@ static int moxart_mac_open(struct net_device *ndev)
static int moxart_mac_stop(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+ int i;
napi_disable(&priv->napi);
@@ -198,6 +194,11 @@ static int moxart_mac_stop(struct net_device *ndev)
/* disable all functions */
writel(0, priv->base + REG_MAC_CTRL);
+ /* unmap areas mapped in moxart_mac_setup_desc_ring() */
+ for (i = 0; i < RX_DESC_NUM; i++)
+ dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
+ priv->rx_buf_size, DMA_FROM_DEVICE);
+
return 0;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1443f788ee37..0be79c516781 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1564,8 +1564,67 @@ static int ionic_set_features(struct net_device *netdev,
return err;
}
+static int ionic_set_attr_mac(struct ionic_lif *lif, u8 *mac)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
+
+ ether_addr_copy(ctx.cmd.lif_setattr.mac, mac);
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_get_attr_mac(struct ionic_lif *lif, u8 *mac_addr)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_getattr = {
+ .opcode = IONIC_CMD_LIF_GETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
+ int err;
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ ether_addr_copy(mac_addr, ctx.comp.lif_getattr.mac);
+ return 0;
+}
+
+static int ionic_program_mac(struct ionic_lif *lif, u8 *mac)
+{
+ u8 get_mac[ETH_ALEN];
+ int err;
+
+ err = ionic_set_attr_mac(lif, mac);
+ if (err)
+ return err;
+
+ err = ionic_get_attr_mac(lif, get_mac);
+ if (err)
+ return err;
+
+ /* To deal with older firmware that silently ignores the set attr mac:
+ * doesn't actually change the mac and doesn't return an error, so we
+ * do the get attr to verify whether or not the set actually happened
+ */
+ if (!ether_addr_equal(get_mac, mac))
+ return 1;
+
+ return 0;
+}
+
static int ionic_set_mac_address(struct net_device *netdev, void *sa)
{
+ struct ionic_lif *lif = netdev_priv(netdev);
struct sockaddr *addr = sa;
u8 *mac;
int err;
@@ -1574,6 +1633,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
if (ether_addr_equal(netdev->dev_addr, mac))
return 0;
+ err = ionic_program_mac(lif, mac);
+ if (err < 0)
+ return err;
+
+ if (err > 0)
+ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
+ __func__);
+
err = eth_prepare_mac_addr_change(netdev, addr);
if (err)
return err;
@@ -2963,6 +3030,9 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
mutex_lock(&lif->queue_lock);
+ if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
+ dev_info(ionic->dev, "FW Up: clearing broken state\n");
+
err = ionic_qcqs_alloc(lif);
if (err)
goto err_unlock;
@@ -3169,6 +3239,7 @@ static int ionic_station_set(struct ionic_lif *lif)
.attr = IONIC_LIF_ATTR_MAC,
},
};
+ u8 mac_address[ETH_ALEN];
struct sockaddr addr;
int err;
@@ -3177,8 +3248,23 @@ static int ionic_station_set(struct ionic_lif *lif)
return err;
netdev_dbg(lif->netdev, "found initial MAC addr %pM\n",
ctx.comp.lif_getattr.mac);
- if (is_zero_ether_addr(ctx.comp.lif_getattr.mac))
- return 0;
+ ether_addr_copy(mac_address, ctx.comp.lif_getattr.mac);
+
+ if (is_zero_ether_addr(mac_address)) {
+ eth_hw_addr_random(netdev);
+ netdev_dbg(netdev, "Random Mac generated: %pM\n", netdev->dev_addr);
+ ether_addr_copy(mac_address, netdev->dev_addr);
+
+ err = ionic_program_mac(lif, mac_address);
+ if (err < 0)
+ return err;
+
+ if (err > 0) {
+ netdev_dbg(netdev, "%s:SET/GET ATTR Mac are not same-due to old FW running\n",
+ __func__);
+ return 0;
+ }
+ }
if (!is_zero_ether_addr(netdev->dev_addr)) {
/* If the netdev mac is non-zero and doesn't match the default
@@ -3186,12 +3272,11 @@ static int ionic_station_set(struct ionic_lif *lif)
* likely here again after a fw-upgrade reset. We need to be
* sure the netdev mac is in our filter list.
*/
- if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
- netdev->dev_addr))
+ if (!ether_addr_equal(mac_address, netdev->dev_addr))
ionic_lif_addr_add(lif, netdev->dev_addr);
} else {
/* Update the netdev mac with the device's mac */
- memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
+ ether_addr_copy(addr.sa_data, mac_address);
addr.sa_family = AF_INET;
err = eth_prepare_mac_addr_change(netdev, &addr);
if (err) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 4029b4e021f8..56f93b030551 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -474,8 +474,8 @@ try_again:
ionic_opcode_to_str(opcode), opcode,
ionic_error_to_str(err), err);
- msleep(1000);
iowrite32(0, &idev->dev_cmd_regs->done);
+ msleep(1000);
iowrite32(1, &idev->dev_cmd_regs->doorbell);
goto try_again;
}
@@ -488,6 +488,8 @@ try_again:
return ionic_error_to_errno(err);
}
+ ionic_dev_cmd_clean(ionic);
+
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index caa4bfc4c1d6..9b6138b11776 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -258,14 +258,18 @@ EXPORT_SYMBOL_GPL(stmmac_set_mac_addr);
/* Enable disable MAC RX/TX */
void stmmac_set_mac(void __iomem *ioaddr, bool enable)
{
- u32 value = readl(ioaddr + MAC_CTRL_REG);
+ u32 old_val, value;
+
+ old_val = readl(ioaddr + MAC_CTRL_REG);
+ value = old_val;
if (enable)
value |= MAC_ENABLE_RX | MAC_ENABLE_TX;
else
value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX);
- writel(value, ioaddr + MAC_CTRL_REG);
+ if (value != old_val)
+ writel(value, ioaddr + MAC_CTRL_REG);
}
void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 070b5ef165eb..592d29abcb1c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -986,10 +986,10 @@ static void stmmac_mac_link_up(struct phylink_config *config,
bool tx_pause, bool rx_pause)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
- u32 ctrl;
+ u32 old_ctrl, ctrl;
- ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
- ctrl &= ~priv->hw->link.speed_mask;
+ old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
+ ctrl = old_ctrl & ~priv->hw->link.speed_mask;
if (interface == PHY_INTERFACE_MODE_USXGMII) {
switch (speed) {
@@ -1064,7 +1064,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
if (tx_pause && rx_pause)
stmmac_mac_flow_ctrl(priv, duplex);
- writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
+ if (ctrl != old_ctrl)
+ writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
stmmac_mac_set(priv, priv->ioaddr, true);
if (phy && priv->dma_cap.eee) {
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 1e9eae208e44..53a1dbeaffa6 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -568,7 +568,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
}
/* Align the address down and the size up to a page boundary */
- addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
+ addr = qcom_smem_virt_to_phys(virt);
phys = addr & PAGE_MASK;
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index ef02f2cf5ce1..cbabca167a07 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = {
.notifier_call = ipvtap_device_event,
};
-static int ipvtap_init(void)
+static int __init ipvtap_init(void)
{
int err;
@@ -228,7 +228,7 @@ out1:
}
module_init(ipvtap_init);
-static void ipvtap_exit(void)
+static void __exit ipvtap_exit(void)
{
rtnl_link_unregister(&ipvtap_link_ops);
unregister_netdevice_notifier(&ipvtap_notifier_block);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index ee6087e7b2bf..c6d271e5687e 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -462,11 +462,6 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb)
return (struct macsec_eth_header *)skb_mac_header(skb);
}
-static sci_t dev_to_sci(struct net_device *dev, __be16 port)
-{
- return make_sci(dev->dev_addr, port);
-}
-
static void __macsec_pn_wrapped(struct macsec_secy *secy,
struct macsec_tx_sa *tx_sa)
{
@@ -3661,7 +3656,6 @@ static int macsec_set_mac_address(struct net_device *dev, void *p)
out:
eth_hw_addr_set(dev, addr->sa_data);
- macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES);
/* If h/w offloading is available, propagate to the device */
if (macsec_is_offloaded(macsec)) {
@@ -4000,6 +3994,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci)
return false;
}
+static sci_t dev_to_sci(struct net_device *dev, __be16 port)
+{
+ return make_sci(dev->dev_addr, port);
+}
+
static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
{
struct macsec_dev *macsec = macsec_priv(dev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0c6efd792690..12ff276b80ae 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -316,11 +316,11 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
phydev->suspended_by_mdio_bus = 0;
- /* If we managed to get here with the PHY state machine in a state other
- * than PHY_HALTED this is an indication that something went wrong and
- * we should most likely be using MAC managed PM and we are not.
+ /* If we manged to get here with the PHY state machine in a state neither
+ * PHY_HALTED nor PHY_READY this is an indication that something went wrong
+ * and we should most likely be using MAC managed PM and we are not.
*/
- WARN_ON(phydev->state != PHY_HALTED && !phydev->mac_managed_pm);
+ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY);
ret = phy_init_hw(phydev);
if (ret < 0)
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0f6efaabaa32..d142ac8fcf6e 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -5906,6 +5906,11 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ /* RX FIFO settings for OOB */
+ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB);
+
rtl_disable(tp);
rtl_reset_bmu(tp);
@@ -6431,21 +6436,8 @@ static void r8156_fc_parameter(struct r8152 *tp)
u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp);
u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp);
- switch (tp->version) {
- case RTL_VER_10:
- case RTL_VER_11:
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8);
- break;
- case RTL_VER_12:
- case RTL_VER_13:
- case RTL_VER_15:
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
- break;
- default:
- break;
- }
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
}
static void rtl8156_change_mtu(struct r8152 *tp)
@@ -6557,6 +6549,11 @@ static void rtl8156_down(struct r8152 *tp)
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ /* RX FIFO settings for OOB */
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 1024 / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 4096 / 16);
+
rtl_disable(tp);
rtl_reset_bmu(tp);
diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
index 2caf997f9bc9..07596bf5f7d6 100644
--- a/drivers/nfc/pn533/uart.c
+++ b/drivers/nfc/pn533/uart.c
@@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev)
pn53x_unregister_nfc(pn532->priv);
serdev_device_close(serdev);
pn53x_common_clean(pn532->priv);
+ del_timer_sync(&pn532->cmd_timeout);
kfree_skb(pn532->recv_skb);
kfree(pn532);
}
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 9be007c9420f..f223afe47d10 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -268,7 +268,7 @@ static int ioc_count;
* Each bit can represent a number of pages.
* LSbs represent lower addresses (IOVA's).
*
-* This was was copied from sba_iommu.c. Don't try to unify
+* This was copied from sba_iommu.c. Don't try to unify
* the two resource managers unless a way to have different
* allocation policies is also adjusted. We'd like to avoid
* I/O TLB thrashing by having resource allocation policy
@@ -1380,15 +1380,17 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
}
}
-static void __init ccio_init_resources(struct ioc *ioc)
+static int __init ccio_init_resources(struct ioc *ioc)
{
struct resource *res = ioc->mmio_region;
char *name = kmalloc(14, GFP_KERNEL);
-
+ if (unlikely(!name))
+ return -ENOMEM;
snprintf(name, 14, "GSC Bus [%d/]", ioc->hw_path);
ccio_init_resource(res, name, &ioc->ioc_regs->io_io_low);
ccio_init_resource(res + 1, name, &ioc->ioc_regs->io_io_low_hv);
+ return 0;
}
static int new_ioc_area(struct resource *res, unsigned long size,
@@ -1543,7 +1545,10 @@ static int __init ccio_probe(struct parisc_device *dev)
return -ENOMEM;
}
ccio_ioc_init(ioc);
- ccio_init_resources(ioc);
+ if (ccio_init_resources(ioc)) {
+ kfree(ioc);
+ return -ENOMEM;
+ }
hppa_dma_ops = &ccio_ops;
hba = kzalloc(sizeof(*hba), GFP_KERNEL);
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 1e4a5663d011..d4be9d2ee74d 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -646,7 +646,7 @@ int lcd_print( const char *str )
cancel_delayed_work_sync(&led_task);
/* copy display string to buffer for procfs */
- strlcpy(lcd_text, str, sizeof(lcd_text));
+ strscpy(lcd_text, str, sizeof(lcd_text));
/* Set LCD Cursor to 1st character */
gsc_writeb(lcd_info.reset_cmd1, LCD_CMD_REG);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index a3e117a4b8e7..f6c37a97544e 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -7153,22 +7153,18 @@ static int megasas_alloc_ctrl_mem(struct megasas_instance *instance)
switch (instance->adapter_type) {
case MFI_SERIES:
if (megasas_alloc_mfi_ctrl_mem(instance))
- goto fail;
+ return -ENOMEM;
break;
case AERO_SERIES:
case VENTURA_SERIES:
case THUNDERBOLT_SERIES:
case INVADER_SERIES:
if (megasas_alloc_fusion_context(instance))
- goto fail;
+ return -ENOMEM;
break;
}
return 0;
- fail:
- kfree(instance->reply_map);
- instance->reply_map = NULL;
- return -ENOMEM;
}
/*
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index e48d4261d0bc..09c5fe37754c 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -5310,7 +5310,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance)
if (!fusion->log_to_span) {
dev_err(&instance->pdev->dev, "Failed from %s %d\n",
__func__, __LINE__);
- kfree(instance->ctrl_context);
return -ENOMEM;
}
}
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2b2f68288375..62666df1a59e 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -6935,14 +6935,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha)
if (ha->flags.msix_enabled) {
if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
- if (IS_QLA2071(ha)) {
- /* 4 ports Baker: Enable Interrupt Handshake */
- icb->msix_atio = 0;
- icb->firmware_options_2 |= cpu_to_le32(BIT_26);
- } else {
- icb->msix_atio = cpu_to_le16(msix->entry);
- icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
- }
+ icb->msix_atio = cpu_to_le16(msix->entry);
+ icb->firmware_options_2 &= cpu_to_le32(~BIT_26);
ql_dbg(ql_dbg_init, vha, 0xf072,
"Registering ICB vector 0x%x for atio que.\n",
msix->entry);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4dbd29ab1dcc..ef08029a0079 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -111,7 +111,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
}
}
-static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
+static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs)
{
struct request *rq = scsi_cmd_to_rq(cmd);
@@ -121,7 +121,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
} else {
WARN_ON_ONCE(true);
}
- blk_mq_requeue_request(rq, true);
+
+ if (msecs) {
+ blk_mq_requeue_request(rq, false);
+ blk_mq_delay_kick_requeue_list(rq->q, msecs);
+ } else
+ blk_mq_requeue_request(rq, true);
}
/**
@@ -651,14 +656,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq)
return bytes;
}
-/* Helper for scsi_io_completion() when "reprep" action required. */
-static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
- struct request_queue *q)
-{
- /* A new command will be prepared and issued. */
- scsi_mq_requeue_cmd(cmd);
-}
-
static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
{
struct request *req = scsi_cmd_to_rq(cmd);
@@ -676,14 +673,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
return false;
}
+/*
+ * When ALUA transition state is returned, reprep the cmd to
+ * use the ALUA handler's transition timeout. Delay the reprep
+ * 1 sec to avoid aggressive retries of the target in that
+ * state.
+ */
+#define ALUA_TRANSITION_REPREP_DELAY 1000
+
/* Helper for scsi_io_completion() when special action required. */
static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
{
- struct request_queue *q = cmd->device->request_queue;
struct request *req = scsi_cmd_to_rq(cmd);
int level = 0;
- enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
- ACTION_DELAYED_RETRY} action;
+ enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP,
+ ACTION_RETRY, ACTION_DELAYED_RETRY} action;
struct scsi_sense_hdr sshdr;
bool sense_valid;
bool sense_current = true; /* false implies "deferred sense" */
@@ -772,8 +776,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
action = ACTION_DELAYED_RETRY;
break;
case 0x0a: /* ALUA state transition */
- blk_stat = BLK_STS_TRANSPORT;
- fallthrough;
+ action = ACTION_DELAYED_REPREP;
+ break;
default:
action = ACTION_FAIL;
break;
@@ -832,7 +836,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
return;
fallthrough;
case ACTION_REPREP:
- scsi_io_completion_reprep(cmd, q);
+ scsi_mq_requeue_cmd(cmd, 0);
+ break;
+ case ACTION_DELAYED_REPREP:
+ scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY);
break;
case ACTION_RETRY:
/* Retry the same command immediately */
@@ -926,7 +933,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
* command block will be released and the queue function will be goosed. If we
* are not done then we have to figure out what to do next:
*
- * a) We can call scsi_io_completion_reprep(). The request will be
+ * a) We can call scsi_mq_requeue_cmd(). The request will be
* unprepared and put back on the queue. Then a new command will
* be created for it. This should be used if we made forward
* progress, or if we want to switch from READ(10) to READ(6) for
@@ -942,7 +949,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
{
int result = cmd->result;
- struct request_queue *q = cmd->device->request_queue;
struct request *req = scsi_cmd_to_rq(cmd);
blk_status_t blk_stat = BLK_STS_OK;
@@ -979,7 +985,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
* request just queue the command up again.
*/
if (likely(result == 0))
- scsi_io_completion_reprep(cmd, q);
+ scsi_mq_requeue_cmd(cmd, 0);
else
scsi_io_completion_action(cmd, result);
}
@@ -1542,7 +1548,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
scsi_init_command(sdev, cmd);
cmd->eh_eflags = 0;
- cmd->allowed = 0;
cmd->prot_type = 0;
cmd->prot_flags = 0;
cmd->submitter = 0;
@@ -1593,6 +1598,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
return ret;
}
+ /* Usually overridden by the ULP */
+ cmd->allowed = 0;
memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
return scsi_cmd_to_driver(cmd)->init_command(cmd);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 8f79fa6318fe..eb76ba055021 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -103,7 +103,6 @@ static void sd_config_discard(struct scsi_disk *, unsigned int);
static void sd_config_write_same(struct scsi_disk *);
static int sd_revalidate_disk(struct gendisk *);
static void sd_unlock_native_capacity(struct gendisk *disk);
-static void sd_start_done_work(struct work_struct *work);
static int sd_probe(struct device *);
static int sd_remove(struct device *);
static void sd_shutdown(struct device *);
@@ -3471,7 +3470,6 @@ static int sd_probe(struct device *dev)
sdkp->max_retries = SD_MAX_RETRIES;
atomic_set(&sdkp->openers, 0);
atomic_set(&sdkp->device->ioerr_cnt, 0);
- INIT_WORK(&sdkp->start_done_work, sd_start_done_work);
if (!sdp->request_queue->rq_timeout) {
if (sdp->type != TYPE_MOD)
@@ -3594,69 +3592,12 @@ static void scsi_disk_release(struct device *dev)
kfree(sdkp);
}
-/* Process sense data after a START command finished. */
-static void sd_start_done_work(struct work_struct *work)
-{
- struct scsi_disk *sdkp = container_of(work, typeof(*sdkp),
- start_done_work);
- struct scsi_sense_hdr sshdr;
- int res = sdkp->start_result;
-
- if (res == 0)
- return;
-
- sd_print_result(sdkp, "Start/Stop Unit failed", res);
-
- if (res < 0)
- return;
-
- if (scsi_normalize_sense(sdkp->start_sense_buffer,
- sdkp->start_sense_len, &sshdr))
- sd_print_sense_hdr(sdkp, &sshdr);
-}
-
-/* A START command finished. May be called from interrupt context. */
-static void sd_start_done(struct request *req, blk_status_t status)
-{
- const struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
- struct scsi_disk *sdkp = scsi_disk(req->q->disk);
-
- sdkp->start_result = scmd->result;
- WARN_ON_ONCE(scmd->sense_len > SCSI_SENSE_BUFFERSIZE);
- sdkp->start_sense_len = scmd->sense_len;
- memcpy(sdkp->start_sense_buffer, scmd->sense_buffer,
- ARRAY_SIZE(sdkp->start_sense_buffer));
- WARN_ON_ONCE(!schedule_work(&sdkp->start_done_work));
-}
-
-/* Submit a START command asynchronously. */
-static int sd_submit_start(struct scsi_disk *sdkp, u8 cmd[], u8 cmd_len)
-{
- struct scsi_device *sdev = sdkp->device;
- struct request_queue *q = sdev->request_queue;
- struct request *req;
- struct scsi_cmnd *scmd;
-
- req = scsi_alloc_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- scmd = blk_mq_rq_to_pdu(req);
- scmd->cmd_len = cmd_len;
- memcpy(scmd->cmnd, cmd, cmd_len);
- scmd->allowed = sdkp->max_retries;
- req->timeout = SD_TIMEOUT;
- req->rq_flags |= RQF_PM | RQF_QUIET;
- req->end_io = sd_start_done;
- blk_execute_rq_nowait(req, /*at_head=*/true);
-
- return 0;
-}
-
static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
{
unsigned char cmd[6] = { START_STOP }; /* START_VALID */
+ struct scsi_sense_hdr sshdr;
struct scsi_device *sdp = sdkp->device;
+ int res;
if (start)
cmd[4] |= 1; /* START */
@@ -3667,10 +3608,23 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
if (!scsi_device_online(sdp))
return -ENODEV;
- /* Wait until processing of sense data has finished. */
- flush_work(&sdkp->start_done_work);
+ res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
+ SD_TIMEOUT, sdkp->max_retries, 0, RQF_PM, NULL);
+ if (res) {
+ sd_print_result(sdkp, "Start/Stop Unit failed", res);
+ if (res > 0 && scsi_sense_valid(&sshdr)) {
+ sd_print_sense_hdr(sdkp, &sshdr);
+ /* 0x3a is medium not present */
+ if (sshdr.asc == 0x3a)
+ res = 0;
+ }
+ }
- return sd_submit_start(sdkp, cmd, sizeof(cmd));
+ /* SCSI error codes must not go to the generic layer */
+ if (res)
+ return -EIO;
+
+ return 0;
}
/*
@@ -3697,8 +3651,6 @@ static void sd_shutdown(struct device *dev)
sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
sd_start_stop_device(sdkp, 0);
}
-
- flush_work(&sdkp->start_done_work);
}
static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index b89187761d61..5eea762f84d1 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -150,11 +150,6 @@ struct scsi_disk {
unsigned urswrz : 1;
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
-
- int start_result;
- u32 start_sense_len;
- u8 start_sense_buffer[SCSI_SENSE_BUFFERSIZE];
- struct work_struct start_done_work;
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fe000da11332..8ced292c4b96 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device,
*/
host_dev->handle_error_wq =
alloc_ordered_workqueue("storvsc_error_wq_%d",
- WQ_MEM_RECLAIM,
+ 0,
host->host_no);
if (!host_dev->handle_error_wq) {
ret = -ENOMEM;
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 6bc679d22927..a202d7d5240d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8741,6 +8741,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
struct scsi_device *sdp;
unsigned long flags;
int ret, retries;
+ unsigned long deadline;
+ int32_t remaining;
spin_lock_irqsave(hba->host->host_lock, flags);
sdp = hba->ufs_device_wlun;
@@ -8773,9 +8775,14 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
* callbacks hence set the RQF_PM flag so that it doesn't resume the
* already suspended childs.
*/
+ deadline = jiffies + 10 * HZ;
for (retries = 3; retries > 0; --retries) {
+ ret = -ETIMEDOUT;
+ remaining = deadline - jiffies;
+ if (remaining <= 0)
+ break;
ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
- START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL);
+ remaining / HZ, 0, 0, RQF_PM, NULL);
if (!scsi_status_is_check_condition(ret) ||
!scsi_sense_valid(&sshdr) ||
sshdr.sense_key != UNIT_ATTENTION)
diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c
index eced97538082..c3628a8645a5 100644
--- a/drivers/ufs/host/ufs-exynos.c
+++ b/drivers/ufs/host/ufs-exynos.c
@@ -1711,7 +1711,7 @@ static struct exynos_ufs_uic_attr fsd_uic_attr = {
.pa_dbg_option_suite = 0x2E820183,
};
-struct exynos_ufs_drv_data fsd_ufs_drvs = {
+static const struct exynos_ufs_drv_data fsd_ufs_drvs = {
.uic_attr = &fsd_uic_attr,
.quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN |
UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR |
diff --git a/fs/inode.c b/fs/inode.c
index 6462276dfdf0..ba1de23c13c1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2018,23 +2018,25 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = file_inode(file);
- int error;
+ int error = 0;
int kill;
if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode))
return 0;
kill = dentry_needs_remove_privs(dentry);
- if (kill <= 0)
+ if (kill < 0)
return kill;
- if (flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (kill) {
+ if (flags & IOCB_NOWAIT)
+ return -EAGAIN;
+
+ error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
+ }
- error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
if (!error)
inode_has_no_xattr(inode);
-
return error;
}
diff --git a/fs/locks.c b/fs/locks.c
index c266cfdc3291..607f94a0e789 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2129,6 +2129,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
else
error = locks_lock_file_wait(f.file, &fl);
+ locks_release_private(&fl);
out_putf:
fdput(f);
diff --git a/fs/namespace.c b/fs/namespace.c
index 68789f896f08..df137ba19d37 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
err = -EPERM;
goto out_fput;
}
+
+ /* We're not controlling the target namespace. */
+ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_fput;
+ }
+
kattr->mnt_userns = get_user_ns(mnt_userns);
out_fput:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index dbab3caa15ed..5d6c2ddc7ea6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2382,7 +2382,8 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
{
switch (error) {
case -ENOENT:
- d_delete(dentry);
+ if (d_really_is_positive(dentry))
+ d_delete(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case 0:
@@ -2484,8 +2485,10 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
*/
error = -ETXTBSY;
if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
- WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+ WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
+ spin_unlock(&dentry->d_lock);
goto out;
+ }
if (dentry->d_fsdata)
/* old devname */
kfree(dentry->d_fsdata);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d2bcd4834c0e..e032fe201a36 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ if (nredirtied == save_nredirtied)
break;
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
+ save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..bea7c005119c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e88f6b18445e..9eb181287879 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
}
+ if (!S_ISREG(fattr->mode)) {
+ res = ERR_PTR(-EBADF);
+ goto out;
+ }
+
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
read_name = kzalloc(len, GFP_KERNEL);
@@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
+ iput(r_ino);
goto out_free_name;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 41a9b6b58fb9..2613b7e36eb9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2817,7 +2817,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 51a7e202d6e5..1843fa235d9b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1420,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1904,7 +1906,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 5bdff12a1232..6ae1f56b7358 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -483,8 +483,7 @@ out:
}
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
- struct inode *inode, int type,
+static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type,
int locked)
{
struct ntfs_inode *ni = ntfs_i(inode);
@@ -519,7 +518,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
/* Translate extended attribute to acl. */
if (err >= 0) {
- acl = posix_acl_from_xattr(mnt_userns, buf, err);
+ acl = posix_acl_from_xattr(&init_user_ns, buf, err);
} else if (err == -ENODATA) {
acl = NULL;
} else {
@@ -542,8 +541,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
if (rcu)
return ERR_PTR(-ECHILD);
- /* TODO: init_user_ns? */
- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+ return ntfs_get_acl_ex(inode, type, 0);
}
static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
@@ -595,7 +593,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
value = kmalloc(size, GFP_NOFS);
if (!value)
return -ENOMEM;
- err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (err < 0)
goto out;
flags = 0;
@@ -646,7 +644,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
if (!acl)
return -ENODATA;
- err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
posix_acl_release(acl);
return err;
@@ -670,12 +668,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
if (!value) {
acl = NULL;
} else {
- acl = posix_acl_from_xattr(mnt_userns, value, size);
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- err = posix_acl_valid(mnt_userns, acl);
+ err = posix_acl_valid(&init_user_ns, acl);
if (err)
goto release_and_out;
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b45fea69fff3..0fbcb590af84 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -460,9 +460,12 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
* of the POSIX ACLs retrieved from the lower layer to this function to not
* alter the POSIX ACLs for the underlying filesystem.
*/
-static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
+static void ovl_idmap_posix_acl(struct inode *realinode,
+ struct user_namespace *mnt_userns,
struct posix_acl *acl)
{
+ struct user_namespace *fs_userns = i_user_ns(realinode);
+
for (unsigned int i = 0; i < acl->a_count; i++) {
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -470,11 +473,11 @@ static void ovl_idmap_posix_acl(struct user_namespace *mnt_userns,
struct posix_acl_entry *e = &acl->a_entries[i];
switch (e->e_tag) {
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, e->e_uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, e->e_uid);
e->e_uid = vfsuid_into_kuid(vfsuid);
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, e->e_gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, e->e_gid);
e->e_gid = vfsgid_into_kgid(vfsgid);
break;
}
@@ -536,7 +539,7 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu)
if (!clone)
clone = ERR_PTR(-ENOMEM);
else
- ovl_idmap_posix_acl(mnt_user_ns(realpath.mnt), clone);
+ ovl_idmap_posix_acl(realinode, mnt_user_ns(realpath.mnt), clone);
/*
* Since we're not in RCU path walk we always need to release the
* original ACLs.
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 1d17d7b13dcd..5af33800743e 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -361,6 +361,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
const struct posix_acl *acl, int want)
{
const struct posix_acl_entry *pa, *pe, *mask_obj;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int found = 0;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -376,7 +377,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
goto check_perm;
break;
case ACL_USER:
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns,
+ vfsuid = make_vfsuid(mnt_userns, fs_userns,
pa->e_uid);
if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
goto mask;
@@ -390,7 +391,7 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
}
break;
case ACL_GROUP:
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns,
+ vfsgid = make_vfsgid(mnt_userns, fs_userns,
pa->e_gid);
if (vfsgid_in_group_p(vfsgid)) {
found = 1;
@@ -736,6 +737,7 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -753,13 +755,13 @@ void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns,
switch (le16_to_cpu(entry->e_tag)) {
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsuid = make_vfsuid(mnt_userns, &init_user_ns, uid);
+ vfsuid = make_vfsuid(mnt_userns, fs_userns, uid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns,
vfsuid_into_kuid(vfsuid)));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
- vfsgid = make_vfsgid(mnt_userns, &init_user_ns, gid);
+ vfsgid = make_vfsgid(mnt_userns, fs_userns, gid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns,
vfsgid_into_kgid(vfsgid)));
break;
@@ -775,6 +777,7 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
{
struct posix_acl_xattr_header *header = value;
struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
+ struct user_namespace *fs_userns = i_user_ns(inode);
int count;
vfsuid_t vfsuid;
vfsgid_t vfsgid;
@@ -793,13 +796,13 @@ void posix_acl_setxattr_idmapped_mnt(struct user_namespace *mnt_userns,
case ACL_USER:
uid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsuid = VFSUIDT_INIT(uid);
- uid = from_vfsuid(mnt_userns, &init_user_ns, vfsuid);
+ uid = from_vfsuid(mnt_userns, fs_userns, vfsuid);
entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, uid));
break;
case ACL_GROUP:
gid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
vfsgid = VFSGIDT_INIT(gid);
- gid = from_vfsgid(mnt_userns, &init_user_ns, vfsgid);
+ gid = from_vfsgid(mnt_userns, fs_userns, vfsgid);
entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, gid));
break;
default:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a3398d0f1927..4e0023643f8b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -527,10 +527,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false;
+ bool migration = false, young = false, dirty = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
+ young = pte_young(*pte);
+ dirty = pte_dirty(*pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
@@ -560,8 +562,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
- locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1c44bf75f916..175de70e3adf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(mm, vma, start, vma_end - start, false);
+
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h
index 3d5ebd24652b..564a8c675d85 100644
--- a/include/asm-generic/bitops/generic-non-atomic.h
+++ b/include/asm-generic/bitops/generic-non-atomic.h
@@ -4,6 +4,7 @@
#define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H
#include <linux/bits.h>
+#include <asm/barrier.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
@@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr)
return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
}
+/**
+ * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static __always_inline bool
+generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1)));
+}
+
/*
* const_*() definitions provide good compile-time optimizations when
* the passed arguments can be resolved at compile time.
@@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr)
#define const___test_and_set_bit generic___test_and_set_bit
#define const___test_and_clear_bit generic___test_and_clear_bit
#define const___test_and_change_bit generic___test_and_change_bit
+#define const_test_bit_acquire generic_test_bit_acquire
/**
* const_test_bit - Determine whether a bit is set
diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h
index 988a3bbfba34..2b238b161a62 100644
--- a/include/asm-generic/bitops/instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/instrumented-non-atomic.h
@@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volatile unsigned long *addr)
return arch_test_bit(nr, addr);
}
+/**
+ * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static __always_inline bool
+_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+ instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
+ return arch_test_bit_acquire(nr, addr);
+}
+
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 5c37ced343ae..71f8d54a5195 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -13,6 +13,7 @@
#define arch___test_and_change_bit generic___test_and_change_bit
#define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
#include <asm-generic/bitops/non-instrumented-non-atomic.h>
diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h
index bdb9b1ffaee9..0ddc78dfc358 100644
--- a/include/asm-generic/bitops/non-instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h
@@ -12,5 +12,6 @@
#define ___test_and_change_bit arch___test_and_change_bit
#define _test_bit arch_test_bit
+#define _test_bit_acquire arch_test_bit_acquire
#endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf9bf65039f2..3b89c64bcfd8 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w);
#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr)
#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr)
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
+#define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr)
/*
* Include this here because some architectures need generic_ffs/fls in
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index def8b8d30ccc..089c9ade4325 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
* make it consistent with folio_test_uptodate
* pairs with smp_mb__before_atomic in set_buffer_uptodate
*/
- return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0;
+ return test_bit_acquire(BH_Uptodate, &bh->b_state);
}
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed53bfe7c46c..ac5d0515680e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -734,11 +734,6 @@ static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
return NULL;
}
-static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
-{
- return NULL;
-}
-
static inline bool cgroup_psi_enabled(void)
{
return false;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 96b16fbe1aa4..7b7ce602c808 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -779,6 +779,7 @@ struct mlx5_core_dev {
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
+ struct lock_class_key lock_key;
unsigned long intf_state;
struct mlx5_priv priv;
struct mlx5_profile profile;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3bedc449c14d..982f2607180b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a3cb93c3dcc..05d6f3facd5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -640,9 +640,23 @@ extern int sysctl_devconf_inherit_init_net;
*/
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return !IS_ENABLED(CONFIG_SYSCTL) ||
- !sysctl_fb_tunnels_only_for_init_net ||
- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
+#if IS_ENABLED(CONFIG_SYSCTL)
+ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
+
+ return !fb_tunnels_only_for_init_net ||
+ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
+#else
+ return true;
+#endif
+}
+
+static inline int net_inherit_devconf(void)
+{
+#if IS_ENABLED(CONFIG_SYSCTL)
+ return READ_ONCE(sysctl_devconf_inherit_init_net);
+#else
+ return 0;
+#endif
}
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a13296d6c7ce..fd533552a062 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -94,10 +94,6 @@ struct ebt_table {
struct ebt_replace_kernel *table;
unsigned int valid_hooks;
rwlock_t lock;
- /* e.g. could be the table explicitly only allows certain
- * matches, targets, ... 0 == let it in */
- int (*check)(const struct ebt_table_info *info,
- unsigned int valid_hooks);
/* the data used by the kernel */
struct ebt_table_info *private;
struct nf_hook_ops *ops;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b32ed68e7dc4..7931fa472561 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -83,7 +83,6 @@ struct nfs_open_context {
fmode_t mode;
unsigned long flags;
-#define NFS_CONTEXT_RESEND_WRITES (1)
#define NFS_CONTEXT_BAD (2)
#define NFS_CONTEXT_UNLOCK (3)
#define NFS_CONTEXT_FILE_OPEN (4)
@@ -182,6 +181,7 @@ struct nfs_inode {
/* Regular file */
struct {
atomic_long_t nrequests;
+ atomic_long_t redirtied_pages;
struct nfs_mds_commit_info commit_info;
struct mutex commit_mutex;
};
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 89784763d19e..dd74411ac21d 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -27,7 +27,7 @@ void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, size_t nbytes, enum psi_res res);
+ char *buf, enum psi_res res);
void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 1b6c4013f691..ff0b990de83d 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -29,15 +29,10 @@ struct shmem_inode_info {
struct inode vfs_inode;
};
-#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
-#define SHMEM_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE
-#define SHMEM_FL_INHERITED FS_FL_USER_MODIFIABLE
-
-/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define SHMEM_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
-
-/* Flags that are appropriate for non-directories/regular files. */
-#define SHMEM_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
+#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_MODIFIABLE \
+ (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
+#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 732b522bacb7..e1b8a915e9e9 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, atomic_t *mmap_changing);
+extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long len, bool enable_wp);
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 404024486fa5..f3fc36cd2276 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -20,12 +20,19 @@
#define HIGHMEM_ZONE(xx)
#endif
-#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE
+#ifdef CONFIG_ZONE_DEVICE
+#define DEVICE_ZONE(xx) xx##_DEVICE,
+#else
+#define DEVICE_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, \
+ HIGHMEM_ZONE(xx) xx##_MOVABLE, DEVICE_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
- FOR_ALL_ZONES(PGALLOC),
- FOR_ALL_ZONES(ALLOCSTALL),
- FOR_ALL_ZONES(PGSCAN_SKIP),
+ FOR_ALL_ZONES(PGALLOC)
+ FOR_ALL_ZONES(ALLOCSTALL)
+ FOR_ALL_ZONES(PGSCAN_SKIP)
PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE,
PGFAULT, PGMAJFAULT,
PGLAZYFREED,
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7dec36aecbd9..7725b7579b78 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -71,7 +71,7 @@ static inline int
wait_on_bit(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait,
@@ -96,7 +96,7 @@ static inline int
wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit,
bit_wait_io,
@@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
unsigned long timeout)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit_timeout(word, bit,
bit_wait_timeout,
@@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
unsigned mode)
{
might_sleep();
- if (!test_bit(bit, word))
+ if (!test_bit_acquire(bit, word))
return 0;
return out_of_line_wait_on_bit(word, bit, action, mode);
}
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 184105d68294..be2992e6de5d 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -290,7 +290,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
}
/* ========== AD Exported functions to the main bonding code ========== */
-void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution);
+void bond_3ad_initialize(struct bonding *bond);
void bond_3ad_bind_slave(struct slave *slave);
void bond_3ad_unbind_slave(struct slave *slave);
void bond_3ad_state_machine_handler(struct work_struct *);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c4898fcbf923..f90f0021f5f2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void)
{
- return sysctl_net_busy_poll;
+ return READ_ONCE(sysctl_net_busy_poll);
}
static inline bool sk_can_busy_loop(const struct sock *sk)
diff --git a/include/net/gro.h b/include/net/gro.h
index 867656b0739c..24003dea8fa4 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= gro_normal_batch)
+ if (napi->rx_count >= READ_ONCE(gro_normal_batch))
gro_normal_list(napi);
}
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d5326c44b453..cd982f4a0f50 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);
@@ -306,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
struct flow_offload *flow);
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
+
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 99aae36c04b9..cdb7db9b0e25 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1652,6 +1652,7 @@ struct nftables_pernet {
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
+ u64 table_handle;
unsigned int base_seq;
u8 validate_state;
};
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1463cfecb56b..9e0b5c8d92ce 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
#include <linux/time_types.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/*
* IO submission data structure (Submission Queue Entry)
*/
@@ -661,4 +665,8 @@ struct io_uring_recvmsg_out {
__u32 flags;
};
+#ifdef __cplusplus
+}
+#endif
+
#endif
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index b1f3e6a8f11a..4f84ea7ee14c 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -296,7 +296,7 @@ enum xfrm_attr_type_t {
XFRMA_ETIMER_THRESH,
XFRMA_SRCADDR, /* xfrm_address_t */
XFRMA_COADDR, /* xfrm_address_t */
- XFRMA_LASTUSED, /* unsigned long */
+ XFRMA_LASTUSED, /* __u64 */
XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */
XFRMA_MIGRATE,
XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */
diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index f81aa95ffbc4..f525566a0864 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor)
#define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK)
-#define UFSHCD_ERROR_MASK (UIC_ERROR |\
- DEVICE_FATAL_ERROR |\
- CONTROLLER_FATAL_ERROR |\
- SYSTEM_BUS_FATAL_ERROR |\
- CRYPTO_ENGINE_FATAL_ERROR)
+#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS)
#define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\
CONTROLLER_FATAL_ERROR |\
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index e4e1dc0325f0..5fc5d3e80fcb 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -218,7 +218,7 @@ static int __io_sync_cancel(struct io_uring_task *tctx,
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
unsigned long file_ptr;
- if (unlikely(fd > ctx->nr_user_files))
+ if (unlikely(fd >= ctx->nr_user_files))
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index ebfdb2212ec2..77616279000b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1450,9 +1450,10 @@ int io_req_prep_async(struct io_kiocb *req)
return 0;
if (WARN_ON_ONCE(req_has_async_data(req)))
return -EFAULT;
- if (io_alloc_async_data(req))
- return -EAGAIN;
-
+ if (!io_op_defs[req->opcode].manual_alloc) {
+ if (io_alloc_async_data(req))
+ return -EAGAIN;
+ }
return def->prep_async(req);
}
diff --git a/io_uring/net.c b/io_uring/net.c
index f8cdf1dc3863..0af8a02df580 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -182,6 +182,37 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
&iomsg->free_iov);
}
+int io_sendzc_prep_async(struct io_kiocb *req)
+{
+ struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
+ struct io_async_msghdr *io;
+ int ret;
+
+ if (!zc->addr || req_has_async_data(req))
+ return 0;
+ if (io_alloc_async_data(req))
+ return -ENOMEM;
+
+ io = req->async_data;
+ ret = move_addr_to_kernel(zc->addr, zc->addr_len, &io->addr);
+ return ret;
+}
+
+static int io_setup_async_addr(struct io_kiocb *req,
+ struct sockaddr_storage *addr,
+ unsigned int issue_flags)
+{
+ struct io_async_msghdr *io;
+
+ if (!addr || req_has_async_data(req))
+ return -EAGAIN;
+ if (io_alloc_async_data(req))
+ return -ENOMEM;
+ io = req->async_data;
+ memcpy(&io->addr, addr, sizeof(io->addr));
+ return -EAGAIN;
+}
+
int io_sendmsg_prep_async(struct io_kiocb *req)
{
int ret;
@@ -944,7 +975,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb,
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
- struct sockaddr_storage address;
+ struct sockaddr_storage __address, *addr = NULL;
struct io_ring_ctx *ctx = req->ctx;
struct io_sendzc *zc = io_kiocb_to_cmd(req, struct io_sendzc);
struct io_notif_slot *notif_slot;
@@ -978,10 +1009,17 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
msg.msg_namelen = 0;
if (zc->addr) {
- ret = move_addr_to_kernel(zc->addr, zc->addr_len, &address);
- if (unlikely(ret < 0))
- return ret;
- msg.msg_name = (struct sockaddr *)&address;
+ if (req_has_async_data(req)) {
+ struct io_async_msghdr *io = req->async_data;
+
+ msg.msg_name = addr = &io->addr;
+ } else {
+ ret = move_addr_to_kernel(zc->addr, zc->addr_len, &__address);
+ if (unlikely(ret < 0))
+ return ret;
+ msg.msg_name = (struct sockaddr *)&__address;
+ addr = &__address;
+ }
msg.msg_namelen = zc->addr_len;
}
@@ -989,7 +1027,7 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
ret = io_import_fixed(WRITE, &msg.msg_iter, req->imu,
(u64)(uintptr_t)zc->buf, zc->len);
if (unlikely(ret))
- return ret;
+ return ret;
} else {
ret = import_single_range(WRITE, zc->buf, zc->len, &iov,
&msg.msg_iter);
@@ -1013,16 +1051,18 @@ int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret < min_ret)) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
- return -EAGAIN;
+ return io_setup_async_addr(req, addr, issue_flags);
+
if (ret > 0 && io_net_retry(sock, msg.msg_flags)) {
zc->len -= ret;
zc->buf += ret;
zc->done_io += ret;
req->flags |= REQ_F_PARTIAL_IO;
- return -EAGAIN;
+ return io_setup_async_addr(req, addr, issue_flags);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
+ req_set_fail(req);
} else if (zc->flags & IORING_RECVSEND_NOTIF_FLUSH) {
io_notif_slot_flush_submit(notif_slot, 0);
}
diff --git a/io_uring/net.h b/io_uring/net.h
index 7c438d39c089..f91f56c6eeac 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -31,6 +31,7 @@ struct io_async_connect {
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_shutdown(struct io_kiocb *req, unsigned int issue_flags);
+int io_sendzc_prep_async(struct io_kiocb *req);
int io_sendmsg_prep_async(struct io_kiocb *req);
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req);
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/io_uring/notif.c b/io_uring/notif.c
index 977736e82c1a..96f076b175e0 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -73,7 +73,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx,
}
void io_notif_slot_flush(struct io_notif_slot *slot)
- __must_hold(&ctx->uring_lock)
+ __must_hold(&slot->notif->ctx->uring_lock)
{
struct io_kiocb *notif = slot->notif;
struct io_notif_data *nd = io_notif_to_data(notif);
@@ -81,8 +81,10 @@ void io_notif_slot_flush(struct io_notif_slot *slot)
slot->notif = NULL;
/* drop slot's master ref */
- if (refcount_dec_and_test(&nd->uarg.refcnt))
- io_notif_complete(notif);
+ if (refcount_dec_and_test(&nd->uarg.refcnt)) {
+ notif->io_task_work.func = __io_notif_complete_tw;
+ io_req_task_work_add(notif);
+ }
}
__cold int io_notif_unregister(struct io_ring_ctx *ctx)
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 72dd2b2d8a9d..41410126c1c6 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -478,13 +478,15 @@ const struct io_op_def io_op_defs[] = {
.pollout = 1,
.audit_skip = 1,
.ioprio = 1,
+ .manual_alloc = 1,
#if defined(CONFIG_NET)
+ .async_size = sizeof(struct io_async_msghdr),
.prep = io_sendzc_prep,
.issue = io_sendzc,
+ .prep_async = io_sendzc_prep_async,
#else
.prep = io_eopnotsupp_prep,
#endif
-
},
};
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index ece8ed4f96c4..763c6e54e2ee 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -25,6 +25,8 @@ struct io_op_def {
unsigned ioprio : 1;
/* supports iopoll */
unsigned iopoll : 1;
+ /* opcode specific path will handle ->async_data allocation if needed */
+ unsigned manual_alloc : 1;
/* size of async data needed, if any */
unsigned short async_size;
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 8e0cc2d9205e..b9989ae7b957 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -112,7 +112,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
- return IOU_OK;
+ return ret;
}
return IOU_ISSUE_SKIP_COMPLETE;
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 6432a37ac1c9..c565fbf66ac8 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) {
+ audit_mark->path = NULL;
fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret);
}
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 2ade21b54dc4..ff6a8099eb2a 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
int retval = 0;
mutex_lock(&cgroup_mutex);
+ cpus_read_lock();
percpu_down_write(&cgroup_threadgroup_rwsem);
for_each_root(root) {
struct cgroup *from_cgrp;
@@ -72,6 +73,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
break;
}
percpu_up_write(&cgroup_threadgroup_rwsem);
+ cpus_read_unlock();
mutex_unlock(&cgroup_mutex);
return retval;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6373f1..e4bb5d57f4d1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1820,6 +1820,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
if (ss->css_rstat_flush) {
list_del_rcu(&css->rstat_css_node);
+ synchronize_rcu();
list_add_rcu(&css->rstat_css_node,
&dcgrp->rstat_css_list);
}
@@ -2370,6 +2371,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
EXPORT_SYMBOL_GPL(task_cgroup_path);
/**
+ * cgroup_attach_lock - Lock for ->attach()
+ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
+ *
+ * cgroup migration sometimes needs to stabilize threadgroups against forks and
+ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
+ * implementations (e.g. cpuset), also need to disable CPU hotplug.
+ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
+ * lead to deadlocks.
+ *
+ * Bringing up a CPU may involve creating and destroying tasks which requires
+ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
+ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
+ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
+ * waiting for an on-going CPU hotplug operation which in turn is waiting for
+ * the threadgroup_rwsem to be released to create new tasks. For more details:
+ *
+ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
+ *
+ * Resolve the situation by always acquiring cpus_read_lock() before optionally
+ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
+ * CPU hotplug is disabled on entry.
+ */
+static void cgroup_attach_lock(bool lock_threadgroup)
+{
+ cpus_read_lock();
+ if (lock_threadgroup)
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+}
+
+/**
+ * cgroup_attach_unlock - Undo cgroup_attach_lock()
+ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
+ */
+static void cgroup_attach_unlock(bool lock_threadgroup)
+{
+ if (lock_threadgroup)
+ percpu_up_write(&cgroup_threadgroup_rwsem);
+ cpus_read_unlock();
+}
+
+/**
* cgroup_migrate_add_task - add a migration target task to a migration context
* @task: target task
* @mgctx: target migration context
@@ -2841,8 +2883,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
}
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
- bool *locked)
- __acquires(&cgroup_threadgroup_rwsem)
+ bool *threadgroup_locked)
{
struct task_struct *tsk;
pid_t pid;
@@ -2859,12 +2900,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
* Therefore, we can skip the global lock.
*/
lockdep_assert_held(&cgroup_mutex);
- if (pid || threadgroup) {
- percpu_down_write(&cgroup_threadgroup_rwsem);
- *locked = true;
- } else {
- *locked = false;
- }
+ *threadgroup_locked = pid || threadgroup;
+ cgroup_attach_lock(*threadgroup_locked);
rcu_read_lock();
if (pid) {
@@ -2895,17 +2932,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
goto out_unlock_rcu;
out_unlock_threadgroup:
- if (*locked) {
- percpu_up_write(&cgroup_threadgroup_rwsem);
- *locked = false;
- }
+ cgroup_attach_unlock(*threadgroup_locked);
+ *threadgroup_locked = false;
out_unlock_rcu:
rcu_read_unlock();
return tsk;
}
-void cgroup_procs_write_finish(struct task_struct *task, bool locked)
- __releases(&cgroup_threadgroup_rwsem)
+void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
{
struct cgroup_subsys *ss;
int ssid;
@@ -2913,8 +2947,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked)
/* release reference from cgroup_procs_write_start() */
put_task_struct(task);
- if (locked)
- percpu_up_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_unlock(threadgroup_locked);
+
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
@@ -3000,8 +3034,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
* write-locking can be skipped safely.
*/
has_tasks = !list_empty(&mgctx.preloaded_src_csets);
- if (has_tasks)
- percpu_down_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_lock(has_tasks);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(&mgctx);
@@ -3022,8 +3055,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
ret = cgroup_migrate_execute(&mgctx);
out_finish:
cgroup_migrate_finish(&mgctx);
- if (has_tasks)
- percpu_up_write(&cgroup_threadgroup_rwsem);
+ cgroup_attach_unlock(has_tasks);
return ret;
}
@@ -3698,7 +3730,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_ino(cgrp) == 1 ? &psi_system : cgrp->psi;
- new = psi_trigger_create(psi, buf, nbytes, res);
+ new = psi_trigger_create(psi, buf, res);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
@@ -4971,13 +5003,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
struct task_struct *task;
const struct cred *saved_cred;
ssize_t ret;
- bool locked;
+ bool threadgroup_locked;
dst_cgrp = cgroup_kn_lock_live(of->kn, false);
if (!dst_cgrp)
return -ENODEV;
- task = cgroup_procs_write_start(buf, threadgroup, &locked);
+ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked);
ret = PTR_ERR_OR_ZERO(task);
if (ret)
goto out_unlock;
@@ -5003,7 +5035,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
ret = cgroup_attach_task(dst_cgrp, task, threadgroup);
out_finish:
- cgroup_procs_write_finish(task, locked);
+ cgroup_procs_write_finish(task, threadgroup_locked);
out_unlock:
cgroup_kn_unlock(of->kn);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 58aadfda9b8b..1f3a55297f39 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2289,7 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
cgroup_taskset_first(tset, &css);
cs = css_cs(css);
- cpus_read_lock();
+ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
@@ -2343,7 +2343,6 @@ static void cpuset_attach(struct cgroup_taskset *tset)
wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
- cpus_read_unlock();
}
/* The various types of files and directories in a cpuset file system */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 80697e5e03e4..08350e35aba2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
/*
- * If 'kprobes_all_disarmed' is set, 'orig_p'
- * should have already been disarmed, so
- * skip unneed disarming process.
+ * Don't be lazy here. Even if 'kprobes_all_disarmed'
+ * is false, 'orig_p' might not have been armed yet.
+ * Note arm_all_kprobes() __tries__ to arm all kprobes
+ * on the best effort basis.
*/
- if (!kprobes_all_disarmed) {
+ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
ret = disarm_kprobe(orig_p, true);
if (ret) {
p->flags &= ~KPROBE_FLAG_DISABLED;
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 6a477c622544..a4e4d84b6f4e 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2099,7 +2099,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->static_call_sites),
&mod->num_static_call_sites);
#endif
-#ifdef CONFIG_KUNIT
+#if IS_ENABLED(CONFIG_KUNIT)
mod->kunit_suites = section_objs(info, ".kunit_test_suites",
sizeof(*mod->kunit_suites),
&mod->num_kunit_suites);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index ec66b40bdd40..ecb4b4ff4ce0 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -190,12 +190,8 @@ static void group_init(struct psi_group *group)
/* Init trigger-related members */
mutex_init(&group->trigger_lock);
INIT_LIST_HEAD(&group->triggers);
- memset(group->nr_triggers, 0, sizeof(group->nr_triggers));
- group->poll_states = 0;
group->poll_min_period = U32_MAX;
- memset(group->polling_total, 0, sizeof(group->polling_total));
group->polling_next_update = ULLONG_MAX;
- group->polling_until = 0;
init_waitqueue_head(&group->poll_wait);
timer_setup(&group->poll_timer, poll_timer_fn, 0);
rcu_assign_pointer(group->poll_task, NULL);
@@ -957,7 +953,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
if (static_branch_likely(&psi_disabled))
return 0;
- cgroup->psi = kmalloc(sizeof(struct psi_group), GFP_KERNEL);
+ cgroup->psi = kzalloc(sizeof(struct psi_group), GFP_KERNEL);
if (!cgroup->psi)
return -ENOMEM;
@@ -1091,7 +1087,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
}
struct psi_trigger *psi_trigger_create(struct psi_group *group,
- char *buf, size_t nbytes, enum psi_res res)
+ char *buf, enum psi_res res)
{
struct psi_trigger *t;
enum psi_states state;
@@ -1320,7 +1316,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
return -EBUSY;
}
- new = psi_trigger_create(&psi_system, buf, nbytes, res);
+ new = psi_trigger_create(&psi_system, buf, res);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index d4788f810b55..0b1cd985dc27 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode);
- } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+ } while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a492f159624f..860b2dcf3ac4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
+COND_SYSCALL_COMPAT(fadvise64_64);
/* mm/, CONFIG_MMU only */
COND_SYSCALL(swapon);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 126c769d36c3..439e2ab6905e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,8 +1861,6 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
ftrace_hash_rec_update_modify(ops, filter_hash, 1);
}
-static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
-
/*
* Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
* or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -3118,49 +3116,6 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
ftrace_hash_empty(ops->func_hash->notrace_hash);
}
-/*
- * Check if the current ops references the given ip.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
-{
- /* If ops isn't enabled, ignore it */
- if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
- return false;
-
- /* If ops traces all then it includes this function */
- if (ops_traces_mod(ops))
- return true;
-
- /* The function must be in the filter */
- if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
- !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
- return false;
-
- /* If in notrace hash, we ignore it too */
- if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
- return false;
-
- return true;
-}
-
-/*
- * Check if the current ops references the record.
- *
- * If the ops traces all functions, then it was already accounted for.
- * If the ops does not trace the current record function, skip it.
- * If the ops ignores the function via notrace filter, skip it.
- */
-static bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
-{
- return ops_references_ip(ops, rec->ip);
-}
-
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
bool init_nop = ftrace_need_init_nop();
@@ -6822,6 +6777,38 @@ static int ftrace_get_trampoline_kallsym(unsigned int symnum,
return -ERANGE;
}
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) || defined(CONFIG_MODULES)
+/*
+ * Check if the current ops references the given ip.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
+{
+ /* If ops isn't enabled, ignore it */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return false;
+
+ /* If ops traces all then it includes this function */
+ if (ops_traces_mod(ops))
+ return true;
+
+ /* The function must be in the filter */
+ if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
+ !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
+ return false;
+
+ /* If in notrace hash, we ignore it too */
+ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
+ return false;
+
+ return true;
+}
+#endif
+
#ifdef CONFIG_MODULES
#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
@@ -6834,7 +6821,7 @@ static int referenced_filters(struct dyn_ftrace *rec)
int cnt = 0;
for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
- if (ops_references_rec(ops, rec)) {
+ if (ops_references_ip(ops, rec->ip)) {
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_DIRECT))
continue;
if (WARN_ON_ONCE(ops->flags & FTRACE_OPS_FL_IPMODIFY))
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index e01a93f46f83..ce945c17980b 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -26,10 +26,16 @@
*/
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
+ /* Paired with WRITE_ONCE() in .proc_handler().
+ * Changing two values seperately could be inconsistent
+ * and some message could be lost. (See: net_ratelimit_state).
+ */
+ int interval = READ_ONCE(rs->interval);
+ int burst = READ_ONCE(rs->burst);
unsigned long flags;
int ret;
- if (!rs->interval)
+ if (!interval)
return 1;
/*
@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
if (!rs->begin)
rs->begin = jiffies;
- if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (time_is_before_jiffies(rs->begin + interval)) {
if (rs->missed) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
printk_deferred(KERN_WARNING
@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->begin = jiffies;
rs->printed = 0;
}
- if (rs->burst && rs->burst > rs->printed) {
+ if (burst && burst > rs->printed) {
rs->printed++;
ret = 1;
} else {
diff --git a/mm/gup.c b/mm/gup.c
index 732825157430..5abdaf487460 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
return -EEXIST;
}
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ /* If the pte is writable, we can write to the page. */
+ if (pte_write(pte))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+ return false;
+ return !userfaultfd_pte_wp(vma, pte);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +556,19 @@ retry:
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
- pte_unmap_unlock(ptep, ptl);
- return NULL;
- }
page = vm_normal_page(vma, address, pte);
+
+ /*
+ * We only care about anon pages in can_follow_write_pte() and don't
+ * have to worry about pte_devmap() because they are never anon.
+ */
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pte(pte, page, vma, flags)) {
+ page = NULL;
+ goto out;
+ }
+
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma,
return -EBUSY;
}
- /*
- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
- * necessary, even if maybe_mkwrite decided not to set pte_write. We
- * can thus safely do subsequent page lookups as if they were reads.
- * But only do so when looping for pte_write is futile: in some cases
- * userspace may also be wanting to write to the gotten user page,
- * which a read fault here might prevent (a readonly page might get
- * reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8a7c1b344abe..e9414ee57c5b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd));
- /*
- * When we COW a devmap PMD entry, we split it into PTEs, so we should
- * not be in this function with `flags & FOLL_COW` set.
- */
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -1395,14 +1389,42 @@ fallback:
return VM_FAULT_FALLBACK;
}
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
+ struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
- goto out;
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(*pmd, page, vma, flags))
+ return NULL;
/* Avoid dumping huge zero page */
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- goto out;
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+ return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
return ERR_PTR(-EMLINK);
@@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-out:
return page;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0aee2f3ae15c..2480ba627aa5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5241,6 +5241,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
VM_BUG_ON(unshare && (flags & FOLL_WRITE));
VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
+ /*
+ * hugetlb does not support FOLL_FORCE-style write faults that keep the
+ * PTE mapped R/O such as maybe_mkwrite() would do.
+ */
+ if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
+ return VM_FAULT_SIGSEGV;
+
+ /* Let's take out MAP_SHARED mappings first. */
+ if (vma->vm_flags & VM_MAYSHARE) {
+ if (unlikely(unshare))
+ return 0;
+ set_huge_ptep_writable(vma, haddr, ptep);
+ return 0;
+ }
+
pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
@@ -5781,12 +5796,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
* allocations necessary to record that reservation occur outside the
- * spinlock. For private mappings, we also lookup the pagecache
- * page now as it is used to determine if a reservation has been
- * consumed.
+ * spinlock. Also lookup the pagecache page now as it is used to
+ * determine if a reservation has been consumed.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
- !huge_pte_write(entry)) {
+ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
@@ -5794,9 +5808,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- if (!(vma->vm_flags & VM_MAYSHARE))
- pagecache_page = hugetlbfs_pagecache_page(h,
- vma, haddr);
+ pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
diff --git a/mm/mmap.c b/mm/mmap.c
index c035020d0c89..9d780f415be3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1646,8 +1646,11 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
return 0;
- /* Do we need to track softdirty? */
- if (vma_soft_dirty_enabled(vma))
+ /*
+ * Do we need to track softdirty? hugetlb does not support softdirty
+ * tracking yet.
+ */
+ if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
/* Specialty mapping? */
diff --git a/mm/shmem.c b/mm/shmem.c
index 5783f11351bb..d075dd2dcc48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1659,7 +1659,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
new = page_folio(newpage);
mem_cgroup_migrate(old, new);
__inc_lruvec_page_state(newpage, NR_FILE_PAGES);
+ __inc_lruvec_page_state(newpage, NR_SHMEM);
__dec_lruvec_page_state(oldpage, NR_FILE_PAGES);
+ __dec_lruvec_page_state(oldpage, NR_SHMEM);
}
xa_unlock_irq(&swap_mapping->i_pages);
@@ -2281,16 +2283,34 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-/* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned shmem_mask_flags(umode_t mode, __u32 flags)
+#ifdef CONFIG_TMPFS_XATTR
+static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
+
+/*
+ * chattr's fsflags are unrelated to extended attributes,
+ * but tmpfs has chosen to enable them under the same config option.
+ */
+static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
+{
+ unsigned int i_flags = 0;
+
+ if (fsflags & FS_NOATIME_FL)
+ i_flags |= S_NOATIME;
+ if (fsflags & FS_APPEND_FL)
+ i_flags |= S_APPEND;
+ if (fsflags & FS_IMMUTABLE_FL)
+ i_flags |= S_IMMUTABLE;
+ /*
+ * But FS_NODUMP_FL does not require any action in i_flags.
+ */
+ inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE);
+}
+#else
+static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & SHMEM_REG_FLMASK;
- else
- return flags & SHMEM_OTHER_FLMASK;
}
+#define shmem_initxattrs NULL
+#endif
static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
umode_t mode, dev_t dev, unsigned long flags)
@@ -2319,7 +2339,8 @@ static struct inode *shmem_get_inode(struct super_block *sb, struct inode *dir,
info->i_crtime = inode->i_mtime;
info->fsflags = (dir == NULL) ? 0 :
SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
- info->fsflags = shmem_mask_flags(mode, info->fsflags);
+ if (info->fsflags)
+ shmem_set_inode_flags(inode, info->fsflags);
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
@@ -2468,12 +2489,6 @@ out_unacct_blocks:
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;
-#ifdef CONFIG_TMPFS_XATTR
-static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
-#else
-#define shmem_initxattrs NULL
-#endif
-
static int
shmem_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
@@ -2826,12 +2841,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
- inode->i_ctime = current_time(inode);
undone:
spin_lock(&inode->i_lock);
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out:
+ if (!error)
+ file_modified(file);
inode_unlock(inode);
return error;
}
@@ -3179,18 +3195,13 @@ static int shmem_fileattr_set(struct user_namespace *mnt_userns,
if (fileattr_has_fsx(fa))
return -EOPNOTSUPP;
+ if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE)
+ return -EOPNOTSUPP;
info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
(fa->flags & SHMEM_FL_USER_MODIFIABLE);
- inode->i_flags &= ~(S_APPEND | S_IMMUTABLE | S_NOATIME);
- if (info->fsflags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
- if (info->fsflags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
- if (info->fsflags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
-
+ shmem_set_inode_flags(inode, info->fsflags);
inode->i_ctime = current_time(inode);
return 0;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 07d3befc80e4..7327b2573f7c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
mmap_changing, 0);
}
+void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+ unsigned long start, unsigned long len, bool enable_wp)
+{
+ struct mmu_gather tlb;
+ pgprot_t newprot;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ tlb_gather_mmu(&tlb, dst_mm);
+ change_protection(&tlb, dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+ tlb_finish_mmu(&tlb);
+}
+
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
struct vm_area_struct *dst_vma;
unsigned long page_mask;
- struct mmu_gather tlb;
- pgprot_t newprot;
int err;
/*
@@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
goto out_unlock;
}
- if (enable_wp)
- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
- else
- newprot = vm_get_page_prot(dst_vma->vm_flags);
-
- tlb_gather_mmu(&tlb, dst_mm);
- change_protection(&tlb, dst_vma, start, start + len, newprot,
- enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
- tlb_finish_mmu(&tlb);
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
err = 0;
out_unlock:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 373d2730fcf2..90af9a8572f5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1168,8 +1168,15 @@ int fragmentation_index(struct zone *zone, unsigned int order)
#define TEXT_FOR_HIGHMEM(xx)
#endif
+#ifdef CONFIG_ZONE_DEVICE
+#define TEXT_FOR_DEVICE(xx) xx "_device",
+#else
+#define TEXT_FOR_DEVICE(xx)
+#endif
+
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
- TEXT_FOR_HIGHMEM(xx) xx "_movable",
+ TEXT_FOR_HIGHMEM(xx) xx "_movable", \
+ TEXT_FOR_DEVICE(xx)
const char * const vmstat_text[] = {
/* enum zone_stat_item counters */
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1a11064f9990..8f19253024b0 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)&initial_chain,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~(1 << NF_BR_BROUTING))
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index cb949436bc0e..278f324e6752 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~FILTER_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 5ee0531ae506..9066f7f376d5 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~NAT_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f2dbefb61ce8..9a0ae59cdc50 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_iterate;
}
- /* the table doesn't like it */
- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
+ if (repl->valid_hooks != t->valid_hooks)
goto free_unlock;
if (repl->num_counters && repl->num_counters != t->private->nentries) {
@@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (ret != 0)
goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) {
- ret = -EINVAL;
- goto free_chainstack;
- }
-
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 1b7f385643b4..94374d529ea4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
/* same check as in sock_kmalloc() */
- if (size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ if (size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
atomic_add(size, &sk->sk_omem_alloc);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 716df64fcfa5..56c8b0921c9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4624,7 +4624,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (netdev_max_backlog >> 1))
+ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4672,7 +4672,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -4928,7 +4928,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5281,7 +5281,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5664,7 +5664,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5694,7 +5694,7 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5918,7 +5918,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -6665,8 +6665,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -10284,7 +10284,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
return dev;
if (time_after(jiffies, warning_time +
- netdev_unregister_timeout_secs * HZ)) {
+ READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
diff --git a/net/core/filter.c b/net/core/filter.c
index e8508aaafd27..c191db80ce93 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
- if (filter_size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ if (filter_size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -5034,14 +5035,14 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
- val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 541c7a72a28a..21619c70a82b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5b669eb80270..78cc8fb68814 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -309,14 +309,17 @@ static int neigh_del_timer(struct neighbour *n)
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
+ struct sk_buff_head tmp;
unsigned long flags;
struct sk_buff *skb;
+ skb_queue_head_init(&tmp);
spin_lock_irqsave(&list->lock, flags);
skb = skb_peek(list);
while (skb != NULL) {
struct sk_buff *skb_next = skb_peek_next(skb, list);
struct net_device *dev = skb->dev;
+
if (net == NULL || net_eq(dev_net(dev), net)) {
struct in_device *in_dev;
@@ -326,13 +329,16 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
in_dev->arp_parms->qlen--;
rcu_read_unlock();
__skb_unlink(skb, list);
-
- dev_put(dev);
- kfree_skb(skb);
+ __skb_queue_tail(&tmp, skb);
}
skb = skb_next;
}
spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
+ dev_put(skb->dev);
+ kfree_skb(skb);
+ }
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 974bbbbe7138..84bb5e188d0d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4205,9 +4205,8 @@ normal:
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
- skb_copy_bits(head_skb, offset,
- skb_put(nskb, len),
- len);
+ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len))
+ goto err;
}
continue;
}
@@ -4798,7 +4797,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 4cb957d934a2..788c1372663c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1101,7 +1101,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
/* Ensure val * 2 fits into an int, to prevent max_t()
* from treating it as a negative value.
@@ -1133,7 +1133,7 @@ set_sndbuf:
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
+ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
break;
case SO_RCVBUFFORCE:
@@ -2536,7 +2536,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- sysctl_optmem_max)
+ READ_ONCE(sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2554,8 +2554,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- if ((unsigned int)size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ if ((unsigned int)size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -3309,8 +3311,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
- sk->sk_rcvbuf = sysctl_rmem_default;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -3365,7 +3367,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0UL;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 71a13596ea2b..725891527814 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
static int proc_do_dev_weight(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ad6a6663feeb..1291c2431d44 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2484,7 +2484,7 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- if (!extack->_msg)
+ if (extack && !extack->_msg)
NL_SET_ERR_MSG_MOD(extack,
"Offloading not supported");
err = 0;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 92b778e423df..e8b9a9202fec 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net)
#endif
if (!net_eq(net, &init_net)) {
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- sysctl_devconf_inherit_init_net == 3) {
+ switch (net_inherit_devconf()) {
+ case 3:
/* copy from the current netns */
memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt,
current->nsproxy->net_ns->ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
- sysctl_devconf_inherit_init_net != 2) {
- /* inherit == 0 or 1: copy from init_net */
+ break;
+ case 0:
+ case 1:
+ /* copy from init_net */
memcpy(all, init_net.ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt, init_net.ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
+ break;
+ case 2:
+ /* use compiled values */
+ break;
}
- /* else inherit == 2: use compiled values */
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d7bd1daf022b..04e2034f2f8e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1730,7 +1730,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a8a323ecbb54..e49a61a053a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > sysctl_optmem_max) {
+ if (optlen > READ_ONCE(sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bbe218753662..e5011c136fdb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1000,7 +1000,7 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1354,7 +1354,7 @@ new_segment:
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags) {
+ if (i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 78b654ff421b..290019de766d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- space = max_t(u32, space, sysctl_rmem_max);
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b624e3d8c5f0..e15f64f22fa8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7162,9 +7162,8 @@ static int __net_init addrconf_init_net(struct net *net)
if (!dflt)
goto err_alloc_dflt;
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- !net_eq(net, &init_net)) {
- switch (sysctl_devconf_inherit_init_net) {
+ if (!net_eq(net, &init_net)) {
+ switch (net_inherit_devconf()) {
case 1: /* copy from init_net */
memcpy(all, init_net.ipv6.devconf_all,
sizeof(ipv6_devconf));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 222f6bf220ba..e0dcc7a193df 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 7dd3629dd19e..38db0064d661 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -86,7 +86,6 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
table[1].extra2 = &nf_frag->fqdir->high_thresh;
table[2].data = &nf_frag->fqdir->high_thresh;
table[2].extra1 = &nf_frag->fqdir->low_thresh;
- table[2].extra2 = &nf_frag->fqdir->high_thresh;
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fda2dcc8a383..c85df5b958d2 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index da4257504fad..d398f3810662 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1263,7 +1263,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9d43277b8b4f..a56fd0b5a430 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- sysctl_wmem_max);
+ READ_ONCE(sysctl_wmem_max));
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- sysctl_rmem_max);
+ READ_ONCE(sysctl_rmem_max));
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a63b51dceaf2..a634c72b1ffc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -655,6 +655,37 @@ static bool tcp_in_window(struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res) {
+ bool seq_ok = before(seq, sender->td_maxend + 1);
+
+ if (!seq_ok) {
+ u32 overshot = end - sender->td_maxend + 1;
+ bool ack_ok;
+
+ ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
+
+ if (in_recv_win &&
+ ack_ok &&
+ overshot <= receiver->td_maxwin &&
+ before(sack, receiver->td_end + 1)) {
+ /* Work around TCPs that send more bytes than allowed by
+ * the receive window.
+ *
+ * If the (marked as invalid) packet is allowed to pass by
+ * the ruleset and the peer acks this data, then its possible
+ * all future packets will trigger 'ACK is over upper bound' check.
+ *
+ * Thus if only the sequence check fails then do update td_end so
+ * possible ACK for this data can update internal state.
+ */
+ sender->td_end = end;
+ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
+
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
+ "%u bytes more than expected", overshot);
+ return res;
+ }
+ }
+
nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%s",
before(seq, sender->td_maxend + 1) ?
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 765ac779bfc8..81c26a96c30b 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -437,12 +437,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
}
}
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+}
+
static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+ nf_flow_table_gc_run(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
@@ -600,11 +605,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
nf_flow_table_offload_flush(flow_table);
- if (nf_flowtable_hw_offload(flow_table))
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+ /* ... no more pending work after this stage ... */
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ nf_flow_table_gc_run(flow_table);
+ nf_flow_table_offload_flush_cleanup(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 103b6cbf257f..b04645ced89b 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -1074,6 +1074,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
flow_offload_queue_work(offload);
}
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_del_wq);
+ nf_flow_table_gc_run(flowtable);
+ }
+}
+
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable)) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 62cfb0e31c40..2ee50e23c9b7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
-static u64 table_handle;
enum {
NFT_VALIDATE_SKIP = 0,
@@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
- table->handle = ++table_handle;
+ table->handle = ++nft_net->table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
@@ -2196,9 +2195,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
+ struct nft_stats __percpu *stats = NULL;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
- struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_rule_blob *blob;
@@ -2236,7 +2235,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
- static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
@@ -2319,6 +2317,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
+ if (stats)
+ static_branch_inc(&nft_counters_enabled);
+
table->use++;
return 0;
@@ -2574,6 +2575,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
+ if (chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
@@ -9707,6 +9711,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (nft_chain_is_bound(chain))
+ return -EINVAL;
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 0053a697c931..89342ccccdcc 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_FORWARD));
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
}
static bool nft_osf_reduce(struct nft_regs_track *track,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 2e7ac007cb30..eb0e40c29712 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -740,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
- priv->csum_type =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
- priv->csum_offset =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
@@ -761,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->csum_flags = flags;
}
- switch (priv->csum_type) {
+ switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
@@ -775,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
default:
return -EOPNOTSUPP;
}
+ priv->csum_type = csum_type;
return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
@@ -833,6 +840,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
{
enum nft_payload_bases base;
unsigned int offset, len;
+ int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
@@ -859,8 +867,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 68b2eed742df..62da25ad264b 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
return 0;
}
+static int nft_tproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+}
+
static struct nft_expr_type nft_tproxy_type;
static const struct nft_expr_ops nft_tproxy_ops = {
.type = &nft_tproxy_type,
@@ -321,6 +328,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.destroy = nft_tproxy_destroy,
.dump = nft_tproxy_dump,
.reduce = NFT_REDUCE_READONLY,
+ .validate = nft_tproxy_validate,
};
static struct nft_expr_type nft_tproxy_type __read_mostly = {
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 5edaaded706d..983ade4be3b3 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -161,6 +161,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 11c45c8c6c16..036d92c0ad79 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused)
}
if (frametype == ROSE_CALL_REQUEST) {
- if (!rose_loopback_neigh->dev) {
+ if (!rose_loopback_neigh->dev &&
+ !rose_loopback_neigh->loopback) {
kfree_skb(skb);
continue;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 84d0a4109645..6401cdf7a624 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }
call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..3c3a626459de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}
@@ -290,37 +287,48 @@ out:
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
+
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}
skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);
- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;
/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
@@ -468,6 +468,27 @@ maybe_error:
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}
/*
@@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;
struct rxrpc_send_params p = {
@@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}
out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;
_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
break;
}
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d47b9689eba6..99b697ad2b98 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
diff --git a/net/socket.c b/net/socket.c
index 9b27c5e4e5ba..7378375d3a5b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1801,7 +1801,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b098e707ad41..7d268a291486 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1902,7 +1902,7 @@ call_encode(struct rpc_task *task)
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 82d14eea1b5a..974eb97b77d2 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -168,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 144238a50f3d..b2f4ec9c537f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -669,7 +669,6 @@ resume:
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock(&x->lock);
@@ -783,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 555ab35cd119..9a5e79a38c67 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -534,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock_bh(&x->lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1a0bab920a5..cc6ab79609e2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3162,7 +3162,7 @@ ok:
return dst;
nopol:
- if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
err = -EPERM;
goto error;
@@ -3599,6 +3599,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 52e60e607f8a..91c32a3b6924 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1592,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->replay = orig->replay;
x->preplay = orig->preplay;
x->mapping_maxage = orig->mapping_maxage;
+ x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 24ae3054f304..1bdd834bdd57 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -36,4 +36,8 @@
#include <linux/compiler-gcc.h>
#endif
+#ifndef asm_volatile_goto
+#define asm_volatile_goto(x...) asm goto(x)
+#endif
+
#endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 10b34bb03bc1..c2064a35688b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -12,6 +12,7 @@ TARGETS += cpu-hotplug
TARGETS += damon
TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net/bonding
TARGETS += efivarfs
TARGETS += exec
TARGETS += filesystems
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644
index 000000000000..ab6c54b12098
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := bond-break-lacpdu-tx.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755
index 000000000000..47ab90596acb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify LACPDUs get transmitted after setting the MAC address of
+# the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+# +---------+
+# | fab-br0 |
+# +---------+
+# |
+# +---------+
+# | fbond |
+# +---------+
+# | |
+# +------+ +------+
+# |veth1 | |veth2 |
+# +------+ +------+
+#
+# We use veths instead of physical interfaces
+
+set -e
+tmp=$(mktemp -q dump.XXXXXX)
+cleanup() {
+ ip link del fab-br0 >/dev/null 2>&1 || :
+ ip link del fbond >/dev/null 2>&1 || :
+ ip link del veth1-bond >/dev/null 2>&1 || :
+ ip link del veth2-bond >/dev/null 2>&1 || :
+ modprobe -r bonding >/dev/null 2>&1 || :
+ rm -f -- ${tmp}
+}
+
+trap cleanup 0 1 2
+cleanup
+sleep 1
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+ forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+ ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set again bond sysfs parameters
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond down master fbond
+ip link set veth2-bond down master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 &
+sleep 15
+pkill tcpdump >/dev/null 2>&1
+rc=0
+num=$(grep "packets captured" ${tmp} | awk '{print $1}')
+if test "$num" -gt 0; then
+ echo "PASS, captured ${num}"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644
index 000000000000..dc1c22de3c92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -0,0 +1 @@
+CONFIG_BONDING=y
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644
index 000000000000..867e118223cd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -0,0 +1 @@
+timeout=60
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 947fc72413e9..d44c72b3abe3 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -40,6 +40,7 @@ ifeq (0,$(MAKELEVEL))
endif
endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
+top_srcdir = $(selfdir)/../../..
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index 50c5ab1aa6fa..a07896a46364 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -17,6 +17,12 @@
#include "defines.h"
#include "main.h"
+/*
+ * FIXME: OpenSSL 3.0 has deprecated some functions. For now just ignore
+ * the warnings.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
struct q1q2_ctx {
BN_CTX *bn_ctx;
BIGNUM *m;