summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_design_QA.rst4
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst14
-rw-r--r--Documentation/bpf/cpumasks.rst4
-rw-r--r--Documentation/bpf/instruction-set.rst40
-rw-r--r--Documentation/bpf/kfuncs.rst41
-rw-r--r--Documentation/bpf/maps.rst7
-rw-r--r--Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml24
-rw-r--r--Documentation/netlink/genetlink-c.yaml2
-rw-r--r--Documentation/netlink/genetlink-legacy.yaml2
-rw-r--r--Documentation/netlink/genetlink.yaml3
-rw-r--r--Documentation/netlink/specs/devlink.yaml198
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst35
-rw-r--r--Documentation/networking/devlink/mlx5.rst12
-rw-r--r--arch/loongarch/net/bpf_jit.c6
-rw-r--r--arch/mips/Kconfig5
-rw-r--r--arch/mips/net/bpf_jit_comp.c4
-rw-r--r--arch/mips/net/bpf_jit_comp64.c3
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c5
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek_ptp.c45
-rw-r--r--drivers/net/dsa/lan9303_i2c.c2
-rw-r--r--drivers/net/dsa/lan9303_mdio.c2
-rw-r--r--drivers/net/dsa/lantiq_gswip.c2
-rw-r--r--drivers/net/dsa/microchip/ksz9477_i2c.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c238
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h18
-rw-r--r--drivers/net/dsa/ocelot/ocelot_ext.c2
-rw-r--r--drivers/net/dsa/ocelot/seville_vsc9953.c2
-rw-r--r--drivers/net/dsa/realtek/rtl8365mb.c40
-rw-r--r--drivers/net/ethernet/Kconfig2
-rw-r--r--drivers/net/ethernet/atheros/alx/main.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c21
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c19
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c2
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c4
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c27
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c8
-rw-r--r--drivers/net/ethernet/fungible/funcore/fun_dev.c5
-rw-r--r--drivers/net/ethernet/google/gve/gve.h112
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.c8
-rw-r--r--drivers/net/ethernet/google/gve/gve_adminq.h4
-rw-r--r--drivers/net/ethernet/google/gve/gve_ethtool.c91
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c719
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c147
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx_dqo.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c298
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.c6
-rw-r--r--drivers/net/ethernet/google/gve/gve_utils.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c1
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c88
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_trace.h20
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c420
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h20
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c77
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.h15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_mbx.c249
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_mbx.h17
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.h8
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c1
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h4
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c1
-rw-r--r--drivers/net/ethernet/intel/igc/igc_hw.h1
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c52
-rw-r--r--drivers/net/ethernet/intel/igc/igc_tsn.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h1
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c24
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c4
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/port.c151
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/thermal.c108
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/thermal.h20
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_main.h38
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_police.c13
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_regs.h36
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c221
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c1402
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c133
-rw-r--r--drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c192
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.c1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_main.h1
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c209
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c2
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c270
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h6
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_ag_api.h217
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.c61
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_client.h11
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c4
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c62
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_ethtool.c52
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/conntrack.c260
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/conntrack.h32
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c2
-rw-r--r--drivers/net/ethernet/ni/nixge.c2
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic.h2
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c10
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c9
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c1
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c1
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c4
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c1
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c145
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c15
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c2
-rw-r--r--drivers/net/ethernet/sfc/ef100.c3
-rw-r--r--drivers/net/ethernet/sfc/efx.c5
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c9
-rw-r--r--drivers/net/ethernet/sfc/mae.c16
-rw-r--r--drivers/net/ethernet/sfc/mcdi.h5
-rw-r--r--drivers/net/ethernet/sfc/siena/efx.c5
-rw-r--r--drivers/net/ethernet/sfc/tc.c42
-rw-r--r--drivers/net/ethernet/sfc/tc.h4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c2
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c10
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c21
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.h1
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h2
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_main.c9
-rw-r--r--drivers/net/ethernet/wangxun/ngbe/ngbe_type.h1
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_main.c10
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_type.h1
-rw-r--r--drivers/net/geneve.c2
-rw-r--r--drivers/net/ieee802154/adf7242.c3
-rw-r--r--drivers/net/ieee802154/at86rf230.c2
-rw-r--r--drivers/net/ieee802154/ca8210.c3
-rw-r--r--drivers/net/ieee802154/mcr20a.c2
-rw-r--r--drivers/net/mdio/of_mdio.c4
-rw-r--r--drivers/net/pcs/pcs-lynx.c4
-rw-r--r--drivers/net/pcs/pcs-xpcs.c13
-rw-r--r--drivers/net/phy/dp83867.c28
-rw-r--r--drivers/net/phy/micrel.c397
-rw-r--r--drivers/net/phy/mxl-gpy.c35
-rw-r--r--drivers/net/phy/phy_device.c4
-rw-r--r--drivers/net/phy/sfp.c48
-rw-r--r--drivers/net/phy/smsc.c33
-rw-r--r--drivers/net/phy/spi_ks8995.c2
-rw-r--r--drivers/net/tap.c15
-rw-r--r--drivers/net/tun.c2
-rw-r--r--drivers/net/virtio_net.c6
-rw-r--r--drivers/net/vxlan/Makefile2
-rw-r--r--drivers/net/vxlan/vxlan_core.c82
-rw-r--r--drivers/net/vxlan/vxlan_mdb.c1462
-rw-r--r--drivers/net/vxlan/vxlan_private.h84
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c51
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h1
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c324
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c25
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c118
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h11
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c49
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h6
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h157
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c9
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c61
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/22000.c12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/rs.h27
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/rx.h86
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/rs.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-csr.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c30
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rs.c17
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c601
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c5
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c56
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c12
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c25
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c9
-rw-r--r--drivers/net/wireless/realtek/rtw88/mac.c17
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8821c.c9
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8821c.h6
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8822b.c9
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8822b.h8
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8822c.c9
-rw-r--r--drivers/net/wireless/realtek/rtw88/rtw8822c.h8
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.c35
-rw-r--r--drivers/net/wireless/realtek/rtw89/core.h2
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.c143
-rw-r--r--drivers/net/wireless/realtek/rtw89/fw.h7
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac80211.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.c72
-rw-r--r--drivers/net/wireless/realtek/rtw89/phy.h3
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852b.c12
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8852c.c74
-rw-r--r--drivers/net/wireless/realtek/rtw89/ser.c5
-rw-r--r--drivers/net/wireless/silabs/wfx/main.c10
-rw-r--r--drivers/nfc/nfcmrvl/i2c.c2
-rw-r--r--drivers/nfc/nfcmrvl/main.c6
-rw-r--r--drivers/nfc/nfcmrvl/nfcmrvl.h30
-rw-r--r--drivers/nfc/nfcmrvl/uart.c11
-rw-r--r--drivers/nfc/trf7970a.c2
-rw-r--r--drivers/ptp/ptp_ines.c2
-rw-r--r--drivers/ptp/ptp_ocp.c1
-rw-r--r--drivers/s390/net/ism_drv.c8
-rw-r--r--fs/dlm/lowcomms.c7
-rw-r--r--include/linux/bpf.h105
-rw-r--r--include/linux/bpf_local_storage.h1
-rw-r--r--include/linux/bpf_mem_alloc.h7
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/btf.h2
-rw-r--r--include/linux/btf_ids.h2
-rw-r--r--include/linux/filter.h46
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/mlx5/driver.h3
-rw-r--r--include/linux/mlx5/mlx5_ifc.h26
-rw-r--r--include/linux/mlx5/port.h16
-rw-r--r--include/linux/netdevice.h39
-rw-r--r--include/linux/netfilter_ipv6.h2
-rw-r--r--include/linux/netlink.h1
-rw-r--r--include/linux/phy.h5
-rw-r--r--include/linux/platform_data/nfcmrvl.h48
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/net/addrconf.h2
-rw-r--r--include/net/af_unix.h1
-rw-r--r--include/net/cfg80211.h36
-rw-r--r--include/net/ieee80211_radiotap.h215
-rw-r--r--include/net/inet_sock.h5
-rw-r--r--include/net/mac80211.h92
-rw-r--r--include/net/mana/mana.h18
-rw-r--r--include/net/neighbour.h4
-rw-r--r--include/net/pkt_sched.h1
-rw-r--r--include/net/raw.h2
-rw-r--r--include/net/rawv6.h2
-rw-r--r--include/net/scm.h13
-rw-r--r--include/net/sctp/stream_sched.h2
-rw-r--r--include/net/sctp/structs.h8
-rw-r--r--include/net/smc.h1
-rw-r--r--include/net/vxlan.h6
-rw-r--r--include/net/xdp_sock.h1
-rw-r--r--include/trace/events/sock.h4
-rw-r--r--include/trace/events/tcp.h2
-rw-r--r--include/uapi/linux/bpf.h33
-rw-r--r--include/uapi/linux/if_bridge.h10
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h2
-rw-r--r--include/uapi/linux/nl80211.h37
-rw-r--r--include/uapi/linux/sctp.h4
-rw-r--r--include/uapi/linux/virtio_net.h1
-rw-r--r--io_uring/rsrc.c3
-rw-r--r--kernel/bpf/arraymap.c28
-rw-r--r--kernel/bpf/bloom_filter.c12
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c1
-rw-r--r--kernel/bpf/bpf_inode_storage.c1
-rw-r--r--kernel/bpf/bpf_local_storage.c95
-rw-r--r--kernel/bpf/bpf_struct_ops.c16
-rw-r--r--kernel/bpf/bpf_task_storage.c1
-rw-r--r--kernel/bpf/btf.c42
-rw-r--r--kernel/bpf/cgroup.c53
-rw-r--r--kernel/bpf/cpumap.c10
-rw-r--r--kernel/bpf/cpumask.c46
-rw-r--r--kernel/bpf/devmap.c26
-rw-r--r--kernel/bpf/hashtab.c102
-rw-r--r--kernel/bpf/helpers.c257
-rw-r--r--kernel/bpf/local_storage.c7
-rw-r--r--kernel/bpf/lpm_trie.c11
-rw-r--r--kernel/bpf/offload.c6
-rw-r--r--kernel/bpf/queue_stack_maps.c10
-rw-r--r--kernel/bpf/reuseport_array.c8
-rw-r--r--kernel/bpf/ringbuf.c20
-rw-r--r--kernel/bpf/stackmap.c14
-rw-r--r--kernel/bpf/syscall.c28
-rw-r--r--kernel/bpf/verifier.c974
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--lib/packing.c1
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/atm/signaling.c2
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/br_arp_nd_proxy.c4
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_mdb.c219
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/br_netfilter_ipv6.c79
-rw-r--r--net/bridge/br_netlink.c3
-rw-r--r--net/bridge/br_private.h22
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c71
-rw-r--r--net/core/bpf_sk_storage.c1
-rw-r--r--net/core/dev.c20
-rw-r--r--net/core/filter.c197
-rw-r--r--net/core/neighbour.c59
-rw-r--r--net/core/rtnetlink.c218
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock_map.c20
-rw-r--r--net/dccp/ipv4.c12
-rw-r--r--net/dccp/ipv6.c11
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/nexthop.c4
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/raw_diag.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_ipv4.c10
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c31
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c10
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/mac80211/agg-tx.c17
-rw-r--r--net/mac80211/cfg.c46
-rw-r--r--net/mac80211/debugfs_netdev.c223
-rw-r--r--net/mac80211/debugfs_netdev.h16
-rw-r--r--net/mac80211/driver-ops.c25
-rw-r--r--net/mac80211/driver-ops.h16
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/link.c5
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/rx.c93
-rw-r--r--net/mac80211/scan.c8
-rw-r--r--net/mac80211/tx.c10
-rw-r--r--net/mctp/af_mctp.c1
-rw-r--r--net/mptcp/pm_netlink.c2
-rw-r--r--net/mptcp/protocol.c8
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/mptcp/subflow.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_ovs.c11
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/utils.c52
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/packet/af_packet.c104
-rw-r--r--net/packet/diag.c12
-rw-r--r--net/packet/internal.h34
-rw-r--r--net/sched/sch_api.c6
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/stream_sched.c2
-rw-r--r--net/sctp/stream_sched_fc.c225
-rw-r--r--net/smc/smc_core.h10
-rw-r--r--net/smc/smc_ism.c2
-rw-r--r--net/smc/smc_wr.c35
-rw-r--r--net/smc/smc_wr.h5
-rw-r--r--net/socket.c2
-rw-r--r--net/unix/af_unix.c9
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/unix/scm.c6
-rw-r--r--net/wireless/mlme.c55
-rw-r--r--net/wireless/nl80211.c78
-rw-r--r--net/wireless/rdev-ops.h17
-rw-r--r--net/wireless/scan.c38
-rw-r--r--net/wireless/trace.h36
-rw-r--r--net/xdp/xskmap.c13
-rw-r--r--security/lsm_audit.c4
-rw-r--r--tools/arch/arm64/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/s390/include/uapi/asm/ptrace.h458
-rw-r--r--tools/bpf/bpftool/json_writer.c3
-rw-r--r--tools/bpf/resolve_btfids/.gitignore1
-rw-r--r--tools/include/uapi/linux/bpf.h33
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.h69
-rw-r--r--tools/lib/bpf/bpf_helpers.h2
-rw-r--r--tools/lib/bpf/bpf_tracing.h3
-rw-r--r--tools/lib/bpf/btf.c2
-rw-r--r--tools/lib/bpf/libbpf.c197
-rw-r--r--tools/lib/bpf/libbpf.h50
-rw-r--r--tools/lib/bpf/linker.c11
-rw-r--r--tools/lib/bpf/netlink.c8
-rw-r--r--tools/lib/bpf/relo_core.c3
-rw-r--r--tools/lib/bpf/usdt.c196
-rw-r--r--tools/lib/bpf/zip.c327
-rw-r--r--tools/lib/bpf/zip.h47
-rw-r--r--tools/scripts/Makefile.include2
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x2
-rw-r--r--tools/testing/selftests/bpf/Makefile7
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h38
l---------tools/testing/selftests/bpf/disasm.c1
l---------tools/testing/selftests/bpf/disasm.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c917
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c136
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uninit_stack.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xfrm_info.c67
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h23
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h3
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c54
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c287
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c55
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c2
-rw-r--r--tools/testing/selftests/bpf/progs/lru_bug.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c360
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c10
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c7
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c6
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c36
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c35
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe_manual.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c980
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c487
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c119
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c114
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c257
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c45
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c87
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_success.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c69
-rw-r--r--tools/testing/selftests/bpf/test_progs.h16
-rw-r--r--tools/testing/selftests/bpf/test_tcp_hdr_options.h1
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c17
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c104
-rw-r--r--tools/testing/selftests/bpf/verifier/int_ptr.c9
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/search_pruning.c13
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c27
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c7
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c23
-rw-r--r--tools/testing/selftests/bpf/verifier/var_off.c52
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/big_tcp.sh180
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh2318
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json416
519 files changed, 22560 insertions, 4768 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index bfff0e7e37c2..38372a956d65 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -314,7 +314,7 @@ Q: What is the compatibility story for special BPF types in map values?
Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
values (when using BTF support for BPF maps). This allows to use helpers for
such objects on these fields inside map values. Users are also allowed to embed
-pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the
kernel preserve backwards compatibility for these features?
A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
@@ -324,7 +324,7 @@ For struct types that have been added already, like bpf_spin_lock and bpf_timer,
the kernel will preserve backwards compatibility, as they are part of UAPI.
For kptrs, they are also part of UAPI, but only with respect to the kptr
-mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged
pointer in your struct are NOT part of the UAPI contract. The supported types can
and will change across kernel releases. However, operations like accessing kptr
fields and bpf_kptr_xchg() helper will continue to be supported across kernel
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index b421d94dc9f2..7403f81c995c 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,7 @@ into the bpf-next tree will make their way into net-next tree. net and
net-next are both run by David S. Miller. From there, they will go
into the kernel mainline tree run by Linus Torvalds. To read up on the
process of net and net-next being merged into the mainline tree, see
-the :ref:`netdev-FAQ`
+the `netdev-FAQ`_.
@@ -147,7 +147,7 @@ request)::
Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
---------------------------------------------------------------------------------
-A: The process is the very same as described in the :ref:`netdev-FAQ`,
+A: The process is the very same as described in the `netdev-FAQ`_,
so please read up on it. The subject line must indicate whether the
patch is a fix or rather "next-like" content in order to let the
maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,7 +206,7 @@ ii) run extensive BPF test suite and
Once the BPF pull request was accepted by David S. Miller, then
the patches end up in net or net-next tree, respectively, and
make their way from there further into mainline. Again, see the
-:ref:`netdev-FAQ` for additional information e.g. on how often they are
+`netdev-FAQ`_ for additional information e.g. on how often they are
merged to mainline.
Q: How long do I need to wait for feedback on my BPF patches?
@@ -230,7 +230,7 @@ Q: Are patches applied to bpf-next when the merge window is open?
-----------------------------------------------------------------
A: For the time when the merge window is open, bpf-next will not be
processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the :ref:`netdev-FAQ` about further details.
+so feel free to read up on the `netdev-FAQ`_ about further details.
During those two weeks of merge window, we might ask you to resend
your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +394,7 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
netdev@vger.kernel.org
The process in general is the same as on netdev itself, see also the
-:ref:`netdev-FAQ`.
+`netdev-FAQ`_.
Q: Do you also backport to kernels not currently maintained as stable?
----------------------------------------------------------------------
@@ -410,7 +410,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
What should I do?
A: The same rules apply as with netdev patch submissions in general, see
-the :ref:`netdev-FAQ`.
+the `netdev-FAQ`_.
Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
ask the BPF maintainers to queue the patches instead. This can be done
@@ -684,7 +684,7 @@ when:
.. Links
-.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
+.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html
.. _selftests:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 24bef9cbbeee..75344cd230e5 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -51,7 +51,7 @@ For example:
.. code-block:: c
struct cpumask_map_value {
- struct bpf_cpumask __kptr_ref * cpumask;
+ struct bpf_cpumask __kptr * cpumask;
};
struct array_map {
@@ -128,7 +128,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
/* struct containing the struct bpf_cpumask kptr which is stored in the map. */
struct cpumasks_kfunc_map_value {
- struct bpf_cpumask __kptr_ref * bpf_cpumask;
+ struct bpf_cpumask __kptr * bpf_cpumask;
};
/* The map containing struct cpumasks_kfunc_map_value entries. */
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index af515de5fc38..db8789e6969e 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -38,14 +38,11 @@ eBPF has two instruction encodings:
* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
constant) value after the basic instruction for a total of 128 bits.
-The basic instruction encoding is as follows, where MSB and LSB mean the most significant
-bits and least significant bits, respectively:
+The fields conforming an encoded basic instruction are stored in the
+following order::
-============= ======= ======= ======= ============
-32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
-============= ======= ======= ======= ============
-imm offset src_reg dst_reg opcode
-============= ======= ======= ======= ============
+ opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
+ opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
**imm**
signed integer immediate value
@@ -63,6 +60,18 @@ imm offset src_reg dst_reg opcode
**opcode**
operation to perform
+Note that the contents of multi-byte fields ('imm' and 'offset') are
+stored using big-endian byte ordering in big-endian BPF and
+little-endian byte ordering in little-endian BPF.
+
+For example::
+
+ opcode offset imm assembly
+ src_reg dst_reg
+ 07 0 1 00 00 44 33 22 11 r1 += 0x11223344 // little
+ dst_reg src_reg
+ 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big
+
Note that most instructions do not use all of the fields.
Unused fields shall be cleared to zero.
@@ -72,18 +81,23 @@ The 64 bits following the basic instruction contain a pseudo instruction
using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
and imm containing the high 32 bits of the immediate value.
-================= ==================
-64 bits (MSB) 64 bits (LSB)
-================= ==================
-basic instruction pseudo instruction
-================= ==================
+This is depicted in the following figure::
+
+ basic_instruction
+ .-----------------------------.
+ | |
+ code:8 regs:8 offset:16 imm:32 unused:32 imm:32
+ | |
+ '--------------'
+ pseudo instruction
Thus the 64-bit immediate value is constructed as follows:
imm64 = (next_imm << 32) | imm
where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction.
+following the basic instruction. The unused bytes in the pseudo
+instruction are reserved and shall be cleared to zero.
Instruction classes
-------------------
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ca96ef3f6896..69eccf6f98ef 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,6 +100,23 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
+2.2.2 __uninit Annotation
+-------------------------
+
+This annotation is used to indicate that the argument will be treated as
+uninitialized.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
+ {
+ ...
+ }
+
+Here, the dynptr will be treated as an uninitialized dynptr. Without this
+annotation, the verifier will reject the program if the dynptr passed in is
+not initialized.
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -232,11 +249,13 @@ added later.
2.4.8 KF_RCU flag
-----------------
-The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument.
-When used together with KF_ACQUIRE, it indicates the kfunc should have a
-single argument which must be a trusted argument or a MEM_RCU pointer.
-The argument may have reference count of 0 and the kfunc must take this
-into consideration.
+The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
+KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
+that the objects are valid and there is no use-after-free. The pointers are not
+NULL, but the object's refcount could have reached zero. The kfuncs need to
+consider doing a refcnt != 0 check, especially when returning a KF_ACQUIRE
+pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
+also be KF_RET_NULL.
.. _KF_deprecated_flag:
@@ -527,7 +546,7 @@ Here's an example of how it can be used:
/* struct containing the struct task_struct kptr which is actually stored in the map. */
struct __cgroups_kfunc_map_value {
- struct cgroup __kptr_ref * cgroup;
+ struct cgroup __kptr * cgroup;
};
/* The map containing struct __cgroups_kfunc_map_value entries. */
@@ -583,13 +602,17 @@ Here's an example of how it can be used:
----
-Another kfunc available for interacting with ``struct cgroup *`` objects is
-bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
-and return it as a cgroup kptr.
+Other kfuncs available for interacting with ``struct cgroup *`` objects are
+bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
+the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
+return a cgroup kptr.
.. kernel-doc:: kernel/bpf/helpers.c
:identifiers: bpf_cgroup_ancestor
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_from_id
+
Eventually, BPF should be updated to allow this to happen with a normal memory
load in the program itself. This is currently not possible without more work in
the verifier. bpf_cgroup_ancestor() can be used as follows:
diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
index 4906ff0f8382..6f069f3d6f4b 100644
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@@ -11,9 +11,9 @@ maps are accessed from BPF programs via BPF helpers which are documented in the
`man-pages`_ for `bpf-helpers(7)`_.
BPF maps are accessed from user space via the ``bpf`` syscall, which provides
-commands to create maps, lookup elements, update elements and delete
-elements. More details of the BPF syscall are available in
-:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.
+commands to create maps, lookup elements, update elements and delete elements.
+More details of the BPF syscall are available in `ebpf-syscall`_ and in the
+`man-pages`_ for `bpf(2)`_.
Map Types
=========
@@ -79,3 +79,4 @@ Find and delete element by key in a given map using ``attr->map_fd``,
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
+.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html
diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
index 449ee0735012..5ae9cd8f99a2 100644
--- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
@@ -93,7 +93,7 @@ properties:
gpio-controller:
type: boolean
- description:
+ description: |
If defined, LED controller of the MT7530 switch will run on GPIO mode.
There are 15 controllable pins.
@@ -112,7 +112,7 @@ properties:
maxItems: 1
io-supply:
- description:
+ description: |
Phandle to the regulator node necessary for the I/O power.
See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt for
details for the regulator setup on these boards.
@@ -124,7 +124,7 @@ properties:
switch is a part of the multi-chip module.
reset-gpios:
- description:
+ description: |
GPIO to reset the switch. Use this if mediatek,mcm is not used.
This property is optional because some boards share the reset line with
other components which makes it impossible to probe the switch if the
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 900063411a20..306709bcc9e9 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -54,11 +54,12 @@ properties:
compatible:
enum:
+ - ti,am642-cpsw-nuss
- ti,am654-cpsw-nuss
- ti,j7200-cpswxg-nuss
- ti,j721e-cpsw-nuss
- ti,j721e-cpswxg-nuss
- - ti,am642-cpsw-nuss
+ - ti,j784s4-cpswxg-nuss
reg:
maxItems: 1
@@ -126,8 +127,18 @@ properties:
description: CPSW port number
phys:
- maxItems: 1
- description: phandle on phy-gmii-sel PHY
+ minItems: 1
+ items:
+ - description: CPSW MAC's PHY.
+ - description: Serdes PHY. Serdes PHY is required only if
+ the Serdes has to be configured in the
+ Single-Link configuration.
+
+ phy-names:
+ minItems: 1
+ items:
+ - const: mac
+ - const: serdes
label:
description: label associated with this port
@@ -187,7 +198,9 @@ allOf:
properties:
compatible:
contains:
- const: ti,j721e-cpswxg-nuss
+ enum:
+ - ti,j721e-cpswxg-nuss
+ - ti,j784s4-cpswxg-nuss
then:
properties:
ethernet-ports:
@@ -205,8 +218,9 @@ allOf:
compatible:
contains:
enum:
- - ti,j721e-cpswxg-nuss
- ti,j7200-cpswxg-nuss
+ - ti,j721e-cpswxg-nuss
+ - ti,j784s4-cpswxg-nuss
then:
properties:
ethernet-ports:
diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index 5c3642b3f802..8e8c17b0a6c6 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -33,10 +33,10 @@ properties:
protocol:
description: Schema compatibility level. Default is "genetlink".
enum: [ genetlink, genetlink-c ]
- # Start genetlink-c
uapi-header:
description: Path to the uAPI header, default is linux/${family-name}.h
type: string
+ # Start genetlink-c
c-family-name:
description: Name of the define for the family name.
type: string
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 5e98c6d2b9aa..5dc6f1c07a97 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -33,10 +33,10 @@ properties:
protocol:
description: Schema compatibility level. Default is "genetlink".
enum: [ genetlink, genetlink-c, genetlink-legacy ] # Trim
- # Start genetlink-c
uapi-header:
description: Path to the uAPI header, default is linux/${family-name}.h
type: string
+ # Start genetlink-c
c-family-name:
description: Name of the define for the family name.
type: string
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index d35dcd6f8d82..d8b2cdeba058 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -33,6 +33,9 @@ properties:
protocol:
description: Schema compatibility level. Default is "genetlink".
enum: [ genetlink ]
+ uapi-header:
+ description: Path to the uAPI header, default is linux/${family-name}.h
+ type: string
definitions:
description: List of type and constant definitions (enums, flags, defines).
diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
new file mode 100644
index 000000000000..90641668232e
--- /dev/null
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: devlink
+
+protocol: genetlink-legacy
+
+doc: Partial family for Devlink.
+
+attribute-sets:
+ -
+ name: devlink
+ attributes:
+ -
+ name: bus-name
+ type: string
+ value: 1
+ -
+ name: dev-name
+ type: string
+ -
+ name: port-index
+ type: u32
+
+ # TODO: fill in the attributes in between
+
+ -
+ name: info-driver-name
+ type: string
+ value: 98
+ -
+ name: info-serial-number
+ type: string
+ -
+ name: info-version-fixed
+ type: nest
+ multi-attr: true
+ nested-attributes: dl-info-version
+ -
+ name: info-version-running
+ type: nest
+ multi-attr: true
+ nested-attributes: dl-info-version
+ -
+ name: info-version-stored
+ type: nest
+ multi-attr: true
+ nested-attributes: dl-info-version
+ -
+ name: info-version-name
+ type: string
+ -
+ name: info-version-value
+ type: string
+
+ # TODO: fill in the attributes in between
+
+ -
+ name: reload-failed
+ type: u8
+ value: 136
+
+ # TODO: fill in the attributes in between
+
+ -
+ name: reload-action
+ type: u8
+ value: 153
+
+ # TODO: fill in the attributes in between
+
+ -
+ name: dev-stats
+ type: nest
+ value: 156
+ nested-attributes: dl-dev-stats
+ -
+ name: reload-stats
+ type: nest
+ nested-attributes: dl-reload-stats
+ -
+ name: reload-stats-entry
+ type: nest
+ multi-attr: true
+ nested-attributes: dl-reload-stats-entry
+ -
+ name: reload-stats-limit
+ type: u8
+ -
+ name: reload-stats-value
+ type: u32
+ -
+ name: remote-reload-stats
+ type: nest
+ nested-attributes: dl-reload-stats
+ -
+ name: reload-action-info
+ type: nest
+ nested-attributes: dl-reload-act-info
+ -
+ name: reload-action-stats
+ type: nest
+ nested-attributes: dl-reload-act-stats
+ -
+ name: dl-dev-stats
+ subset-of: devlink
+ attributes:
+ -
+ name: reload-stats
+ type: nest
+ -
+ name: remote-reload-stats
+ type: nest
+ -
+ name: dl-reload-stats
+ subset-of: devlink
+ attributes:
+ -
+ name: reload-action-info
+ type: nest
+ -
+ name: dl-reload-act-info
+ subset-of: devlink
+ attributes:
+ -
+ name: reload-action
+ type: u8
+ -
+ name: reload-action-stats
+ type: nest
+ -
+ name: dl-reload-act-stats
+ subset-of: devlink
+ attributes:
+ -
+ name: reload-stats-entry
+ type: nest
+ -
+ name: dl-reload-stats-entry
+ subset-of: devlink
+ attributes:
+ -
+ name: reload-stats-limit
+ type: u8
+ -
+ name: reload-stats-value
+ type: u32
+ -
+ name: dl-info-version
+ subset-of: devlink
+ attributes:
+ -
+ name: info-version-name
+ type: string
+ -
+ name: info-version-value
+ type: string
+
+operations:
+ enum-model: directional
+ list:
+ -
+ name: get
+ doc: Get devlink instances.
+ attribute-set: devlink
+
+ do:
+ request:
+ value: 1
+ attributes: &dev-id-attrs
+ - bus-name
+ - dev-name
+ reply: &get-reply
+ value: 3
+ attributes:
+ - bus-name
+ - dev-name
+ - reload-failed
+ - reload-action
+ - dev-stats
+ dump:
+ reply: *get-reply
+
+ # TODO: fill in the operations in between
+
+ -
+ name: info-get
+ doc: Get device information, like driver name, hardware and firmware versions etc.
+ attribute-set: devlink
+
+ do:
+ request:
+ value: 51
+ attributes: *dev-id-attrs
+ reply:
+ value: 51
+ attributes:
+ - bus-name
+ - dev-name
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index 9b5c40ba7f0d..0995e4e5acd7 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -122,6 +122,41 @@ users try to enable them.
$ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+hairpin_num_queues: Number of hairpin queues
+--------------------------------------------
+We refer to a TC NIC rule that involves forwarding as "hairpin".
+
+Hairpin queues are an mlx5 hardware-specific implementation for hardware
+forwarding of such packets.
+
+- Show the number of hairpin queues::
+
+ $ devlink dev param show pci/0000:06:00.0 name hairpin_num_queues
+ pci/0000:06:00.0:
+ name hairpin_num_queues type driver-specific
+ values:
+ cmode driverinit value 2
+
+- Change the number of hairpin queues::
+
+ $ devlink dev param set pci/0000:06:00.0 name hairpin_num_queues value 4 cmode driverinit
+
+hairpin_queue_size: Size of the hairpin queues
+----------------------------------------------
+Control the size of the hairpin queues.
+
+- Show the size of the hairpin queues::
+
+ $ devlink dev param show pci/0000:06:00.0 name hairpin_queue_size
+ pci/0000:06:00.0:
+ name hairpin_queue_size type driver-specific
+ values:
+ cmode driverinit value 1024
+
+- Change the size (in packets) of the hairpin queues::
+
+ $ devlink dev param set pci/0000:06:00.0 name hairpin_queue_size value 512 cmode driverinit
+
Health reporters
================
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 3321117cf605..202798d6501e 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -72,6 +72,18 @@ parameters.
Default: disabled
+ * - ``hairpin_num_queues``
+ - u32
+ - driverinit
+ - We refer to a TC NIC rule that involves forwarding as "hairpin".
+ Hairpin queues are an mlx5 hardware-specific implementation for hardware
+ forwarding of such packets.
+
+ Control the number of hairpin queues.
+ * - ``hairpin_queue_size``
+ - u32
+ - driverinit
+ - Control the size (in packets) of the hairpin queues.
The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 288003a9f0ca..e70c846efaa1 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1248,3 +1248,9 @@ out:
return prog;
}
+
+/* Report that this JIT backend supports mixing bpf2bpf calls and tail calls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true; /* unconditional: no config or runtime check in this body */
+}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index e2f3ca73f40d..459dc6023cf8 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -63,10 +63,7 @@ config MIPS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
- !CPU_DADDI_WORKAROUNDS && \
- !CPU_R4000_WORKAROUNDS && \
- !CPU_R4400_WORKAROUNDS
+ select HAVE_EBPF_JIT if !CPU_MICROMIPS
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c
index b17130d510d4..a40d926b6513 100644
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -218,9 +218,13 @@ bool valid_alu_i(u8 op, s32 imm)
/* All legal eBPF values are valid */
return true;
case BPF_ADD:
+ if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
+ return false;
/* imm must be 16 bits */
return imm >= -0x8000 && imm <= 0x7fff;
case BPF_SUB:
+ if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
+ return false;
/* -imm must be 16 bits */
return imm >= -0x7fff && imm <= 0x8000;
case BPF_AND:
diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
index 0e7c1bdcf914..fa7e9aa37f49 100644
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -228,6 +228,9 @@ static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
} else {
emit(ctx, dmultu, dst, src);
emit(ctx, mflo, dst);
+ /* Ensure multiplication is completed */
+ if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
+ emit(ctx, mfhi, MIPS_R_ZERO);
}
break;
/* dst = dst / src */
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index acdc3f040195..c648864c8cd1 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1752,3 +1752,8 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
__build_epilogue(false, ctx);
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{ /* NOTE(review): presumably the capability hook queried for kfunc-call support - confirm against the caller */
+ return true; /* always true in this file; no config or runtime check */
+}
diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
index b28baab6d56a..3e44ccb7db84 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c
+++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c
@@ -297,7 +297,8 @@ static enum led_brightness hellcreek_led_is_gm_get(struct led_classdev *ldev)
static int hellcreek_led_setup(struct hellcreek *hellcreek)
{
struct device_node *leds, *led = NULL;
- const char *label, *state;
+ enum led_default_state state;
+ const char *label;
int ret = -EINVAL;
of_node_get(hellcreek->dev->of_node);
@@ -318,16 +319,17 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek)
ret = of_property_read_string(led, "label", &label);
hellcreek->led_sync_good.name = ret ? "sync_good" : label;
- ret = of_property_read_string(led, "default-state", &state);
- if (!ret) {
- if (!strcmp(state, "on"))
- hellcreek->led_sync_good.brightness = 1;
- else if (!strcmp(state, "off"))
- hellcreek->led_sync_good.brightness = 0;
- else if (!strcmp(state, "keep"))
- hellcreek->led_sync_good.brightness =
- hellcreek_get_brightness(hellcreek,
- STATUS_OUT_SYNC_GOOD);
+ state = led_init_default_state_get(of_fwnode_handle(led));
+ switch (state) {
+ case LEDS_DEFSTATE_ON:
+ hellcreek->led_sync_good.brightness = 1;
+ break;
+ case LEDS_DEFSTATE_KEEP:
+ hellcreek->led_sync_good.brightness =
+ hellcreek_get_brightness(hellcreek, STATUS_OUT_SYNC_GOOD);
+ break;
+ default:
+ hellcreek->led_sync_good.brightness = 0;
}
hellcreek->led_sync_good.max_brightness = 1;
@@ -344,16 +346,17 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek)
ret = of_property_read_string(led, "label", &label);
hellcreek->led_is_gm.name = ret ? "is_gm" : label;
- ret = of_property_read_string(led, "default-state", &state);
- if (!ret) {
- if (!strcmp(state, "on"))
- hellcreek->led_is_gm.brightness = 1;
- else if (!strcmp(state, "off"))
- hellcreek->led_is_gm.brightness = 0;
- else if (!strcmp(state, "keep"))
- hellcreek->led_is_gm.brightness =
- hellcreek_get_brightness(hellcreek,
- STATUS_OUT_IS_GM);
+ state = led_init_default_state_get(of_fwnode_handle(led));
+ switch (state) {
+ case LEDS_DEFSTATE_ON:
+ hellcreek->led_is_gm.brightness = 1;
+ break;
+ case LEDS_DEFSTATE_KEEP:
+ hellcreek->led_is_gm.brightness =
+ hellcreek_get_brightness(hellcreek, STATUS_OUT_IS_GM);
+ break;
+ default:
+ hellcreek->led_is_gm.brightness = 0;
}
hellcreek->led_is_gm.max_brightness = 1;
diff --git a/drivers/net/dsa/lan9303_i2c.c b/drivers/net/dsa/lan9303_i2c.c
index 1cb41c36bd47..e8844820c3a9 100644
--- a/drivers/net/dsa/lan9303_i2c.c
+++ b/drivers/net/dsa/lan9303_i2c.c
@@ -103,7 +103,7 @@ MODULE_DEVICE_TABLE(of, lan9303_i2c_of_match);
static struct i2c_driver lan9303_i2c_driver = {
.driver = {
.name = "LAN9303_I2C",
- .of_match_table = of_match_ptr(lan9303_i2c_of_match),
+ .of_match_table = lan9303_i2c_of_match,
},
.probe_new = lan9303_i2c_probe,
.remove = lan9303_i2c_remove,
diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c
index 4f33369a2de5..d8ab2b77d201 100644
--- a/drivers/net/dsa/lan9303_mdio.c
+++ b/drivers/net/dsa/lan9303_mdio.c
@@ -164,7 +164,7 @@ MODULE_DEVICE_TABLE(of, lan9303_mdio_of_match);
static struct mdio_driver lan9303_mdio_driver = {
.mdiodrv.driver = {
.name = "LAN9303_MDIO",
- .of_match_table = of_match_ptr(lan9303_mdio_of_match),
+ .of_match_table = lan9303_mdio_of_match,
},
.probe = lan9303_mdio_probe,
.remove = lan9303_mdio_remove,
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 05ecaa007ab1..3c76a1a14aee 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1885,7 +1885,7 @@ static const struct xway_gphy_match_data xrx300_gphy_data = {
.ge_firmware_name = "lantiq/xrx300_phy11g_a21.bin",
};
-static const struct of_device_id xway_gphy_match[] = {
+static const struct of_device_id xway_gphy_match[] __maybe_unused = {
{ .compatible = "lantiq,xrx200-gphy-fw", .data = NULL },
{ .compatible = "lantiq,xrx200a1x-gphy-fw", .data = &xrx200a1x_gphy_data },
{ .compatible = "lantiq,xrx200a2x-gphy-fw", .data = &xrx200a2x_gphy_data },
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index e315f669ec06..97a317263a2f 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -117,7 +117,7 @@ MODULE_DEVICE_TABLE(of, ksz9477_dt_ids);
static struct i2c_driver ksz9477_i2c_driver = {
.driver = {
.name = "ksz9477-switch",
- .of_match_table = of_match_ptr(ksz9477_dt_ids),
+ .of_match_table = ksz9477_dt_ids,
},
.probe_new = ksz9477_i2c_probe,
.remove = ksz9477_i2c_remove,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 7fc2155d93d6..50fd548c72d8 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -32,10 +32,6 @@
#include "ksz9477.h"
#include "lan937x.h"
-#define KSZ_CBS_ENABLE ((MTI_SCHEDULE_STRICT_PRIO << MTI_SCHEDULE_MODE_S) | \
- (MTI_SHAPING_SRP << MTI_SHAPING_S))
-#define KSZ_CBS_DISABLE ((MTI_SCHEDULE_WRR << MTI_SCHEDULE_MODE_S) |\
- (MTI_SHAPING_OFF << MTI_SHAPING_S))
#define MIB_COUNTER_NUM 0x20
struct ksz_stats_raw {
@@ -1089,6 +1085,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 3,
.num_tx_queues = 4,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &ksz9477_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1228,6 +1225,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 4,
.num_tx_queues = 4,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &ksz9477_dev_ops,
.phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
@@ -1352,6 +1350,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 3,
.num_tx_queues = 4,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &ksz9477_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1379,6 +1378,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 3,
.num_tx_queues = 4,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &ksz9477_dev_ops,
.phy_errata_9477 = true,
.mib_names = ksz9477_mib_names,
@@ -1411,6 +1411,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 6,
.num_tx_queues = 8,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &lan937x_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1437,6 +1438,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 6,
.num_tx_queues = 8,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &lan937x_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1463,6 +1465,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 6,
.num_tx_queues = 8,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &lan937x_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1493,6 +1496,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 6,
.num_tx_queues = 8,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &lan937x_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -1523,6 +1527,7 @@ const struct ksz_chip_data ksz_switch_chips[] = {
.port_nirqs = 6,
.num_tx_queues = 8,
.tc_cbs_supported = true,
+ .tc_ets_supported = true,
.ops = &lan937x_dev_ops,
.mib_names = ksz9477_mib_names,
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names),
@@ -3091,6 +3096,14 @@ static int cinc_cal(s32 idle_slope, s32 send_slope, u32 *bw)
return 0;
}
+static int ksz_setup_tc_mode(struct ksz_device *dev, int port, u8 scheduler,
+ u8 shaper)
+{ /* Write one byte to REG_PORT_MTI_QUEUE_CTRL_0 of @port combining both modes; returns the ksz_pwrite8() result. */
+ return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0,
+ FIELD_PREP(MTI_SCHEDULE_MODE_M, scheduler) | /* scheduler lands in bits 7:6 */
+ FIELD_PREP(MTI_SHAPING_M, shaper)); /* shaper lands in bits 5:4 */
+}
+
static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port,
struct tc_cbs_qopt_offload *qopt)
{
@@ -3110,8 +3123,8 @@ static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port,
return ret;
if (!qopt->enable)
- return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0,
- KSZ_CBS_DISABLE);
+ return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_WRR,
+ MTI_SHAPING_OFF);
/* High Credit */
ret = ksz_pwrite16(dev, port, REG_PORT_MTI_HI_WATER_MARK,
@@ -3136,8 +3149,215 @@ static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port,
return ret;
}
- return ksz_pwrite8(dev, port, REG_PORT_MTI_QUEUE_CTRL_0,
- KSZ_CBS_ENABLE);
+ return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_STRICT_PRIO,
+ MTI_SHAPING_SRP);
+}
+
+static int ksz_disable_egress_rate_limit(struct ksz_device *dev, int port)
+{ /* Write KSZ9477_OUT_RATE_NO_LIMIT to the egress-rate register of every TX queue on @port. */
+ int queue, ret;
+
+ /* Configuration will not take effect until the last Port Queue X
+ * Egress Limit Control Register is written.
+ */
+ for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
+ ret = ksz_pwrite8(dev, port, KSZ9477_REG_PORT_OUT_RATE_0 + queue, /* registers are addressed base + queue index */
+ KSZ9477_OUT_RATE_NO_LIMIT);
+ if (ret)
+ return ret; /* stop at the first failed write */
+ }
+
+ return 0;
+}
+
+static int ksz_ets_band_to_queue(struct tc_ets_qopt_offload_replace_params *p,
+ int band)
+{ /* Translate an ETS band index into a queue index by reversing the order. */
+ /* Compared to queues, bands prioritize packets differently. In strict
+ * priority mode, the lowest priority is assigned to Queue 0 while the
+ * highest priority is given to Band 0.
+ */
+ return p->bands - 1 - band; /* band 0 -> queue bands-1, last band -> queue 0 */
+}
+
+static int ksz_queue_set_strict(struct ksz_device *dev, int port, int queue)
+{ /* Set strict-priority scheduling with shaping off after writing @queue to the queue index register. */
+ int ret;
+
+ ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue); /* NOTE(review): presumably selects the queue the next MTI write applies to - confirm */
+ if (ret)
+ return ret;
+
+ return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_STRICT_PRIO,
+ MTI_SHAPING_OFF);
+}
+
+static int ksz_queue_set_wrr(struct ksz_device *dev, int port, int queue,
+ int weight)
+{ /* Set WRR scheduling with shaping off, then write @weight; returns the first non-zero write result, else 0. */
+ int ret;
+
+ ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue); /* NOTE(review): presumably selects the queue the next MTI writes apply to - confirm */
+ if (ret)
+ return ret;
+
+ ret = ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_WRR,
+ MTI_SHAPING_OFF);
+ if (ret)
+ return ret;
+
+ return ksz_pwrite8(dev, port, KSZ9477_PORT_MTI_QUEUE_CTRL_1, weight); /* 8-bit write of an int weight */
+}
+
+static int ksz_tc_ets_add(struct ksz_device *dev, int port,
+ struct tc_ets_qopt_offload_replace_params *p)
+{ /* Apply an ETS replace request to @port: lift rate limits, set strict priority per band, write the TC-to-queue map. */
+ int ret, band, tc_prio;
+ u32 queue_map = 0;
+
+ /* In order to ensure proper prioritization, it is necessary to set the
+ * rate limit for the related queue to zero. Otherwise strict priority
+ * or WRR mode will not work. This is a hardware limitation.
+ */
+ ret = ksz_disable_egress_rate_limit(dev, port);
+ if (ret)
+ return ret;
+
+ /* Configure queue scheduling mode for all bands. Currently only strict
+ * prio mode is supported.
+ */
+ for (band = 0; band < p->bands; band++) {
+ int queue = ksz_ets_band_to_queue(p, band);
+
+ ret = ksz_queue_set_strict(dev, port, queue);
+ if (ret)
+ return ret;
+ }
+
+ /* Configure the mapping between traffic classes and queues. Note:
+ * priomap variable supports 16 traffic classes, but the chip can handle
+ * only 8 classes.
+ */
+ for (tc_prio = 0; tc_prio < ARRAY_SIZE(p->priomap); tc_prio++) {
+ int queue;
+
+ if (tc_prio > KSZ9477_MAX_TC_PRIO) /* only priorities 0..7 are mapped */
+ break;
+
+ queue = ksz_ets_band_to_queue(p, p->priomap[tc_prio]);
+ queue_map |= queue << (tc_prio * KSZ9477_PORT_TC_MAP_S); /* one 4-bit field per priority */
+ }
+
+ return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4, queue_map);
+}
+
+static int ksz_tc_ets_del(struct ksz_device *dev, int port)
+{ /* Undo ETS offload on @port: WRR with the default weight on every queue, then the default TC-to-queue map. */
+ int ret, queue, tc_prio, s;
+ u32 queue_map = 0;
+
+ /* To restore the default chip configuration, set all queues to use the
+ * WRR scheduler with a weight of 1.
+ */
+ for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
+ ret = ksz_queue_set_wrr(dev, port, queue,
+ KSZ9477_DEFAULT_WRR_WEIGHT);
+ if (ret)
+ return ret;
+ }
+
+ switch (dev->info->num_tx_queues) { /* s = right shift that folds priorities 0..7 onto the available queues */
+ case 2:
+ s = 2; /* four priorities per queue */
+ break;
+ case 4:
+ s = 1; /* two priorities per queue */
+ break;
+ case 8:
+ s = 0; /* one priority per queue */
+ break;
+ default:
+ return -EINVAL; /* reached only after the WRR writes above were already issued */
+ }
+
+ /* Revert the queue mapping for TC-priority to its default setting on
+ * the chip.
+ */
+ for (tc_prio = 0; tc_prio <= KSZ9477_MAX_TC_PRIO; tc_prio++) {
+ int queue; /* shadows the function-scope 'queue' used by the loop above */
+
+ queue = tc_prio >> s;
+ queue_map |= queue << (tc_prio * KSZ9477_PORT_TC_MAP_S); /* one 4-bit field per priority */
+ }
+
+ return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4, queue_map);
+}
+
+static int ksz_tc_ets_validate(struct ksz_device *dev, int port,
+ struct tc_ets_qopt_offload_replace_params *p)
+{ /* Return 0 if the ETS parameters can be offloaded, else -EOPNOTSUPP; @port is not used in this body. */
+ int band;
+
+ /* Since it is not feasible to share one port among multiple qdiscs,
+ * the user must configure all available queues appropriately.
+ */
+ if (p->bands != dev->info->num_tx_queues) {
+ dev_err(dev->dev, "Not supported amount of bands. It should be %d\n",
+ dev->info->num_tx_queues);
+ return -EOPNOTSUPP;
+ }
+
+ for (band = 0; band < p->bands; ++band) {
+ /* The KSZ switches utilize a weighted round robin configuration
+ * where a certain number of packets can be transmitted from a
+ * queue before the next queue is serviced. For more information
+ * on this, refer to section 5.2.8.4 of the KSZ8565R
+ * documentation on the Port Transmit Queue Control 1 Register.
+ * However, the current ETS Qdisc implementation (as of February
+ * 2023) assigns a weight to each queue based on the number of
+ * bytes or extrapolated bandwidth in percentages. Since this
+ * differs from the KSZ switches' method and we don't want to
+ * fake support by converting bytes to packets, it is better to
+ * return an error instead.
+ */
+ if (p->quanta[band]) { /* any non-zero quantum rejects the whole request */
+ dev_err(dev->dev, "Quanta/weights configuration is not supported.\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port,
+ struct tc_ets_qopt_offload *qopt)
+{ /* Dispatch an ETS qdisc offload request for @port; only REPLACE and DESTROY on the root parent are acted on. */
+ struct ksz_device *dev = ds->priv;
+ int ret;
+
+ if (!dev->info->tc_ets_supported) /* chip entry does not set tc_ets_supported */
+ return -EOPNOTSUPP;
+
+ if (qopt->parent != TC_H_ROOT) {
+ dev_err(dev->dev, "Parent should be \"root\"\n");
+ return -EOPNOTSUPP;
+ }
+
+ switch (qopt->command) {
+ case TC_ETS_REPLACE:
+ ret = ksz_tc_ets_validate(dev, port, &qopt->replace_params); /* validate before any register is written */
+ if (ret)
+ return ret;
+
+ return ksz_tc_ets_add(dev, port, &qopt->replace_params);
+ case TC_ETS_DESTROY:
+ return ksz_tc_ets_del(dev, port);
+ case TC_ETS_STATS:
+ case TC_ETS_GRAFT:
+ return -EOPNOTSUPP;
+ }
+
+ return -EOPNOTSUPP; /* any command value not listed in the switch */
+}
static int ksz_setup_tc(struct dsa_switch *ds, int port,
@@ -3146,6 +3366,8 @@ static int ksz_setup_tc(struct dsa_switch *ds, int port,
switch (type) {
case TC_SETUP_QDISC_CBS:
return ksz_setup_tc_cbs(ds, port, type_data);
+ case TC_SETUP_QDISC_ETS:
+ return ksz_tc_setup_qdisc_ets(ds, port, type_data);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index d2d5761d58e9..8abecaf6089e 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -51,6 +51,7 @@ struct ksz_chip_data {
u8 port_nirqs;
u8 num_tx_queues;
bool tc_cbs_supported;
+ bool tc_ets_supported;
const struct ksz_dev_ops *ops;
bool phy_errata_9477;
bool ksz87xx_eee_link_erratum;
@@ -649,21 +650,30 @@ static inline int is_lan937x(struct ksz_device *dev)
#define KSZ8_LEGAL_PACKET_SIZE 1518
#define KSZ9477_MAX_FRAME_SIZE 9000
+#define KSZ9477_REG_PORT_OUT_RATE_0 0x0420
+#define KSZ9477_OUT_RATE_NO_LIMIT 0
+
+#define KSZ9477_PORT_MRI_TC_MAP__4 0x0808
+
+#define KSZ9477_PORT_TC_MAP_S 4
+#define KSZ9477_MAX_TC_PRIO 7
+
/* CBS related registers */
#define REG_PORT_MTI_QUEUE_INDEX__4 0x0900
#define REG_PORT_MTI_QUEUE_CTRL_0 0x0914
-#define MTI_SCHEDULE_MODE_M 0x3
-#define MTI_SCHEDULE_MODE_S 6
+#define MTI_SCHEDULE_MODE_M GENMASK(7, 6)
#define MTI_SCHEDULE_STRICT_PRIO 0
#define MTI_SCHEDULE_WRR 2
-#define MTI_SHAPING_M 0x3
-#define MTI_SHAPING_S 4
+#define MTI_SHAPING_M GENMASK(5, 4)
#define MTI_SHAPING_OFF 0
#define MTI_SHAPING_SRP 1
#define MTI_SHAPING_TIME_AWARE 2
+#define KSZ9477_PORT_MTI_QUEUE_CTRL_1 0x0915
+#define KSZ9477_DEFAULT_WRR_WEIGHT 1
+
#define REG_PORT_MTI_HI_WATER_MARK 0x0916
#define REG_PORT_MTI_LO_WATER_MARK 0x0918
diff --git a/drivers/net/dsa/ocelot/ocelot_ext.c b/drivers/net/dsa/ocelot/ocelot_ext.c
index 063150659816..228737a32080 100644
--- a/drivers/net/dsa/ocelot/ocelot_ext.c
+++ b/drivers/net/dsa/ocelot/ocelot_ext.c
@@ -149,7 +149,7 @@ MODULE_DEVICE_TABLE(of, ocelot_ext_switch_of_match);
static struct platform_driver ocelot_ext_switch_driver = {
.driver = {
.name = "ocelot-ext-switch",
- .of_match_table = of_match_ptr(ocelot_ext_switch_of_match),
+ .of_match_table = ocelot_ext_switch_of_match,
},
.probe = ocelot_ext_probe,
.remove = ocelot_ext_remove,
diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 563ad338da25..96d4972a62f0 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -1079,7 +1079,7 @@ static struct platform_driver seville_vsc9953_driver = {
.shutdown = seville_shutdown,
.driver = {
.name = "mscc_seville",
- .of_match_table = of_match_ptr(seville_of_match),
+ .of_match_table = seville_of_match,
},
};
module_platform_driver(seville_vsc9953_driver);
diff --git a/drivers/net/dsa/realtek/rtl8365mb.c b/drivers/net/dsa/realtek/rtl8365mb.c
index da31d8b839ac..41ea3b5a42b1 100644
--- a/drivers/net/dsa/realtek/rtl8365mb.c
+++ b/drivers/net/dsa/realtek/rtl8365mb.c
@@ -98,6 +98,7 @@
#include <linux/of_irq.h>
#include <linux/regmap.h>
#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include "realtek.h"
@@ -267,6 +268,7 @@
/* Maximum packet length register */
#define RTL8365MB_CFG0_MAX_LEN_REG 0x088C
#define RTL8365MB_CFG0_MAX_LEN_MASK 0x3FFF
+#define RTL8365MB_CFG0_MAX_LEN_MAX 0x3FFF
/* Port learning limit registers */
#define RTL8365MB_LUT_PORT_LEARN_LIMIT_BASE 0x0A20
@@ -1135,6 +1137,35 @@ static void rtl8365mb_phylink_mac_link_up(struct dsa_switch *ds, int port,
}
}
+static int rtl8365mb_port_change_mtu(struct dsa_switch *ds, int port,
+ int new_mtu)
+{
+ struct realtek_priv *priv = ds->priv;
+ int frame_size;
+
+ /* When a new MTU is set, DSA always sets the CPU port's MTU to the
+ * largest MTU of the slave ports. Because the switch only has a global
+ * RX length register, only allowing CPU port here is enough.
+ */
+ if (!dsa_is_cpu_port(ds, port))
+ return 0;
+
+ frame_size = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+
+ dev_dbg(priv->dev, "changing mtu to %d (frame size: %d)\n",
+ new_mtu, frame_size);
+
+ return regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG,
+ RTL8365MB_CFG0_MAX_LEN_MASK,
+ FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK,
+ frame_size));
+}
+
+static int rtl8365mb_port_max_mtu(struct dsa_switch *ds, int port)
+{
+ return RTL8365MB_CFG0_MAX_LEN_MAX - VLAN_ETH_HLEN - ETH_FCS_LEN;
+}
+
static void rtl8365mb_port_stp_state_set(struct dsa_switch *ds, int port,
u8 state)
{
@@ -1980,10 +2011,7 @@ static int rtl8365mb_setup(struct dsa_switch *ds)
p->index = i;
}
- /* Set maximum packet length to 1536 bytes */
- ret = regmap_update_bits(priv->map, RTL8365MB_CFG0_MAX_LEN_REG,
- RTL8365MB_CFG0_MAX_LEN_MASK,
- FIELD_PREP(RTL8365MB_CFG0_MAX_LEN_MASK, 1536));
+ ret = rtl8365mb_port_change_mtu(ds, cpu->trap_port, ETH_DATA_LEN);
if (ret)
goto out_teardown_irq;
@@ -2103,6 +2131,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_smi = {
.get_eth_mac_stats = rtl8365mb_get_mac_stats,
.get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats,
.get_stats64 = rtl8365mb_get_stats64,
+ .port_change_mtu = rtl8365mb_port_change_mtu,
+ .port_max_mtu = rtl8365mb_port_max_mtu,
};
static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = {
@@ -2124,6 +2154,8 @@ static const struct dsa_switch_ops rtl8365mb_switch_ops_mdio = {
.get_eth_mac_stats = rtl8365mb_get_mac_stats,
.get_eth_ctrl_stats = rtl8365mb_get_ctrl_stats,
.get_stats64 = rtl8365mb_get_stats64,
+ .port_change_mtu = rtl8365mb_port_change_mtu,
+ .port_max_mtu = rtl8365mb_port_max_mtu,
};
static const struct realtek_ops rtl8365mb_ops = {
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1917da784191..5a274b99f299 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -84,7 +84,6 @@ source "drivers/net/ethernet/huawei/Kconfig"
source "drivers/net/ethernet/i825xx/Kconfig"
source "drivers/net/ethernet/ibm/Kconfig"
source "drivers/net/ethernet/intel/Kconfig"
-source "drivers/net/ethernet/wangxun/Kconfig"
source "drivers/net/ethernet/xscale/Kconfig"
config JME
@@ -189,6 +188,7 @@ source "drivers/net/ethernet/toshiba/Kconfig"
source "drivers/net/ethernet/tundra/Kconfig"
source "drivers/net/ethernet/vertexcom/Kconfig"
source "drivers/net/ethernet/via/Kconfig"
+source "drivers/net/ethernet/wangxun/Kconfig"
source "drivers/net/ethernet/wiznet/Kconfig"
source "drivers/net/ethernet/xilinx/Kconfig"
source "drivers/net/ethernet/xircom/Kconfig"
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 306393f8eeca..49bb9a8f00e6 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -39,7 +39,6 @@
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <linux/mdio.h>
-#include <linux/aer.h>
#include <linux/bitops.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -1745,7 +1744,6 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_pci_disable;
}
- pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
if (!pdev->pm_cap) {
@@ -1879,7 +1877,6 @@ out_free_netdev:
free_netdev(netdev);
out_pci_release:
pci_release_mem_regions(pdev);
- pci_disable_pcie_error_reporting(pdev);
out_pci_disable:
pci_disable_device(pdev);
return err;
@@ -1897,7 +1894,6 @@ static void alx_remove(struct pci_dev *pdev)
iounmap(hw->hw_addr);
pci_release_mem_regions(pdev);
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
mutex_destroy(&alx->mtx);
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 9f473854b0f4..a66137b8d1a6 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -48,7 +48,6 @@
#include <linux/cache.h>
#include <linux/firmware.h>
#include <linux/log2.h>
-#include <linux/aer.h>
#include <linux/crash_dump.h>
#if IS_ENABLED(CONFIG_CNIC)
@@ -8093,7 +8092,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
int rc, i, j;
u32 reg;
u64 dma_mask, persist_dma_mask;
- int err;
SET_NETDEV_DEV(dev, &pdev->dev);
bp = netdev_priv(dev);
@@ -8176,12 +8174,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
bp->flags |= BNX2_FLAG_PCIE;
if (BNX2_CHIP_REV(bp) == BNX2_CHIP_REV_Ax)
bp->flags |= BNX2_FLAG_JUMBO_BROKEN;
-
- /* AER (Advanced Error Reporting) hooks */
- err = pci_enable_pcie_error_reporting(pdev);
- if (!err)
- bp->flags |= BNX2_FLAG_AER_ENABLED;
-
} else {
bp->pcix_cap = pci_find_capability(pdev, PCI_CAP_ID_PCIX);
if (bp->pcix_cap == 0) {
@@ -8460,11 +8452,6 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
return 0;
err_out_unmap:
- if (bp->flags & BNX2_FLAG_AER_ENABLED) {
- pci_disable_pcie_error_reporting(pdev);
- bp->flags &= ~BNX2_FLAG_AER_ENABLED;
- }
-
pci_iounmap(pdev, bp->regview);
bp->regview = NULL;
@@ -8638,11 +8625,6 @@ bnx2_remove_one(struct pci_dev *pdev)
bnx2_free_stats_blk(dev);
kfree(bp->temp_stats_blk);
- if (bp->flags & BNX2_FLAG_AER_ENABLED) {
- pci_disable_pcie_error_reporting(pdev);
- bp->flags &= ~BNX2_FLAG_AER_ENABLED;
- }
-
bnx2_release_firmware(bp);
free_netdev(dev);
@@ -8766,9 +8748,6 @@ static pci_ers_result_t bnx2_io_slot_reset(struct pci_dev *pdev)
}
rtnl_unlock();
- if (!(bp->flags & BNX2_FLAG_AER_ENABLED))
- return result;
-
return result;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h
index a09ec47461c9..315b08c64edd 100644
--- a/drivers/net/ethernet/broadcom/bnx2.h
+++ b/drivers/net/ethernet/broadcom/bnx2.h
@@ -6808,7 +6808,6 @@ struct bnx2 {
#define BNX2_FLAG_JUMBO_BROKEN 0x00000800
#define BNX2_FLAG_CAN_KEEP_VLAN 0x00001000
#define BNX2_FLAG_BROKEN_STATS 0x00002000
-#define BNX2_FLAG_AER_ENABLED 0x00004000
struct bnx2_napi bnx2_napi[BNX2_MAX_MSIX_VEC];
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index dd5945c4bfec..8bcde0a6e011 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1486,7 +1486,6 @@ struct bnx2x {
#define IS_VF_FLAG (1 << 22)
#define BC_SUPPORTS_RMMOD_CMD (1 << 23)
#define HAS_PHYS_PORT_ID (1 << 24)
-#define AER_ENABLED (1 << 25)
#define PTP_SUPPORTED (1 << 26)
#define TX_TIMESTAMPING_EN (1 << 27)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 5d1e4fe335aa..3bb5ea570c87 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -13037,14 +13036,6 @@ static const struct net_device_ops bnx2x_netdev_ops = {
.ndo_features_check = bnx2x_features_check,
};
-static void bnx2x_disable_pcie_error_reporting(struct bnx2x *bp)
-{
- if (bp->flags & AER_ENABLED) {
- pci_disable_pcie_error_reporting(bp->pdev);
- bp->flags &= ~AER_ENABLED;
- }
-}
-
static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
struct net_device *dev, unsigned long board_type)
{
@@ -13157,13 +13148,6 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
/* Set PCIe reset type to fundamental for EEH recovery */
pdev->needs_freset = 1;
- /* AER (Advanced Error reporting) configuration */
- rc = pci_enable_pcie_error_reporting(pdev);
- if (!rc)
- bp->flags |= AER_ENABLED;
- else
- BNX2X_DEV_INFO("Failed To configure PCIe AER [%d]\n", rc);
-
/*
* Clean the following indirect addresses for all functions since it
* is not used by the driver.
@@ -14020,8 +14004,6 @@ init_one_freemem:
bnx2x_free_mem_bp(bp);
init_one_exit:
- bnx2x_disable_pcie_error_reporting(bp);
-
if (bp->regview)
iounmap(bp->regview);
@@ -14102,7 +14084,6 @@ static void __bnx2x_remove(struct pci_dev *pdev,
pci_set_power_state(pdev, PCI_D3hot);
}
- bnx2x_disable_pcie_error_reporting(bp);
if (remove_netdev) {
if (bp->regview)
iounmap(bp->regview);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e2e2c986c82b..f533a8f46217 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -48,7 +48,6 @@
#include <linux/prefetch.h>
#include <linux/cache.h>
#include <linux/log2.h>
-#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/cpu_rmap.h>
#include <linux/cpumask.h>
@@ -12705,8 +12704,6 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
goto init_err_release;
}
- pci_enable_pcie_error_reporting(pdev);
-
INIT_WORK(&bp->sp_task, bnxt_sp_task);
INIT_DELAYED_WORK(&bp->fw_reset_task, bnxt_fw_reset_task);
@@ -13186,7 +13183,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_rdma_aux_device_uninit(bp);
bnxt_ptp_clear(bp);
- pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
/* Flush any pending tasks */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index ec573127b707..696f32dfe41f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2862,7 +2862,7 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
if (rc)
return rc;
- buflen = dir_entries * entry_length;
+ buflen = mul_u32_u32(dir_entries, entry_length);
buf = hwrm_req_dma_slice(bp, req, buflen, &dma_handle);
if (!buf) {
hwrm_req_drop(bp, req);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 66e30561569e..26f41243743b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4844,7 +4844,7 @@ static const struct macb_config mpfs_config = {
static const struct macb_config sama7g5_gem_config = {
.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG |
- MACB_CAPS_MIIONRGMII,
+ MACB_CAPS_MIIONRGMII | MACB_CAPS_GEM_HAS_PTP,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
@@ -4853,7 +4853,8 @@ static const struct macb_config sama7g5_gem_config = {
static const struct macb_config sama7g5_emac_config = {
.caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII |
- MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII,
+ MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_MIIONRGMII |
+ MACB_CAPS_GEM_HAS_PTP,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = macb_init,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7db2403c4c9c..f0bc7396ce2b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -51,7 +51,6 @@
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
@@ -6687,7 +6686,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_free_adapter;
}
- pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
pci_save_state(pdev);
adap_idx++;
@@ -7092,7 +7090,6 @@ fw_attach_fail:
out_unmap_bar0:
iounmap(regs);
out_disable_device:
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
out_release_regions:
pci_release_regions(pdev);
@@ -7171,7 +7168,6 @@ static void remove_one(struct pci_dev *pdev)
}
#endif
iounmap(adapter->regs);
- pci_disable_pcie_error_reporting(pdev);
if ((adapter->flags & CXGB4_DEV_ENABLED)) {
pci_disable_device(pdev);
adapter->flags &= ~CXGB4_DEV_ENABLED;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 08ec84cd21c0..61adcebeef01 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -135,7 +135,8 @@ static int be_mcc_notify(struct be_adapter *adapter)
/* To check if valid bit is set, check the entire word as we don't know
* the endianness of the data (old entry is host endian while a new entry is
- * little endian) */
+ * little endian)
+ */
static inline bool be_mcc_compl_is_new(struct be_mcc_compl *compl)
{
u32 flags;
@@ -248,7 +249,8 @@ static int be_mcc_compl_process(struct be_adapter *adapter,
u8 opcode = 0, subsystem = 0;
/* Just swap the status to host endian; mcc tag is opaquely copied
- * from mcc_wrb */
+ * from mcc_wrb
+ */
be_dws_le_to_cpu(compl, 4);
base_status = base_status(compl->status);
@@ -657,8 +659,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
return 0;
}
-/*
- * Insert the mailbox address into the doorbell in two steps
+/* Insert the mailbox address into the doorbell in two steps
* Polls on the mbox doorbell till a command completion (or a timeout) occurs
*/
static int be_mbox_notify_wait(struct be_adapter *adapter)
@@ -802,7 +803,7 @@ static void be_wrb_cmd_hdr_prepare(struct be_cmd_req_hdr *req_hdr,
req_hdr->subsystem = subsystem;
req_hdr->request_length = cpu_to_le32(cmd_len - sizeof(*req_hdr));
req_hdr->version = 0;
- fill_wrb_tags(wrb, (ulong) req_hdr);
+ fill_wrb_tags(wrb, (ulong)req_hdr);
wrb->payload_length = cmd_len;
if (mem) {
wrb->embedded |= (1 & MCC_WRB_SGE_CNT_MASK) <<
@@ -832,8 +833,8 @@ static void be_cmd_page_addrs_prepare(struct phys_addr *pages, u32 max_pages,
static inline struct be_mcc_wrb *wrb_from_mbox(struct be_adapter *adapter)
{
struct be_dma_mem *mbox_mem = &adapter->mbox_mem;
- struct be_mcc_wrb *wrb
- = &((struct be_mcc_mailbox *)(mbox_mem->va))->wrb;
+ struct be_mcc_wrb *wrb = &((struct be_mcc_mailbox *)(mbox_mem->va))->wrb;
+
memset(wrb, 0, sizeof(*wrb));
return wrb;
}
@@ -896,7 +897,7 @@ static struct be_mcc_wrb *be_cmd_copy(struct be_adapter *adapter,
memcpy(dest_wrb, wrb, sizeof(*wrb));
if (wrb->embedded & cpu_to_le32(MCC_WRB_EMBEDDED_MASK))
- fill_wrb_tags(dest_wrb, (ulong) embedded_payload(wrb));
+ fill_wrb_tags(dest_wrb, (ulong)embedded_payload(wrb));
return dest_wrb;
}
@@ -1114,7 +1115,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr,
err:
mutex_unlock(&adapter->mcc_lock);
- if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST)
+ if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST)
status = -EPERM;
return status;
@@ -1803,7 +1804,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf)
total_size = buf_len;
- get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
+ get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60 * 1024;
get_fat_cmd.va = dma_alloc_coherent(&adapter->pdev->dev,
get_fat_cmd.size,
&get_fat_cmd.dma, GFP_ATOMIC);
@@ -1813,7 +1814,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf)
mutex_lock(&adapter->mcc_lock);
while (total_size) {
- buf_size = min(total_size, (u32)60*1024);
+ buf_size = min(total_size, (u32)60 * 1024);
total_size -= buf_size;
wrb = wrb_from_mccq(adapter);
@@ -3362,7 +3363,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern,
req->pattern = cpu_to_le64(pattern);
req->byte_count = cpu_to_le32(byte_cnt);
for (i = 0; i < byte_cnt; i++) {
- req->snd_buff[i] = (u8)(pattern >> (j*8));
+ req->snd_buff[i] = (u8)(pattern >> (j * 8));
j++;
if (j > 7)
j = 0;
@@ -3846,7 +3847,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
req->hdr.domain = domain;
req->mac_count = mac_count;
if (mac_count)
- memcpy(req->mac, mac_array, ETH_ALEN*mac_count);
+ memcpy(req->mac, mac_array, ETH_ALEN * mac_count);
status = be_mcc_notify_wait(adapter);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 46fe3d74e2e9..aed1b622f51f 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -16,7 +16,6 @@
#include "be.h"
#include "be_cmds.h"
#include <asm/div64.h>
-#include <linux/aer.h>
#include <linux/if_bridge.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>
@@ -5726,8 +5725,6 @@ static void be_remove(struct pci_dev *pdev)
be_unmap_pci_bars(adapter);
be_drv_cleanup(adapter);
- pci_disable_pcie_error_reporting(pdev);
-
pci_release_regions(pdev);
pci_disable_device(pdev);
@@ -5845,10 +5842,6 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
goto free_netdev;
}
- status = pci_enable_pcie_error_reporting(pdev);
- if (!status)
- dev_info(&pdev->dev, "PCIe error reporting enabled\n");
-
status = be_map_pci_bars(adapter);
if (status)
goto free_netdev;
@@ -5893,7 +5886,6 @@ drv_cleanup:
unmap_bars:
be_unmap_pci_bars(adapter);
free_netdev:
- pci_disable_pcie_error_reporting(pdev);
free_netdev(netdev);
rel_reg:
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c
index fb5120d90f26..3680f83feba2 100644
--- a/drivers/net/ethernet/fungible/funcore/fun_dev.c
+++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
-#include <linux/aer.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
@@ -748,7 +747,6 @@ void fun_dev_disable(struct fun_dev *fdev)
pci_free_irq_vectors(pdev);
pci_clear_master(pdev);
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
fun_unmap_bars(fdev);
@@ -781,8 +779,6 @@ int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
goto unmap;
}
- pci_enable_pcie_error_reporting(pdev);
-
rc = sanitize_dev(fdev);
if (rc)
goto disable_dev;
@@ -830,7 +826,6 @@ free_irq_mgr:
free_irqs:
pci_free_irq_vectors(pdev);
disable_dev:
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
unmap:
fun_unmap_bars(fdev);
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 64eb0442c82f..e214b51d3c8b 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -47,6 +47,10 @@
#define GVE_RX_BUFFER_SIZE_DQO 2048
+#define GVE_XDP_ACTIONS 5
+
+#define GVE_TX_MAX_HEADER_SIZE 182
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -230,7 +234,10 @@ struct gve_rx_ring {
u64 rx_frag_flip_cnt; /* free-running count of rx segments where page_flip was used */
u64 rx_frag_copy_cnt; /* free-running count of rx segments copied */
u64 rx_frag_alloc_cnt; /* free-running count of rx page allocations */
-
+ u64 xdp_tx_errors;
+ u64 xdp_redirect_errors;
+ u64 xdp_alloc_fails;
+ u64 xdp_actions[GVE_XDP_ACTIONS];
u32 q_num; /* queue index */
u32 ntfy_id; /* notification block index */
struct gve_queue_resources *q_resources; /* head and tail pointer idx */
@@ -238,6 +245,12 @@ struct gve_rx_ring {
struct u64_stats_sync statss; /* sync stats for 32bit archs */
struct gve_rx_ctx ctx; /* Info for packet currently being processed in this ring. */
+
+ /* XDP stuff */
+ struct xdp_rxq_info xdp_rxq;
+ struct xdp_rxq_info xsk_rxq;
+ struct xsk_buff_pool *xsk_pool;
+ struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */
};
/* A TX desc ring entry */
@@ -258,7 +271,14 @@ struct gve_tx_iovec {
* ring entry but only used for a pkt_desc not a seg_desc
*/
struct gve_tx_buffer_state {
- struct sk_buff *skb; /* skb for this pkt */
+ union {
+ struct sk_buff *skb; /* skb for this pkt */
+ struct xdp_frame *xdp_frame; /* xdp_frame */
+ };
+ struct {
+ u16 size; /* size of xmitted xdp pkt */
+ u8 is_xsk; /* xsk buff */
+ } xdp;
union {
struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */
struct {
@@ -373,6 +393,8 @@ struct gve_tx_ring {
struct {
/* Spinlock for when cleanup in progress */
spinlock_t clean_lock;
+ /* Spinlock for XDP tx traffic */
+ spinlock_t xdp_lock;
};
/* DQO fields. */
@@ -450,6 +472,12 @@ struct gve_tx_ring {
dma_addr_t q_resources_bus; /* dma address of the queue resources */
dma_addr_t complq_bus_dqo; /* dma address of the dqo.compl_ring */
struct u64_stats_sync statss; /* sync stats for 32bit archs */
+ struct xsk_buff_pool *xsk_pool;
+ u32 xdp_xsk_wakeup;
+ u32 xdp_xsk_done;
+ u64 xdp_xsk_sent;
+ u64 xdp_xmit;
+ u64 xdp_xmit_errors;
} ____cacheline_aligned;
/* Wraps the info for one irq including the napi struct and the queues
@@ -526,9 +554,11 @@ struct gve_priv {
u16 rx_data_slot_cnt; /* rx buffer length */
u64 max_registered_pages;
u64 num_registered_pages; /* num pages registered with NIC */
+ struct bpf_prog *xdp_prog; /* XDP BPF program */
u32 rx_copybreak; /* copy packets smaller than this */
u16 default_num_queues; /* default num queues to set up */
+ u16 num_xdp_queues;
struct gve_queue_config tx_cfg;
struct gve_queue_config rx_cfg;
struct gve_qpl_config qpl_cfg; /* map used QPL ids */
@@ -785,7 +815,17 @@ static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
- return priv->tx_cfg.num_queues;
+ return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+/* Returns the number of XDP tx queue page lists
+ */
+static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
+{
+ if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+ return 0;
+
+ return priv->num_xdp_queues;
}
/* Returns the number of rx queue page lists
@@ -798,16 +838,35 @@ static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
return priv->rx_cfg.num_queues;
}
+static inline u32 gve_tx_qpl_id(struct gve_priv *priv, int tx_qid)
+{
+ return tx_qid;
+}
+
+static inline u32 gve_rx_qpl_id(struct gve_priv *priv, int rx_qid)
+{
+ return priv->tx_cfg.max_queues + rx_qid;
+}
+
+static inline u32 gve_tx_start_qpl_id(struct gve_priv *priv)
+{
+ return gve_tx_qpl_id(priv, 0);
+}
+
+static inline u32 gve_rx_start_qpl_id(struct gve_priv *priv)
+{
+ return gve_rx_qpl_id(priv, 0);
+}
+
/* Returns a pointer to the next available tx qpl in the list of qpls
*/
static inline
-struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv, int tx_qid)
{
- int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size);
+ int id = gve_tx_qpl_id(priv, tx_qid);
- /* we are out of tx qpls */
- if (id >= gve_num_tx_qpls(priv))
+ /* QPL already in use */
+ if (test_bit(id, priv->qpl_cfg.qpl_id_map))
return NULL;
set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -817,14 +876,12 @@ struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv)
/* Returns a pointer to the next available rx qpl in the list of qpls
*/
static inline
-struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv)
+struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv, int rx_qid)
{
- int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map,
- priv->qpl_cfg.qpl_map_size,
- gve_num_tx_qpls(priv));
+ int id = gve_rx_qpl_id(priv, rx_qid);
- /* we are out of rx qpls */
- if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv))
+ /* QPL already in use */
+ if (test_bit(id, priv->qpl_cfg.qpl_id_map))
return NULL;
set_bit(id, priv->qpl_cfg.qpl_id_map);
@@ -843,7 +900,7 @@ static inline void gve_unassign_qpl(struct gve_priv *priv, int id)
static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv,
int id)
{
- if (id < gve_num_tx_qpls(priv))
+ if (id < gve_rx_start_qpl_id(priv))
return DMA_TO_DEVICE;
else
return DMA_FROM_DEVICE;
@@ -855,6 +912,21 @@ static inline bool gve_is_gqi(struct gve_priv *priv)
priv->queue_format == GVE_GQI_QPL_FORMAT;
}
+static inline u32 gve_num_tx_queues(struct gve_priv *priv)
+{
+ return priv->tx_cfg.num_queues + priv->num_xdp_queues;
+}
+
+static inline u32 gve_xdp_tx_queue_id(struct gve_priv *priv, u32 queue_id)
+{
+ return priv->tx_cfg.num_queues + queue_id;
+}
+
+static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv)
+{
+ return gve_xdp_tx_queue_id(priv, 0);
+}
+
/* buffers */
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
struct page **page, dma_addr_t *dma,
@@ -863,9 +935,15 @@ void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
enum dma_data_direction);
/* tx handling */
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev);
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags);
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p);
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid);
bool gve_tx_poll(struct gve_notify_block *block, int budget);
-int gve_tx_alloc_rings(struct gve_priv *priv);
-void gve_tx_free_rings_gqi(struct gve_priv *priv);
+bool gve_xdp_poll(struct gve_notify_block *block, int budget);
+int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings);
+void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings);
u32 gve_tx_load_event_counter(struct gve_priv *priv,
struct gve_tx_ring *tx);
bool gve_tx_clean_pending(struct gve_priv *priv, struct gve_tx_ring *tx);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 60061288ad9d..252974202a3f 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -516,12 +516,12 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
return gve_adminq_issue_cmd(priv, &cmd);
}
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
{
int err;
int i;
- for (i = 0; i < num_queues; i++) {
+ for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_create_tx_queue(priv, i);
if (err)
return err;
@@ -604,12 +604,12 @@ static int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index)
return 0;
}
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 num_queues)
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues)
{
int err;
int i;
- for (i = 0; i < num_queues; i++) {
+ for (i = start_id; i < start_id + num_queues; i++) {
err = gve_adminq_destroy_tx_queue(priv, i);
if (err)
return err;
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index cf29662e6ad1..f894beb3deaf 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -410,8 +410,8 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
dma_addr_t db_array_bus_addr,
u32 num_ntfy_blks);
int gve_adminq_deconfigure_device_resources(struct gve_priv *priv);
-int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 num_queues);
-int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 queue_id);
+int gve_adminq_create_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
+int gve_adminq_destroy_tx_queues(struct gve_priv *priv, u32 start_id, u32 num_queues);
int gve_adminq_create_rx_queues(struct gve_priv *priv, u32 num_queues);
int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id);
int gve_adminq_register_page_list(struct gve_priv *priv,
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index ce574d097e28..b18804e934d3 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -34,6 +34,11 @@ static u32 gve_get_msglevel(struct net_device *netdev)
return priv->msg_enable;
}
+/* For the following stats column string names, make sure the order
+ * matches how it is filled in the code. For xdp_aborted, xdp_drop,
+ * xdp_pass, xdp_tx, xdp_redirect, make sure it also matches the order
+ * as declared in enum xdp_action inside file uapi/linux/bpf.h .
+ */
static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "tx_packets", "rx_bytes", "tx_bytes",
"rx_dropped", "tx_dropped", "tx_timeouts",
@@ -49,12 +54,16 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
"rx_drops_packet_over_mru[%u]", "rx_drops_invalid_checksum[%u]",
+ "rx_xdp_aborted[%u]", "rx_xdp_drop[%u]", "rx_xdp_pass[%u]",
+ "rx_xdp_tx[%u]", "rx_xdp_redirect[%u]",
+ "rx_xdp_tx_errors[%u]", "rx_xdp_redirect_errors[%u]", "rx_xdp_alloc_fails[%u]",
};
static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
- "tx_dma_mapping_error[%u]",
+ "tx_dma_mapping_error[%u]", "tx_xsk_wakeup[%u]",
+ "tx_xsk_done[%u]", "tx_xsk_sent[%u]", "tx_xdp_xmit[%u]", "tx_xdp_xmit_errors[%u]"
};
static const char gve_gstrings_adminq_stats[][ETH_GSTRING_LEN] = {
@@ -81,8 +90,10 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct gve_priv *priv = netdev_priv(netdev);
char *s = (char *)data;
+ int num_tx_queues;
int i, j;
+ num_tx_queues = gve_num_tx_queues(priv);
switch (stringset) {
case ETH_SS_STATS:
memcpy(s, *gve_gstrings_main_stats,
@@ -97,7 +108,7 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
}
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < num_tx_queues; i++) {
for (j = 0; j < NUM_GVE_TX_CNTS; j++) {
snprintf(s, ETH_GSTRING_LEN,
gve_gstrings_tx_stats[j], i);
@@ -124,12 +135,14 @@ static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
static int gve_get_sset_count(struct net_device *netdev, int sset)
{
struct gve_priv *priv = netdev_priv(netdev);
+ int num_tx_queues;
+ num_tx_queues = gve_num_tx_queues(priv);
switch (sset) {
case ETH_SS_STATS:
return GVE_MAIN_STATS_LEN + GVE_ADMINQ_STATS_LEN +
(priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) +
- (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS);
+ (num_tx_queues * NUM_GVE_TX_CNTS);
case ETH_SS_PRIV_FLAGS:
return GVE_PRIV_FLAGS_STR_LEN;
default:
@@ -153,18 +166,20 @@ gve_get_ethtool_stats(struct net_device *netdev,
struct gve_priv *priv;
bool skip_nic_stats;
unsigned int start;
+ int num_tx_queues;
int ring;
int i, j;
ASSERT_RTNL();
priv = netdev_priv(netdev);
+ num_tx_queues = gve_num_tx_queues(priv);
report_stats = priv->stats_report->stats;
rx_qid_to_stats_idx = kmalloc_array(priv->rx_cfg.num_queues,
sizeof(int), GFP_KERNEL);
if (!rx_qid_to_stats_idx)
return;
- tx_qid_to_stats_idx = kmalloc_array(priv->tx_cfg.num_queues,
+ tx_qid_to_stats_idx = kmalloc_array(num_tx_queues,
sizeof(int), GFP_KERNEL);
if (!tx_qid_to_stats_idx) {
kfree(rx_qid_to_stats_idx);
@@ -195,7 +210,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
}
}
for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
- ring < priv->tx_cfg.num_queues; ring++) {
+ ring < num_tx_queues; ring++) {
if (priv->tx) {
do {
start =
@@ -232,7 +247,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
i = GVE_MAIN_STATS_LEN;
/* For rx cross-reporting stats, start from nic rx stats in report */
- base_stats_idx = GVE_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ base_stats_idx = GVE_TX_STATS_REPORT_NUM * num_tx_queues +
GVE_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues;
max_stats_idx = NIC_RX_STATS_REPORT_NUM * priv->rx_cfg.num_queues +
base_stats_idx;
@@ -283,14 +298,26 @@ gve_get_ethtool_stats(struct net_device *netdev,
if (skip_nic_stats) {
/* skip NIC rx stats */
i += NIC_RX_STATS_REPORT_NUM;
- continue;
- }
- for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
- u64 value =
- be64_to_cpu(report_stats[rx_qid_to_stats_idx[ring] + j].value);
+ } else {
+ stats_idx = rx_qid_to_stats_idx[ring];
+ for (j = 0; j < NIC_RX_STATS_REPORT_NUM; j++) {
+ u64 value =
+ be64_to_cpu(report_stats[stats_idx + j].value);
- data[i++] = value;
+ data[i++] = value;
+ }
}
+ /* XDP rx counters */
+ do {
+ start = u64_stats_fetch_begin(&priv->rx[ring].statss);
+ for (j = 0; j < GVE_XDP_ACTIONS; j++)
+ data[i + j] = rx->xdp_actions[j];
+ data[i + j++] = rx->xdp_tx_errors;
+ data[i + j++] = rx->xdp_redirect_errors;
+ data[i + j++] = rx->xdp_alloc_fails;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ i += GVE_XDP_ACTIONS + 3; /* XDP rx counters */
}
} else {
i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
@@ -298,7 +325,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
/* For tx cross-reporting stats, start from nic tx stats in report */
base_stats_idx = max_stats_idx;
- max_stats_idx = NIC_TX_STATS_REPORT_NUM * priv->tx_cfg.num_queues +
+ max_stats_idx = NIC_TX_STATS_REPORT_NUM * num_tx_queues +
max_stats_idx;
/* Preprocess the stats report for tx, map queue id to start index */
skip_nic_stats = false;
@@ -316,7 +343,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
}
/* walk TX rings */
if (priv->tx) {
- for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ for (ring = 0; ring < num_tx_queues; ring++) {
struct gve_tx_ring *tx = &priv->tx[ring];
if (gve_is_gqi(priv)) {
@@ -346,16 +373,28 @@ gve_get_ethtool_stats(struct net_device *netdev,
if (skip_nic_stats) {
/* skip NIC tx stats */
i += NIC_TX_STATS_REPORT_NUM;
- continue;
- }
- for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
- u64 value =
- be64_to_cpu(report_stats[tx_qid_to_stats_idx[ring] + j].value);
- data[i++] = value;
+ } else {
+ stats_idx = tx_qid_to_stats_idx[ring];
+ for (j = 0; j < NIC_TX_STATS_REPORT_NUM; j++) {
+ u64 value =
+ be64_to_cpu(report_stats[stats_idx + j].value);
+ data[i++] = value;
+ }
}
+ /* XDP xsk counters */
+ data[i++] = tx->xdp_xsk_wakeup;
+ data[i++] = tx->xdp_xsk_done;
+ do {
+ start = u64_stats_fetch_begin(&priv->tx[ring].statss);
+ data[i] = tx->xdp_xsk_sent;
+ data[i + 1] = tx->xdp_xmit;
+ data[i + 2] = tx->xdp_xmit_errors;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ i += 3; /* XDP tx counters */
}
} else {
- i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+ i += num_tx_queues * NUM_GVE_TX_CNTS;
}
kfree(rx_qid_to_stats_idx);
@@ -412,6 +451,12 @@ static int gve_set_channels(struct net_device *netdev,
if (!new_rx || !new_tx)
return -EINVAL;
+ if (priv->num_xdp_queues &&
+ (new_tx != new_rx || (2 * new_tx > priv->tx_cfg.max_queues))) {
+ dev_err(&priv->pdev->dev, "XDP load failed: The number of configured RX queues should be equal to the number of configured TX queues and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues");
+ return -EINVAL;
+ }
+
if (!netif_carrier_ok(netdev)) {
priv->tx_cfg.num_queues = new_tx;
priv->rx_cfg.num_queues = new_rx;
@@ -502,7 +547,9 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
{
struct gve_priv *priv = netdev_priv(netdev);
u64 ori_flags, new_flags;
+ int num_tx_queues;
+ num_tx_queues = gve_num_tx_queues(priv);
ori_flags = READ_ONCE(priv->ethtool_flags);
new_flags = ori_flags;
@@ -522,7 +569,7 @@ static int gve_set_priv_flags(struct net_device *netdev, u32 flags)
/* delete report stats timer. */
if (!(flags & BIT(0)) && (ori_flags & BIT(0))) {
int tx_stats_num = GVE_TX_STATS_REPORT_NUM *
- priv->tx_cfg.num_queues;
+ num_tx_queues;
int rx_stats_num = GVE_RX_STATS_REPORT_NUM *
priv->rx_cfg.num_queues;
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 07111c241e0e..57ce74315eba 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -4,8 +4,10 @@
* Copyright (C) 2015-2021 Google, Inc.
*/
+#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
+#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -15,6 +17,7 @@
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
+#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
@@ -90,8 +93,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
struct gve_priv *priv = netdev_priv(dev);
unsigned int start;
u64 packets, bytes;
+ int num_tx_queues;
int ring;
+ num_tx_queues = gve_num_tx_queues(priv);
if (priv->rx) {
for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
do {
@@ -106,7 +111,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
}
}
if (priv->tx) {
- for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ for (ring = 0; ring < num_tx_queues; ring++) {
do {
start =
u64_stats_fetch_begin(&priv->tx[ring].statss);
@@ -180,7 +185,7 @@ static int gve_alloc_stats_report(struct gve_priv *priv)
int tx_stats_num, rx_stats_num;
tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
- priv->tx_cfg.num_queues;
+ gve_num_tx_queues(priv);
rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
priv->rx_cfg.num_queues;
priv->stats_report_len = struct_size(priv->stats_report, stats,
@@ -245,8 +250,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
block = container_of(napi, struct gve_notify_block, napi);
priv = block->priv;
- if (block->tx)
- reschedule |= gve_tx_poll(block, budget);
+ if (block->tx) {
+ if (block->tx->q_num < priv->tx_cfg.num_queues)
+ reschedule |= gve_tx_poll(block, budget);
+ else
+ reschedule |= gve_xdp_poll(block, budget);
+ }
+
if (block->rx) {
work_done = gve_rx_poll(block, budget);
reschedule |= work_done == budget;
@@ -580,13 +590,36 @@ static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
netif_napi_del(&block->napi);
}
+/* Register each XDP TX queue page list with the device through the admin
+ * queue. The XDP lists start at the QPL id of the first XDP TX queue.
+ * Returns 0 on success or the first admin queue error.
+ */
+static int gve_register_xdp_qpls(struct gve_priv *priv)
+{
+	int first_qpl = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	int qpl_idx;
+	int ret;
+
+	for (qpl_idx = first_qpl;
+	     qpl_idx < first_qpl + gve_num_xdp_qpls(priv); qpl_idx++) {
+		ret = gve_adminq_register_page_list(priv, &priv->qpls[qpl_idx]);
+		if (!ret)
+			continue;
+
+		/* This failure will trigger a reset - no need to clean up */
+		netif_err(priv, drv, priv->dev,
+			  "failed to register queue page list %d\n",
+			  priv->qpls[qpl_idx].id);
+		return ret;
+	}
+
+	return 0;
+}
+
static int gve_register_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int start_id;
int err;
int i;
- for (i = 0; i < num_qpls; i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -598,16 +631,63 @@ static int gve_register_qpls(struct gve_priv *priv)
return err;
}
}
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
+/* Unregister each XDP TX queue page list from the device through the admin
+ * queue, walking the same QPL id range that gve_register_xdp_qpls() uses.
+ * Returns 0 on success or the first admin queue error; lists after the
+ * failing one are left untouched.
+ */
+static int gve_unregister_xdp_qpls(struct gve_priv *priv)
+{
+ int start_id;
+ int err;
+ int i;
+
+ /* XDP page lists begin at the QPL id of the first XDP TX queue */
+ start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+ for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int start_id;
int err;
int i;
- for (i = 0; i < num_qpls; i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
/* This failure will trigger a reset - no need to clean up */
if (err) {
@@ -620,22 +700,44 @@ static int gve_unregister_qpls(struct gve_priv *priv)
return 0;
}
+/* Ask the device to create priv->num_xdp_queues TX queues starting at the
+ * first XDP TX queue id. Returns 0 on success or the admin queue error.
+ */
+static int gve_create_xdp_rings(struct gve_priv *priv)
+{
+	int ret = gve_adminq_create_tx_queues(priv,
+					      gve_xdp_tx_start_queue_id(priv),
+					      priv->num_xdp_queues);
+
+	if (!ret) {
+		netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
+			  priv->num_xdp_queues);
+		return 0;
+	}
+
+	/* This failure will trigger a reset - no need to clean up */
+	netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
+		  priv->num_xdp_queues);
+	return ret;
+}
+
static int gve_create_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int err;
int i;
- err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
+ err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
if (err) {
netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
- priv->tx_cfg.num_queues);
+ num_tx_queues);
/* This failure will trigger a reset - no need to clean
* up
*/
return err;
}
netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
- priv->tx_cfg.num_queues);
+ num_tx_queues);
err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
if (err) {
@@ -668,6 +770,23 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}
+/* For every XDP TX ring: initialise its u64 stats sync, record the notify
+ * block index it maps to, and attach @napi_poll as that block's NAPI handler.
+ */
+static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
+					 int (*napi_poll)(struct napi_struct *napi,
+							  int budget))
+{
+	int first_qid = gve_xdp_tx_start_queue_id(priv);
+	int qid = first_qid;
+
+	while (qid < first_qid + priv->num_xdp_queues) {
+		struct gve_tx_ring *tx = &priv->tx[qid];
+		int ntfy_idx = gve_tx_idx_to_ntfy(priv, qid);
+
+		u64_stats_init(&tx->statss);
+		tx->ntfy_id = ntfy_idx;
+		gve_add_napi(priv, ntfy_idx, napi_poll);
+		qid++;
+	}
+}
+
static void add_napi_init_sync_stats(struct gve_priv *priv,
int (*napi_poll)(struct napi_struct *napi,
int budget))
@@ -675,7 +794,7 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
int i;
/* Add tx napi & init sync stats*/
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < gve_num_tx_queues(priv); i++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
u64_stats_init(&priv->tx[i].statss);
@@ -692,34 +811,51 @@ static void add_napi_init_sync_stats(struct gve_priv *priv,
}
}
-static void gve_tx_free_rings(struct gve_priv *priv)
+static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
if (gve_is_gqi(priv)) {
- gve_tx_free_rings_gqi(priv);
+ gve_tx_free_rings_gqi(priv, start_id, num_rings);
} else {
gve_tx_free_rings_dqo(priv);
}
}
+/* Allocate the XDP TX rings and hook up their NAPI handlers. A no-op that
+ * returns 0 when no XDP queues are configured; otherwise returns 0 or the
+ * ring allocation error.
+ */
+static int gve_alloc_xdp_rings(struct gve_priv *priv)
+{
+	int ret;
+
+	if (priv->num_xdp_queues) {
+		ret = gve_tx_alloc_rings(priv, gve_xdp_tx_start_queue_id(priv),
+					 priv->num_xdp_queues);
+		if (ret)
+			return ret;
+		add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
+	}
+
+	return 0;
+}
+
static int gve_alloc_rings(struct gve_priv *priv)
{
int err;
/* Setup tx rings */
- priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx),
+ priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
GFP_KERNEL);
if (!priv->tx)
return -ENOMEM;
if (gve_is_gqi(priv))
- err = gve_tx_alloc_rings(priv);
+ err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
else
err = gve_tx_alloc_rings_dqo(priv);
if (err)
goto free_tx;
/* Setup rx rings */
- priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx),
+ priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
GFP_KERNEL);
if (!priv->rx) {
err = -ENOMEM;
@@ -744,18 +880,39 @@ free_rx:
kvfree(priv->rx);
priv->rx = NULL;
free_tx_queue:
- gve_tx_free_rings(priv);
+ gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
kvfree(priv->tx);
priv->tx = NULL;
return err;
}
+/* Ask the device to destroy the XDP TX queues, i.e. priv->num_xdp_queues
+ * queues starting at the first XDP TX queue id. Returns 0 on success or the
+ * admin queue error.
+ */
+static int gve_destroy_xdp_rings(struct gve_priv *priv)
+{
+	int first_qid = gve_xdp_tx_start_queue_id(priv);
+	int ret;
+
+	ret = gve_adminq_destroy_tx_queues(priv, first_qid,
+					   priv->num_xdp_queues);
+	if (!ret) {
+		netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
+		return 0;
+	}
+
+	/* This failure will trigger a reset - no need to clean up */
+	netif_err(priv, drv, priv->dev,
+		  "failed to destroy XDP queues\n");
+	return ret;
+}
+
static int gve_destroy_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int err;
- err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
+ err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
if (err) {
netif_err(priv, drv, priv->dev,
"failed to destroy tx queues\n");
@@ -782,17 +939,33 @@ static void gve_rx_free_rings(struct gve_priv *priv)
gve_rx_free_rings_dqo(priv);
}
+/* Detach NAPI from every XDP TX ring and free those rings. Does nothing when
+ * the TX ring array has not been allocated. The priv->tx array itself is not
+ * freed here.
+ */
+static void gve_free_xdp_rings(struct gve_priv *priv)
+{
+	int first_qid = gve_xdp_tx_start_queue_id(priv);
+	int qid;
+
+	if (!priv->tx)
+		return;
+
+	for (qid = first_qid; qid < first_qid + priv->num_xdp_queues; qid++)
+		gve_remove_napi(priv, gve_tx_idx_to_ntfy(priv, qid));
+
+	gve_tx_free_rings(priv, first_qid, priv->num_xdp_queues);
+}
+
static void gve_free_rings(struct gve_priv *priv)
{
+ int num_tx_queues = gve_num_tx_queues(priv);
int ntfy_idx;
int i;
if (priv->tx) {
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = 0; i < num_tx_queues; i++) {
ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
gve_remove_napi(priv, ntfy_idx);
}
- gve_tx_free_rings(priv);
+ gve_tx_free_rings(priv, 0, num_tx_queues);
kvfree(priv->tx);
priv->tx = NULL;
}
@@ -889,40 +1062,68 @@ static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
kvfree(qpl->page_buses);
+ qpl->page_buses = NULL;
free_pages:
kvfree(qpl->pages);
+ qpl->pages = NULL;
priv->num_registered_pages -= qpl->num_entries;
}
+/* Allocate one queue page list of priv->tx_pages_per_qpl pages for each XDP
+ * TX queue. On failure every list from the first XDP QPL up to and including
+ * the one that failed is released, and the error is returned.
+ */
+static int gve_alloc_xdp_qpls(struct gve_priv *priv)
+{
+	int first_qpl = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	int qpl_idx, undo_idx;
+	int ret = 0;
+
+	for (qpl_idx = first_qpl;
+	     qpl_idx < first_qpl + gve_num_xdp_qpls(priv); qpl_idx++) {
+		ret = gve_alloc_queue_page_list(priv, qpl_idx,
+						priv->tx_pages_per_qpl);
+		if (ret)
+			break;
+	}
+	if (!ret)
+		return 0;
+
+	/* The bound is inclusive: the list whose allocation failed is freed too */
+	for (undo_idx = first_qpl; undo_idx <= qpl_idx; undo_idx++)
+		gve_free_queue_page_list(priv, undo_idx);
+	return ret;
+}
+
static int gve_alloc_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
+ int start_id;
int i, j;
int err;
- if (num_qpls == 0)
+ if (priv->queue_format != GVE_GQI_QPL_FORMAT)
return 0;
- priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL);
+ priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
if (!priv->qpls)
return -ENOMEM;
- for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+ start_id = gve_tx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
priv->tx_pages_per_qpl);
if (err)
goto free_qpls;
}
- for (; i < num_qpls; i++) {
+
+ start_id = gve_rx_start_qpl_id(priv);
+ for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
err = gve_alloc_queue_page_list(priv, i,
priv->rx_data_slot_cnt);
if (err)
goto free_qpls;
}
- priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+ priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
sizeof(unsigned long) * BITS_PER_BYTE;
- priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls),
+ priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
sizeof(unsigned long), GFP_KERNEL);
if (!priv->qpl_cfg.qpl_id_map) {
err = -ENOMEM;
@@ -935,23 +1136,36 @@ free_qpls:
for (j = 0; j <= i; j++)
gve_free_queue_page_list(priv, j);
kvfree(priv->qpls);
+ priv->qpls = NULL;
return err;
}
+/* Release the queue page lists that belong to the XDP TX queues. */
+static void gve_free_xdp_qpls(struct gve_priv *priv)
+{
+	int first_qpl = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
+	int qpl_idx = first_qpl;
+
+	while (qpl_idx < first_qpl + gve_num_xdp_qpls(priv)) {
+		gve_free_queue_page_list(priv, qpl_idx);
+		qpl_idx++;
+	}
+}
+
static void gve_free_qpls(struct gve_priv *priv)
{
- int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
int i;
- if (num_qpls == 0)
+ if (!priv->qpls)
return;
kvfree(priv->qpl_cfg.qpl_id_map);
+ priv->qpl_cfg.qpl_id_map = NULL;
- for (i = 0; i < num_qpls; i++)
+ for (i = 0; i < max_queues; i++)
gve_free_queue_page_list(priv, i);
kvfree(priv->qpls);
+ priv->qpls = NULL;
}
/* Use this to schedule a reset when the device is capable of continuing
@@ -969,11 +1183,109 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
+/* Register XDP receive-queue info for every RX ring and bind any AF_XDP
+ * buffer pools already attached to @dev.
+ *
+ * Does nothing and returns 0 when no XDP queues are configured. Otherwise,
+ * for each RX queue the regular xdp_rxq is registered with the
+ * MEM_TYPE_PAGE_SHARED memory model; if an XSK pool exists for that queue id
+ * a second rxq (xsk_rxq) is registered with MEM_TYPE_XSK_BUFF_POOL and handed
+ * to the pool. Finally each XDP TX ring is pointed at the pool of the queue
+ * id it is paired with.
+ *
+ * Returns 0 on success. On failure, rxq info registered so far is
+ * unregistered and the error is returned.
+ */
+static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
+{
+ struct napi_struct *napi;
+ struct gve_rx_ring *rx;
+ int err = 0;
+ int i, j;
+ u32 tx_qid;
+
+ if (!priv->num_xdp_queues)
+ return 0;
+
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ rx = &priv->rx[i];
+ /* rxq info is tied to the NAPI instance of the ring's notify block */
+ napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+
+ err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
+ napi->napi_id);
+ if (err)
+ goto err;
+ err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err)
+ goto err;
+ /* NULL when no AF_XDP pool is bound to this queue id */
+ rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
+ if (rx->xsk_pool) {
+ err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
+ napi->napi_id);
+ if (err)
+ goto err;
+ err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+ MEM_TYPE_XSK_BUFF_POOL, NULL);
+ if (err)
+ goto err;
+ xsk_pool_set_rxq_info(rx->xsk_pool,
+ &rx->xsk_rxq);
+ }
+ }
+
+ /* XDP TX queue i uses the same pool as queue id i */
+ for (i = 0; i < priv->num_xdp_queues; i++) {
+ tx_qid = gve_xdp_tx_queue_id(priv, i);
+ priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
+ }
+ return 0;
+
+err:
+ /* Unwind from the failing queue (inclusive) down to queue 0; the
+ * is_reg checks skip anything that was never registered.
+ * NOTE(review): rx->xsk_pool is left set on the unwound rings - confirm
+ * callers do not rely on it being NULL after a failure.
+ */
+ for (j = i; j >= 0; j--) {
+ rx = &priv->rx[j];
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+ if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+ xdp_rxq_info_unreg(&rx->xsk_rxq);
+ }
+ return err;
+}
+
+/* Undo gve_reg_xdp_info(): unregister the xdp_rxq of every RX ring, also
+ * unregister xsk_rxq and drop the pool pointer on rings that have an XSK
+ * pool, then clear the pool pointer on every XDP TX ring. Returns early when
+ * no XDP queues are configured.
+ */
+static void gve_unreg_xdp_info(struct gve_priv *priv)
+{
+	struct gve_rx_ring *rx;
+	int idx;
+
+	if (priv->num_xdp_queues == 0)
+		return;
+
+	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
+		rx = &priv->rx[idx];
+		xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (!rx->xsk_pool)
+			continue;
+		xdp_rxq_info_unreg(&rx->xsk_rxq);
+		rx->xsk_pool = NULL;
+	}
+
+	for (idx = 0; idx < priv->num_xdp_queues; idx++)
+		priv->tx[gve_xdp_tx_queue_id(priv, idx)].xsk_pool = NULL;
+}
+
+/* Drain the page fragment cache of every RX ring that currently holds a
+ * page, then mark the cache empty by clearing its va pointer.
+ */
+static void gve_drain_page_cache(struct gve_priv *priv)
+{
+	int q;
+
+	for (q = 0; q < priv->rx_cfg.num_queues; q++) {
+		struct page_frag_cache *cache = &priv->rx[q].page_cache;
+
+		if (!cache->va)
+			continue;
+		__page_frag_cache_drain(virt_to_page(cache->va),
+					cache->pagecnt_bias);
+		cache->va = NULL;
+	}
+}
+
static int gve_open(struct net_device *dev)
{
struct gve_priv *priv = netdev_priv(dev);
int err;
+ if (priv->xdp_prog)
+ priv->num_xdp_queues = priv->rx_cfg.num_queues;
+ else
+ priv->num_xdp_queues = 0;
+
err = gve_alloc_qpls(priv);
if (err)
return err;
@@ -989,6 +1301,10 @@ static int gve_open(struct net_device *dev)
if (err)
goto free_rings;
+ err = gve_reg_xdp_info(priv, dev);
+ if (err)
+ goto free_rings;
+
err = gve_register_qpls(priv);
if (err)
goto reset;
@@ -1043,6 +1359,7 @@ static int gve_close(struct net_device *dev)
netif_carrier_off(dev);
if (gve_get_device_rings_ok(priv)) {
gve_turndown(priv);
+ gve_drain_page_cache(priv);
err = gve_destroy_rings(priv);
if (err)
goto err;
@@ -1053,6 +1370,7 @@ static int gve_close(struct net_device *dev)
}
del_timer_sync(&priv->stats_report_timer);
+ gve_unreg_xdp_info(priv);
gve_free_rings(priv);
gve_free_qpls(priv);
priv->interface_down_cnt++;
@@ -1069,6 +1387,306 @@ err:
return gve_reset_recovery(priv, false);
}
+/* Tear down the XDP TX queues: destroy them on the device, unregister their
+ * page lists, then release the host-side rxq info, rings and page lists and
+ * reset the XDP queue count. Stops at, and returns, the first device-side
+ * error without freeing host resources.
+ */
+static int gve_remove_xdp_queues(struct gve_priv *priv)
+{
+	int ret = gve_destroy_xdp_rings(priv);
+
+	if (!ret)
+		ret = gve_unregister_xdp_qpls(priv);
+	if (ret)
+		return ret;
+
+	gve_unreg_xdp_info(priv);
+	gve_free_xdp_rings(priv);
+	gve_free_xdp_qpls(priv);
+	priv->num_xdp_queues = 0;
+
+	return 0;
+}
+
+/* Bring up one XDP TX queue per configured TX queue on a live interface.
+ *
+ * Steps, in order: allocate the XDP page lists, allocate the XDP TX rings,
+ * register XDP rxq info, register the page lists with the device, and create
+ * the queues on the device. On any failure the steps already completed on
+ * the host side are unwound in reverse and priv->num_xdp_queues is reset
+ * to 0.
+ *
+ * Returns 0 on success or the error of the step that failed.
+ */
+static int gve_add_xdp_queues(struct gve_priv *priv)
+{
+	int err;
+
+	priv->num_xdp_queues = priv->tx_cfg.num_queues;
+
+	err = gve_alloc_xdp_qpls(priv);
+	if (err)
+		goto err;
+
+	err = gve_alloc_xdp_rings(priv);
+	if (err)
+		goto free_xdp_qpls;
+
+	/* gve_reg_xdp_info() unwinds its own partial work on failure */
+	err = gve_reg_xdp_info(priv, priv->dev);
+	if (err)
+		goto free_xdp_rings;
+
+	err = gve_register_xdp_qpls(priv);
+	if (err)
+		goto unreg_xdp_info;
+
+	err = gve_create_xdp_rings(priv);
+	if (err)
+		goto unreg_xdp_info;
+
+	return 0;
+
+unreg_xdp_info:
+	/* Must run while num_xdp_queues is still non-zero: gve_unreg_xdp_info()
+	 * returns early once the count is 0, which would leave the rxq info
+	 * registered after this function fails.
+	 */
+	gve_unreg_xdp_info(priv);
+free_xdp_rings:
+	gve_free_xdp_rings(priv);
+free_xdp_qpls:
+	gve_free_xdp_qpls(priv);
+err:
+	priv->num_xdp_queues = 0;
+	return err;
+}
+
+/* Propagate the device-reported link state to the netdev carrier. Ignored
+ * while NAPI is disabled or when the carrier already matches @link_status.
+ */
+static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
+{
+	if (!gve_get_napi_enabled(priv) ||
+	    link_status == netif_carrier_ok(priv->dev))
+		return;
+
+	if (!link_status) {
+		netdev_info(priv->dev, "Device link is down.\n");
+		netif_carrier_off(priv->dev);
+		return;
+	}
+
+	netdev_info(priv->dev, "Device link is up.\n");
+	netif_carrier_on(priv->dev);
+}
+
+/* Install, replace or remove the XDP program on the device.
+ *
+ * @prog: new program, or NULL to remove the current one.
+ * @extack: not used by this function.
+ *
+ * When the carrier is down only the program pointer is swapped. Otherwise
+ * the data path is quiesced with gve_turndown(), XDP TX queues are added
+ * (no program -> program) or removed (program -> no program), the pointer is
+ * swapped, and the data path is restarted. Replacing one program with
+ * another changes no queues.
+ *
+ * Returns 0 on success or the error from adding/removing the XDP queues; in
+ * the error case the previous program stays installed and the data path is
+ * still turned back up.
+ */
+static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct bpf_prog *old_prog;
+ int err = 0;
+ u32 status;
+
+ old_prog = READ_ONCE(priv->xdp_prog);
+ /* Carrier down: no queue changes here, just swap the program pointer */
+ if (!netif_carrier_ok(priv->dev)) {
+ WRITE_ONCE(priv->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+ return 0;
+ }
+
+ gve_turndown(priv);
+ if (!old_prog && prog) {
+ /* Allocate XDP TX queues if an XDP program is
+ * being installed
+ */
+ err = gve_add_xdp_queues(priv);
+ if (err)
+ goto out;
+ } else if (old_prog && !prog) {
+ /* Remove XDP TX queues if an XDP program is
+ * being uninstalled
+ */
+ err = gve_remove_xdp_queues(priv);
+ if (err)
+ goto out;
+ }
+ WRITE_ONCE(priv->xdp_prog, prog);
+ /* Drop the reference held on the program being replaced */
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+out:
+ gve_turnup(priv);
+ /* Re-read the link state from the device and sync the carrier to it */
+ status = ioread32be(&priv->reg_bar0->device_status);
+ gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
+ return err;
+}
+
+/* Attach an AF_XDP buffer pool to RX queue @qid and its paired XDP TX queue.
+ *
+ * The pool is rejected with -EINVAL when @qid is not a configured RX queue
+ * or when the pool's RX frame size is smaller than the device max MTU plus
+ * an Ethernet header. The pool is then DMA-mapped. If no XDP program is
+ * installed the function stops there and returns 0. Otherwise a dedicated
+ * xsk_rxq is registered with the MEM_TYPE_XSK_BUFF_POOL memory model and the
+ * pool is recorded on both the RX ring and the paired XDP TX ring.
+ *
+ * Returns 0 on success or a negative errno; on a registration failure the
+ * rxq info is unregistered and the pool is DMA-unmapped again.
+ */
+static int gve_xsk_pool_enable(struct net_device *dev,
+			       struct xsk_buff_pool *pool,
+			       u16 qid)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	struct napi_struct *napi;
+	struct gve_rx_ring *rx;
+	int tx_qid;
+	int err;
+
+	if (qid >= priv->rx_cfg.num_queues) {
+		/* Log lines need a trailing newline to be emitted as one record */
+		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d\n", qid);
+		return -EINVAL;
+	}
+	if (xsk_pool_get_rx_frame_size(pool) <
+	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
+		dev_err(&priv->pdev->dev, "xsk pool frame_len too small\n");
+		return -EINVAL;
+	}
+
+	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
+			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	if (err)
+		return err;
+
+	/* If XDP prog is not installed, return */
+	if (!priv->xdp_prog)
+		return 0;
+
+	rx = &priv->rx[qid];
+	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
+	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
+	if (err)
+		goto err;
+
+	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
+					 MEM_TYPE_XSK_BUFF_POOL, NULL);
+	if (err)
+		goto err;
+
+	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
+	rx->xsk_pool = pool;
+
+	/* The XDP TX queue paired with this RX queue shares the pool */
+	tx_qid = gve_xdp_tx_queue_id(priv, qid);
+	priv->tx[tx_qid].xsk_pool = pool;
+
+	return 0;
+err:
+	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
+		xdp_rxq_info_unreg(&rx->xsk_rxq);
+
+	xsk_pool_dma_unmap(pool,
+			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return err;
+}
+
+/* Detach the AF_XDP buffer pool bound to queue @qid.
+ *
+ * Returns -EINVAL when no pool is bound to @qid or @qid is not a configured
+ * RX queue. Without an XDP program only the DMA mapping is torn down. With a
+ * program installed, the pool pointers on the RX ring and its paired XDP TX
+ * ring are cleared and xsk_rxq is unregistered; if the interface is running
+ * this happens with both rings' NAPI instances disabled, after which NAPI is
+ * re-enabled and rescheduled if work is pending. The pool is DMA-unmapped
+ * last. Returns 0 on success.
+ */
+static int gve_xsk_pool_disable(struct net_device *dev,
+ u16 qid)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct napi_struct *napi_rx;
+ struct napi_struct *napi_tx;
+ struct xsk_buff_pool *pool;
+ int tx_qid;
+
+ pool = xsk_get_pool_from_qid(dev, qid);
+ if (!pool)
+ return -EINVAL;
+ if (qid >= priv->rx_cfg.num_queues)
+ return -EINVAL;
+
+ /* If XDP prog is not installed, unmap DMA and return */
+ if (!priv->xdp_prog)
+ goto done;
+
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
+ /* Interface not running: clear the pool without touching NAPI */
+ if (!netif_running(dev)) {
+ priv->rx[qid].xsk_pool = NULL;
+ xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+ priv->tx[tx_qid].xsk_pool = NULL;
+ goto done;
+ }
+
+ napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
+ napi_disable(napi_rx); /* make sure current rx poll is done */
+
+ napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
+ napi_disable(napi_tx); /* make sure current tx poll is done */
+
+ priv->rx[qid].xsk_pool = NULL;
+ xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
+ priv->tx[tx_qid].xsk_pool = NULL;
+ smp_mb(); /* Make sure it is visible to the workers on datapath */
+
+ /* Restart polling and kick it if work arrived while NAPI was disabled */
+ napi_enable(napi_rx);
+ if (gve_rx_work_pending(&priv->rx[qid]))
+ napi_schedule(napi_rx);
+
+ napi_enable(napi_tx);
+ if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
+ napi_schedule(napi_tx);
+
+done:
+ xsk_pool_dma_unmap(pool,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ return 0;
+}
+
+/* ndo_xsk_wakeup handler. Returns -EINVAL when @queue_id is not a configured
+ * RX queue or no XDP program is installed. For XDP_WAKEUP_TX the NAPI
+ * instance of the paired XDP TX ring is scheduled (or marked missed if it is
+ * already scheduled) and the ring's wakeup counter is bumped. Other flags
+ * are ignored. Returns 0 otherwise.
+ */
+static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+	struct napi_struct *napi;
+	struct gve_tx_ring *tx;
+
+	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
+		return -EINVAL;
+
+	if (!(flags & XDP_WAKEUP_TX))
+		return 0;
+
+	tx = &priv->tx[tx_queue_id];
+	napi = &priv->ntfy_blocks[tx->ntfy_id].napi;
+
+	if (!napi_if_scheduled_mark_missed(napi)) {
+		/* Call local_bh_enable to trigger SoftIRQ processing */
+		local_bh_disable();
+		napi_schedule(napi);
+		local_bh_enable();
+	}
+
+	tx->xdp_xsk_wakeup++;
+
+	return 0;
+}
+
+/* Check that the current device configuration can run XDP.
+ *
+ * Returns -EOPNOTSUPP when LRO is enabled, when the queue format is not
+ * GVE_GQI_QPL_FORMAT, or when the MTU exceeds half a page minus the Ethernet
+ * header and GVE_RX_PAD. Returns -EINVAL when the RX and TX queue counts
+ * differ or when twice the TX queue count exceeds the TX queue maximum.
+ * Returns 0 when XDP can be used.
+ */
+static int verify_xdp_configuration(struct net_device *dev)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+
+	if (dev->features & NETIF_F_LRO) {
+		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
+		netdev_warn(dev, "XDP is not supported in mode %d.\n",
+			    priv->queue_format);
+		return -EOPNOTSUPP;
+	}
+
+	if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
+		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
+			    dev->mtu);
+		return -EOPNOTSUPP;
+	}
+
+	/* 2 * num_queues is compared against the maximum because installing a
+	 * program adds one XDP TX queue per configured TX queue.
+	 */
+	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
+	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
+		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d\n",
+			    priv->rx_cfg.num_queues,
+			    priv->tx_cfg.num_queues,
+			    priv->tx_cfg.max_queues);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* ndo_bpf handler. The device configuration is validated first for every
+ * command; then XDP_SETUP_PROG installs or removes a program and
+ * XDP_SETUP_XSK_POOL enables (pool given) or disables (pool NULL) an AF_XDP
+ * pool on the requested queue. Any other command yields -EINVAL.
+ */
+static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct gve_priv *priv = netdev_priv(dev);
+	int ret = verify_xdp_configuration(dev);
+
+	if (ret)
+		return ret;
+
+	if (xdp->command == XDP_SETUP_PROG)
+		return gve_set_xdp(priv, xdp->prog, xdp->extack);
+
+	if (xdp->command != XDP_SETUP_XSK_POOL)
+		return -EINVAL;
+
+	if (!xdp->xsk.pool)
+		return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
+
+	return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
+}
+
int gve_adjust_queues(struct gve_priv *priv,
struct gve_queue_config new_rx_config,
struct gve_queue_config new_tx_config)
@@ -1118,7 +1736,7 @@ static void gve_turndown(struct gve_priv *priv)
return;
/* Disable napi to prevent more work from coming in */
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
@@ -1146,7 +1764,7 @@ static void gve_turnup(struct gve_priv *priv)
netif_tx_start_all_queues(priv->dev);
/* Enable napi and unmask interrupts for all queues */
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
@@ -1263,6 +1881,9 @@ static const struct net_device_ops gve_netdev_ops = {
.ndo_get_stats64 = gve_get_stats,
.ndo_tx_timeout = gve_tx_timeout,
.ndo_set_features = gve_set_features,
+ .ndo_bpf = gve_xdp,
+ .ndo_xdp_xmit = gve_xdp_xmit,
+ .ndo_xsk_wakeup = gve_xsk_wakeup,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1306,7 +1927,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
be64_add_cpu(&priv->stats_report->written_count, 1);
/* tx stats */
if (priv->tx) {
- for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
u32 last_completion = 0;
u32 tx_frames = 0;
@@ -1369,23 +1990,6 @@ void gve_handle_report_stats(struct gve_priv *priv)
}
}
-static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
-{
- if (!gve_get_napi_enabled(priv))
- return;
-
- if (link_status == netif_carrier_ok(priv->dev))
- return;
-
- if (link_status) {
- netdev_info(priv->dev, "Device link is up.\n");
- netif_carrier_on(priv->dev);
- } else {
- netdev_info(priv->dev, "Device link is down.\n");
- netif_carrier_off(priv->dev);
- }
-}
-
/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
@@ -1399,6 +2003,18 @@ static void gve_service_task(struct work_struct *work)
gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
+/* Set the netdev's xdp_features: the full set of XDP actions is advertised
+ * only for the GQI-QPL queue format; every other format advertises none.
+ */
+static void gve_set_netdev_xdp_features(struct gve_priv *priv)
+{
+	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
+		priv->dev->xdp_features = 0;
+		return;
+	}
+
+	priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC |
+				  NETDEV_XDP_ACT_REDIRECT |
+				  NETDEV_XDP_ACT_NDO_XMIT |
+				  NETDEV_XDP_ACT_XSK_ZEROCOPY;
+}
+
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
int num_ntfy;
@@ -1477,6 +2093,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
}
setup_device:
+ gve_set_netdev_xdp_features(priv);
err = gve_setup_device_resources(priv);
if (!err)
return 0;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 1f55137722b0..d1da7413dc4d 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -8,6 +8,9 @@
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
static void gve_rx_free_buffer(struct device *dev,
struct gve_rx_slot_page_info *page_info,
@@ -124,7 +127,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
return -ENOMEM;
if (!rx->data.raw_addressing) {
- rx->data.qpl = gve_assign_rx_qpl(priv);
+ rx->data.qpl = gve_assign_rx_qpl(priv, rx->q_num);
if (!rx->data.qpl) {
kvfree(rx->data.page_info);
rx->data.page_info = NULL;
@@ -556,7 +559,7 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
if (len <= priv->rx_copybreak && is_only_frag) {
/* Just copy small packets */
- skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
+ skb = gve_rx_copy(netdev, napi, page_info, len);
if (skb) {
u64_stats_update_begin(&rx->statss);
rx->rx_copied_pkt++;
@@ -591,6 +594,107 @@ static struct sk_buff *gve_rx_skb(struct gve_priv *priv, struct gve_rx_ring *rx,
return skb;
}
+/* Copy a received packet into a buffer taken from the ring's XSK pool and
+ * redirect it.
+ *
+ * Returns -E2BIG when the packet does not fit in a pool frame, -ENOMEM when
+ * no pool buffer is available, otherwise the xdp_do_redirect() result.
+ */
+static int gve_xsk_pool_redirect(struct net_device *dev,
+				 struct gve_rx_ring *rx,
+				 void *data, int len,
+				 struct bpf_prog *xdp_prog)
+{
+	struct xdp_buff *xsk_buf;
+	int rc;
+
+	if (len > rx->xsk_pool->frame_len)
+		return -E2BIG;
+
+	xsk_buf = xsk_buff_alloc(rx->xsk_pool);
+	if (!xsk_buf) {
+		u64_stats_update_begin(&rx->statss);
+		rx->xdp_alloc_fails++;
+		u64_stats_update_end(&rx->statss);
+		return -ENOMEM;
+	}
+
+	memcpy(xsk_buf->data, data, len);
+	xsk_buf->data_end = xsk_buf->data + len;
+
+	rc = xdp_do_redirect(dev, xsk_buf, xdp_prog);
+	if (!rc)
+		return 0;
+
+	/* Redirect failed: the buffer goes back to the pool. */
+	xsk_buff_free(xsk_buf);
+	return rc;
+}
+
+/* Redirect a received packet by copying it out of the RX buffer.
+ *
+ * If the ring has an XSK pool the work is delegated to
+ * gve_xsk_pool_redirect(). Otherwise the payload is copied into a fragment
+ * allocated from rx->page_cache and that copy is passed to
+ * xdp_do_redirect().
+ *
+ * Returns the redirect result, or -ENOMEM if the fragment allocation fails.
+ */
+static int gve_xdp_redirect(struct net_device *dev, struct gve_rx_ring *rx,
+ struct xdp_buff *orig, struct bpf_prog *xdp_prog)
+{
+ int total_len, len = orig->data_end - orig->data;
+ int headroom = XDP_PACKET_HEADROOM;
+ struct xdp_buff new;
+ void *frame;
+ int err;
+
+ if (rx->xsk_pool)
+ return gve_xsk_pool_redirect(dev, rx, orig->data,
+ len, xdp_prog);
+
+ /* Headroom + aligned payload + aligned skb_shared_info */
+ total_len = headroom + SKB_DATA_ALIGN(len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ frame = page_frag_alloc(&rx->page_cache, total_len, GFP_ATOMIC);
+ if (!frame) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_alloc_fails++;
+ u64_stats_update_end(&rx->statss);
+ return -ENOMEM;
+ }
+ /* Describe the new fragment as an xdp_buff, then copy the payload in */
+ xdp_init_buff(&new, total_len, &rx->xdp_rxq);
+ xdp_prepare_buff(&new, frame, headroom, len, false);
+ memcpy(new.data, orig->data, len);
+
+ err = xdp_do_redirect(dev, &new, xdp_prog);
+ /* On failure the fragment is freed here */
+ if (err)
+ page_frag_free(frame);
+
+ return err;
+}
+
+/* Act on a non-PASS verdict returned by the XDP program for one packet.
+ *
+ * XDP_TX queues the packet on the XDP TX ring selected by
+ * gve_xdp_tx_queue_id() for this RX queue; XDP_REDIRECT goes through
+ * gve_xdp_redirect(). Any other verdict needs no further work here.
+ * Per-action and per-error counters on the RX ring are updated.
+ */
+static void gve_xdp_done(struct gve_priv *priv, struct gve_rx_ring *rx,
+ struct xdp_buff *xdp, struct bpf_prog *xprog,
+ int xdp_act)
+{
+ struct gve_tx_ring *tx;
+ int tx_qid;
+ int err;
+
+ switch (xdp_act) {
+ case XDP_ABORTED:
+ case XDP_DROP:
+ default:
+ break;
+ case XDP_TX:
+ tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num);
+ tx = &priv->tx[tx_qid];
+ /* The doorbell is not rung here; only descriptors are queued */
+ spin_lock(&tx->xdp_lock);
+ err = gve_xdp_xmit_one(priv, tx, xdp->data,
+ xdp->data_end - xdp->data, NULL);
+ spin_unlock(&tx->xdp_lock);
+
+ if (unlikely(err)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_tx_errors++;
+ u64_stats_update_end(&rx->statss);
+ }
+ break;
+ case XDP_REDIRECT:
+ err = gve_xdp_redirect(priv->dev, rx, xdp, xprog);
+
+ if (unlikely(err)) {
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_redirect_errors++;
+ u64_stats_update_end(&rx->statss);
+ }
+ break;
+ }
+ /* Count the verdict; the bound check keeps out-of-range values out of the array */
+ u64_stats_update_begin(&rx->statss);
+ if ((u32)xdp_act < GVE_XDP_ACTIONS)
+ rx->xdp_actions[xdp_act]++;
+ u64_stats_update_end(&rx->statss);
+}
+
#define GVE_PKTCONT_BIT_IS_SET(x) (GVE_RXF_PKT_CONT & (x))
static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
struct gve_rx_desc *desc, u32 idx,
@@ -603,9 +707,12 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
union gve_rx_data_slot *data_slot;
struct gve_priv *priv = rx->gve;
struct sk_buff *skb = NULL;
+ struct bpf_prog *xprog;
+ struct xdp_buff xdp;
dma_addr_t page_bus;
void *va;
+ u16 len = frag_size;
struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
bool is_first_frag = ctx->frag_cnt == 0;
@@ -645,9 +752,35 @@ static void gve_rx(struct gve_rx_ring *rx, netdev_features_t feat,
dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
PAGE_SIZE, DMA_FROM_DEVICE);
page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
+ len -= page_info->pad;
frag_size -= page_info->pad;
- skb = gve_rx_skb(priv, rx, page_info, napi, frag_size,
+ xprog = READ_ONCE(priv->xdp_prog);
+ if (xprog && is_only_frag) {
+ void *old_data;
+ int xdp_act;
+
+ xdp_init_buff(&xdp, rx->packet_buffer_size, &rx->xdp_rxq);
+ xdp_prepare_buff(&xdp, page_info->page_address +
+ page_info->page_offset, GVE_RX_PAD,
+ len, false);
+ old_data = xdp.data;
+ xdp_act = bpf_prog_run_xdp(xprog, &xdp);
+ if (xdp_act != XDP_PASS) {
+ gve_xdp_done(priv, rx, &xdp, xprog, xdp_act);
+ ctx->total_size += frag_size;
+ goto finish_ok_pkt;
+ }
+
+ page_info->pad += xdp.data - old_data;
+ len = xdp.data_end - xdp.data;
+
+ u64_stats_update_begin(&rx->statss);
+ rx->xdp_actions[XDP_PASS]++;
+ u64_stats_update_end(&rx->statss);
+ }
+
+ skb = gve_rx_skb(priv, rx, page_info, napi, len,
data_slot, is_only_frag);
if (!skb) {
u64_stats_update_begin(&rx->statss);
@@ -773,6 +906,8 @@ static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
netdev_features_t feat)
{
+ u64 xdp_redirects = rx->xdp_actions[XDP_REDIRECT];
+ u64 xdp_txs = rx->xdp_actions[XDP_TX];
struct gve_rx_ctx *ctx = &rx->ctx;
struct gve_priv *priv = rx->gve;
struct gve_rx_cnts cnts = {0};
@@ -820,6 +955,12 @@ static int gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
u64_stats_update_end(&rx->statss);
}
+ if (xdp_txs != rx->xdp_actions[XDP_TX])
+ gve_xdp_tx_flush(priv, rx->q_num);
+
+ if (xdp_redirects != rx->xdp_actions[XDP_REDIRECT])
+ xdp_do_flush();
+
/* restock ring slots */
if (!rx->data.raw_addressing) {
/* In QPL mode buffs are refilled as the desc are processed */
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index 630f42a3037b..e57b73eb70f6 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -568,7 +568,7 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
if (eop && buf_len <= priv->rx_copybreak) {
rx->ctx.skb_head = gve_rx_copy(priv->dev, napi,
- &buf_state->page_info, buf_len, 0);
+ &buf_state->page_info, buf_len);
if (unlikely(!rx->ctx.skb_head))
goto error;
rx->ctx.skb_tail = rx->ctx.skb_head;
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 4888bf05fbed..e50510b8e784 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -11,6 +11,7 @@
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>
+#include <net/xdp_sock_drv.h>
static inline void gve_tx_put_doorbell(struct gve_priv *priv,
struct gve_queue_resources *q_resources,
@@ -19,6 +20,14 @@ static inline void gve_tx_put_doorbell(struct gve_priv *priv,
iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}
+/* Ring the doorbell of the XDP TX ring that gve_xdp_tx_queue_id() maps
+ * @xdp_qid to, using the ring's current req counter.
+ */
+void gve_xdp_tx_flush(struct gve_priv *priv, u32 xdp_qid)
+{
+	struct gve_tx_ring *ring;
+	u32 qid;
+
+	qid = gve_xdp_tx_queue_id(priv, xdp_qid);
+	ring = &priv->tx[qid];
+
+	gve_tx_put_doorbell(priv, ring->q_resources, ring->req);
+}
+
/* gvnic can only transmit from a Registered Segment.
* We copy skb payloads into the registered segment before writing Tx
* descriptors and ringing the Tx doorbell.
@@ -132,6 +141,58 @@ static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
atomic_add(bytes, &fifo->available);
}
+/* Zero the length and padding of every iov slot in @info and return the
+ * total number of bytes (length plus padding) those slots held.
+ */
+static size_t gve_tx_clear_buffer_state(struct gve_tx_buffer_state *info)
+{
+	size_t reclaimed = 0;
+	int slot = 0;
+
+	while (slot < ARRAY_SIZE(info->iov)) {
+		reclaimed += info->iov[slot].iov_len + info->iov[slot].iov_padding;
+		info->iov[slot].iov_padding = 0;
+		info->iov[slot].iov_len = 0;
+		slot++;
+	}
+
+	return reclaimed;
+}
+
+/* Reclaim up to @to_do completed descriptors on an XDP TX ring.
+ *
+ * For every completed slot that carried a packet, the XDP frame (if any) is
+ * returned, its FIFO space is released, and XSK completions are reported to
+ * the pool. tx->done is advanced by exactly @to_do.
+ *
+ * Returns the number of packets cleaned.
+ */
+static int gve_clean_xdp_done(struct gve_priv *priv, struct gve_tx_ring *tx,
+			      u32 to_do)
+{
+	struct gve_tx_buffer_state *info;
+	u64 pkts = 0, bytes = 0;
+	size_t space_freed = 0;
+	u32 xsk_complete = 0;
+	u32 idx;
+	u32 i;
+
+	/* Count iterations instead of comparing tx->done with a precomputed
+	 * end value: tx->done is a free-running u32, so "done + to_do" can
+	 * wrap, which would make a "done < end" loop exit immediately and
+	 * leave completions unprocessed.
+	 */
+	for (i = 0; i < to_do; i++) {
+		idx = tx->done & tx->mask;
+		info = &tx->info[idx];
+		tx->done++;
+
+		/* Slots with no recorded packet size have nothing to reclaim */
+		if (unlikely(!info->xdp.size))
+			continue;
+
+		bytes += info->xdp.size;
+		pkts++;
+		xsk_complete += info->xdp.is_xsk;
+
+		info->xdp.size = 0;
+		if (info->xdp_frame) {
+			xdp_return_frame(info->xdp_frame);
+			info->xdp_frame = NULL;
+		}
+		space_freed += gve_tx_clear_buffer_state(info);
+	}
+
+	gve_tx_free_fifo(&tx->tx_fifo, space_freed);
+	if (xsk_complete > 0 && tx->xsk_pool)
+		xsk_tx_completed(tx->xsk_pool, xsk_complete);
+	u64_stats_update_begin(&tx->statss);
+	tx->bytes_done += bytes;
+	tx->pkt_done += pkts;
+	u64_stats_update_end(&tx->statss);
+	return pkts;
+}
+
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u32 to_do, bool try_to_wake);
@@ -144,8 +205,12 @@ static void gve_tx_free_ring(struct gve_priv *priv, int idx)
gve_tx_remove_from_block(priv, idx);
slots = tx->mask + 1;
- gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
- netdev_tx_reset_queue(tx->netdev_txq);
+ if (tx->q_num < priv->tx_cfg.num_queues) {
+ gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false);
+ netdev_tx_reset_queue(tx->netdev_txq);
+ } else {
+ gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt);
+ }
dma_free_coherent(hdev, sizeof(*tx->q_resources),
tx->q_resources, tx->q_resources_bus);
@@ -177,6 +242,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
/* Make sure everything is zeroed to start */
memset(tx, 0, sizeof(*tx));
spin_lock_init(&tx->clean_lock);
+ spin_lock_init(&tx->xdp_lock);
tx->q_num = idx;
tx->mask = slots - 1;
@@ -195,7 +261,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
tx->dev = &priv->pdev->dev;
if (!tx->raw_addressing) {
- tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
+ tx->tx_fifo.qpl = gve_assign_tx_qpl(priv, idx);
if (!tx->tx_fifo.qpl)
goto abort_with_desc;
/* map Tx FIFO */
@@ -213,7 +279,8 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
(unsigned long)tx->bus);
- tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
+ if (idx < priv->tx_cfg.num_queues)
+ tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
gve_tx_add_to_block(priv, idx);
return 0;
@@ -233,12 +300,12 @@ abort_with_info:
return -ENOMEM;
}
-int gve_tx_alloc_rings(struct gve_priv *priv)
+int gve_tx_alloc_rings(struct gve_priv *priv, int start_id, int num_rings)
{
int err = 0;
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ for (i = start_id; i < start_id + num_rings; i++) {
err = gve_tx_alloc_ring(priv, i);
if (err) {
netif_err(priv, drv, priv->dev,
@@ -251,17 +318,17 @@ int gve_tx_alloc_rings(struct gve_priv *priv)
if (err) {
int j;
- for (j = 0; j < i; j++)
+ for (j = start_id; j < i; j++)
gve_tx_free_ring(priv, j);
}
return err;
}
-void gve_tx_free_rings_gqi(struct gve_priv *priv)
+void gve_tx_free_rings_gqi(struct gve_priv *priv, int start_id, int num_rings)
{
int i;
- for (i = 0; i < priv->tx_cfg.num_queues; i++)
+ for (i = start_id; i < start_id + num_rings; i++)
gve_tx_free_ring(priv, i);
}
@@ -374,18 +441,18 @@ static int gve_maybe_stop_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
}
static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
- struct sk_buff *skb, bool is_gso,
+ u16 csum_offset, u8 ip_summed, bool is_gso,
int l4_hdr_offset, u32 desc_cnt,
- u16 hlen, u64 addr)
+ u16 hlen, u64 addr, u16 pkt_len)
{
/* l4_hdr_offset and csum_offset are in units of 16-bit words */
if (is_gso) {
pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
- pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_csum_offset = csum_offset >> 1;
pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
- } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ } else if (likely(ip_summed == CHECKSUM_PARTIAL)) {
pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
- pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
+ pkt_desc->pkt.l4_csum_offset = csum_offset >> 1;
pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
} else {
pkt_desc->pkt.type_flags = GVE_TXD_STD;
@@ -393,7 +460,7 @@ static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
pkt_desc->pkt.l4_hdr_offset = 0;
}
pkt_desc->pkt.desc_cnt = desc_cnt;
- pkt_desc->pkt.len = cpu_to_be16(skb->len);
+ pkt_desc->pkt.len = cpu_to_be16(pkt_len);
pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}
@@ -412,15 +479,16 @@ static void gve_tx_fill_mtd_desc(union gve_tx_desc *mtd_desc,
}
static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
- struct sk_buff *skb, bool is_gso,
+ u16 l3_offset, u16 gso_size,
+ bool is_gso_v6, bool is_gso,
u16 len, u64 addr)
{
seg_desc->seg.type_flags = GVE_TXD_SEG;
if (is_gso) {
- if (skb_is_gso_v6(skb))
+ if (is_gso_v6)
seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
- seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
- seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ seg_desc->seg.l3_offset = l3_offset >> 1;
+ seg_desc->seg.mss = cpu_to_be16(gso_size);
}
seg_desc->seg.seg_len = cpu_to_be16(len);
seg_desc->seg.seg_addr = cpu_to_be64(addr);
@@ -473,9 +541,10 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
&info->iov[payload_iov]);
- gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
+ gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed,
+ is_gso, l4_hdr_offset,
1 + mtd_desc_nr + payload_nfrags, hlen,
- info->iov[hdr_nfrags - 1].iov_offset);
+ info->iov[hdr_nfrags - 1].iov_offset, skb->len);
skb_copy_bits(skb, 0,
tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
@@ -494,7 +563,9 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
next_idx = (tx->req + 1 + mtd_desc_nr + i - payload_iov) & tx->mask;
seg_desc = &tx->desc[next_idx];
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso,
info->iov[i].iov_len,
info->iov[i].iov_offset);
@@ -552,8 +623,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
if (mtd_desc_nr)
num_descriptors++;
- gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
- num_descriptors, hlen, addr);
+ gve_tx_fill_pkt_desc(pkt_desc, skb->csum_offset, skb->ip_summed,
+ is_gso, l4_hdr_offset,
+ num_descriptors, hlen, addr, skb->len);
if (mtd_desc_nr) {
idx = (idx + 1) & tx->mask;
@@ -569,7 +641,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
addr += hlen;
idx = (idx + 1) & tx->mask;
seg_desc = &tx->desc[idx];
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso, len, addr);
}
for (i = 0; i < shinfo->nr_frags; i++) {
@@ -587,7 +661,9 @@ static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
dma_unmap_len_set(&tx->info[idx], len, len);
dma_unmap_addr_set(&tx->info[idx], dma, addr);
- gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
+ gve_tx_fill_seg_desc(seg_desc, skb_network_offset(skb),
+ skb_shinfo(skb)->gso_size,
+ skb_is_gso_v6(skb), is_gso, len, addr);
}
return num_descriptors;
@@ -648,6 +724,103 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+/* Copy an XDP payload into the TX FIFO and write the descriptors for it,
+ * starting at the slot indexed by tx->req.
+ *
+ * @frame_p and @is_xsk are recorded in the buffer state of the first slot
+ * for use at completion time. tx->req itself is not advanced here.
+ *
+ * Returns the number of descriptors written.
+ */
+static int gve_tx_fill_xdp(struct gve_priv *priv, struct gve_tx_ring *tx,
+ void *data, int len, void *frame_p, bool is_xsk)
+{
+ int pad, nfrags, ndescs, iovi, offset;
+ struct gve_tx_buffer_state *info;
+ u32 reqi = tx->req;
+
+ pad = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, len);
+ /* NOTE(review): a pad this large is dropped and the payload may then
+  * span more than one FIFO fragment - confirm intent against
+  * gve_tx_fifo_pad_alloc_one_frag().
+  */
+ if (pad >= GVE_TX_MAX_HEADER_SIZE)
+ pad = 0;
+ info = &tx->info[reqi & tx->mask];
+ info->xdp_frame = frame_p;
+ info->xdp.size = len;
+ info->xdp.is_xsk = is_xsk;
+
+ nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, pad + len,
+ &info->iov[0]);
+ /* When a pad was requested, iov[0] is skipped and gets no descriptor
+  * (presumably it is the pad fragment - verify in gve_tx_alloc_fifo()).
+  */
+ iovi = pad > 0;
+ ndescs = nfrags - iovi;
+ offset = 0;
+
+ while (iovi < nfrags) {
+ /* First fragment gets the packet descriptor, the rest segment descriptors */
+ if (!offset)
+ gve_tx_fill_pkt_desc(&tx->desc[reqi & tx->mask], 0,
+ CHECKSUM_NONE, false, 0, ndescs,
+ info->iov[iovi].iov_len,
+ info->iov[iovi].iov_offset, len);
+ else
+ gve_tx_fill_seg_desc(&tx->desc[reqi & tx->mask],
+ 0, 0, false, false,
+ info->iov[iovi].iov_len,
+ info->iov[iovi].iov_offset);
+
+ memcpy(tx->tx_fifo.base + info->iov[iovi].iov_offset,
+ data + offset, info->iov[iovi].iov_len);
+ gve_dma_sync_for_device(&priv->pdev->dev,
+ tx->tx_fifo.qpl->page_buses,
+ info->iov[iovi].iov_offset,
+ info->iov[iovi].iov_len);
+ offset += info->iov[iovi].iov_len;
+ iovi++;
+ reqi++;
+ }
+
+ return ndescs;
+}
+
+/* ndo_xdp_xmit callback: queue up to @n XDP frames on one XDP TX ring.
+ *
+ * The ring is chosen from the current CPU id modulo the number of XDP
+ * queues. Queuing stops at the first frame that cannot be queued.
+ *
+ * Returns -EINVAL for unsupported @flags; otherwise the number of frames
+ * queued, or the error from the first frame if none were queued.
+ */
+int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ struct gve_tx_ring *tx;
+ int i, err = 0, qid;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ qid = gve_xdp_tx_queue_id(priv,
+ smp_processor_id() % priv->num_xdp_queues);
+
+ tx = &priv->tx[qid];
+
+ spin_lock(&tx->xdp_lock);
+ for (i = 0; i < n; i++) {
+ err = gve_xdp_xmit_one(priv, tx, frames[i]->data,
+ frames[i]->len, frames[i]);
+ if (err)
+ break;
+ }
+
+ /* The doorbell is rung only when the caller asks for a flush */
+ if (flags & XDP_XMIT_FLUSH)
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+
+ spin_unlock(&tx->xdp_lock);
+
+ /* xdp_xmit counts every frame requested; the ones not queued are errors */
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xmit += n;
+ tx->xdp_xmit_errors += n - i;
+ u64_stats_update_end(&tx->statss);
+
+ return i ? i : err;
+}
+
+/* Queue a single XDP payload on @tx and advance tx->req by the number of
+ * descriptors used. The doorbell is not rung here.
+ *
+ * Returns 0 on success, or -EBUSY when gve_can_tx() reports the ring cannot
+ * take len + GVE_TX_MAX_HEADER_SIZE - 1 bytes.
+ */
+int gve_xdp_xmit_one(struct gve_priv *priv, struct gve_tx_ring *tx,
+		     void *data, int len, void *frame_p)
+{
+	int descs_used;
+
+	if (gve_can_tx(tx, len + GVE_TX_MAX_HEADER_SIZE - 1)) {
+		descs_used = gve_tx_fill_xdp(priv, tx, data, len, frame_p,
+					     false);
+		tx->req += descs_used;
+		return 0;
+	}
+
+	return -EBUSY;
+}
+
#define GVE_TX_START_THRESH PAGE_SIZE
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
@@ -657,8 +830,8 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
u64 pkts = 0, bytes = 0;
size_t space_freed = 0;
struct sk_buff *skb;
- int i, j;
u32 idx;
+ int j;
for (j = 0; j < to_do; j++) {
idx = tx->done & tx->mask;
@@ -680,12 +853,7 @@ static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
dev_consume_skb_any(skb);
if (tx->raw_addressing)
continue;
- /* FIFO free */
- for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
- space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
- info->iov[i].iov_len = 0;
- info->iov[i].iov_padding = 0;
- }
+ space_freed += gve_tx_clear_buffer_state(info);
}
}
@@ -720,6 +888,70 @@ u32 gve_tx_load_event_counter(struct gve_priv *priv,
return be32_to_cpu(counter);
}
+/* Transmit up to @budget descriptors pulled from the ring's XSK pool.
+ *
+ * Runs under tx->xdp_lock. The doorbell is rung once, after the loop, and
+ * only if something was queued.
+ *
+ * Returns the number of XSK descriptors queued.
+ */
+static int gve_xsk_tx(struct gve_priv *priv, struct gve_tx_ring *tx,
+ int budget)
+{
+ struct xdp_desc desc;
+ int sent = 0, nsegs;
+ void *data;
+
+ spin_lock(&tx->xdp_lock);
+ while (sent < budget) {
+ /* Stop when the ring cannot take GVE_TX_START_THRESH more bytes */
+ if (!gve_can_tx(tx, GVE_TX_START_THRESH))
+ goto out;
+
+ /* Pool has no more descriptors to send: record the wakeup as handled */
+ if (!xsk_tx_peek_desc(tx->xsk_pool, &desc)) {
+ tx->xdp_xsk_done = tx->xdp_xsk_wakeup;
+ goto out;
+ }
+
+ data = xsk_buff_raw_get_data(tx->xsk_pool, desc.addr);
+ nsegs = gve_tx_fill_xdp(priv, tx, data, desc.len, NULL, true);
+ tx->req += nsegs;
+ sent++;
+ }
+out:
+ if (sent > 0) {
+ gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
+ xsk_tx_release(tx->xsk_pool);
+ }
+ spin_unlock(&tx->xdp_lock);
+ return sent;
+}
+
+/* Poll handler for an XDP TX ring: reclaim completed descriptors and, when
+ * the ring has an XSK pool, transmit pending XSK descriptors.
+ *
+ * Returns true when more work remains and the caller should poll again.
+ */
+bool gve_xdp_poll(struct gve_notify_block *block, int budget)
+{
+ struct gve_priv *priv = block->priv;
+ struct gve_tx_ring *tx = block->tx;
+ u32 nic_done;
+ bool repoll;
+ u32 to_do;
+
+ /* If budget is 0, do all the work */
+ if (budget == 0)
+ budget = INT_MAX;
+
+ /* Find out how much work there is to be done */
+ nic_done = gve_tx_load_event_counter(priv, tx);
+ to_do = min_t(u32, (nic_done - tx->done), budget);
+ gve_clean_xdp_done(priv, tx, to_do);
+ /* Completions beyond the budget are left for the next poll */
+ repoll = nic_done != tx->done;
+
+ if (tx->xsk_pool) {
+ int sent = gve_xsk_tx(priv, tx, budget);
+
+ u64_stats_update_begin(&tx->statss);
+ tx->xdp_xsk_sent += sent;
+ u64_stats_update_end(&tx->statss);
+ /* A fully used budget means more XSK descriptors may be waiting */
+ repoll |= (sent == budget);
+ if (xsk_uses_need_wakeup(tx->xsk_pool))
+ xsk_set_tx_need_wakeup(tx->xsk_pool);
+ }
+
+ /* If we still have work we want to repoll */
+ return repoll;
+}
+
bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
struct gve_priv *priv = block->priv;
diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c
index 6ba46adaaee3..26e08d753270 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.c
+++ b/drivers/net/ethernet/google/gve/gve_utils.c
@@ -49,10 +49,10 @@ void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
}
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len,
- u16 padding)
+ struct gve_rx_slot_page_info *page_info, u16 len)
{
- void *va = page_info->page_address + padding + page_info->page_offset;
+ void *va = page_info->page_address + page_info->page_offset +
+ page_info->pad;
struct sk_buff *skb;
skb = napi_alloc_skb(napi, len);
diff --git a/drivers/net/ethernet/google/gve/gve_utils.h b/drivers/net/ethernet/google/gve/gve_utils.h
index 79595940b351..324fd98a6112 100644
--- a/drivers/net/ethernet/google/gve/gve_utils.h
+++ b/drivers/net/ethernet/google/gve/gve_utils.h
@@ -18,8 +18,7 @@ void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx);
void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx);
struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi,
- struct gve_rx_slot_page_info *page_info, u16 len,
- u16 pad);
+ struct gve_rx_slot_page_info *page_info, u16 len);
/* Decrement pagecnt_bias. Set it back to INT_MAX if it reached zero. */
void gve_dec_pagecnt_bias(struct gve_rx_slot_page_info *page_info);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 25be7f8ac7cd..5caea154362f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -13,7 +13,6 @@
#include <linux/ipv6.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/skbuff.h>
#include <linux/sctp.h>
#include <net/gre.h>
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e1eb1de88bf9..6f5c16aebcbf 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -23,7 +23,6 @@
#include <linux/smp.h>
#include <linux/pm_qos.h>
#include <linux/pm_runtime.h>
-#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/suspend.h>
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 027d721feb18..d748b98274e7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -3,7 +3,6 @@
#include <linux/module.h>
#include <linux/interrupt.h>
-#include <linux/aer.h>
#include "fm10k.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 60ce4d15d82a..6e310a539467 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -10,7 +10,6 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/netdevice.h>
#include <linux/ioport.h>
#include <linux/iommu.h>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 4934ff58332c..afc4fa8c66af 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -5402,6 +5402,13 @@ flags_complete:
return -EOPNOTSUPP;
}
+ if ((changed_flags & I40E_FLAG_LEGACY_RX) &&
+ I40E_2K_TOO_SMALL_WITH_PADDING) {
+ dev_warn(&pf->pdev->dev,
+ "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
+ return -EOPNOTSUPP;
+ }
+
if ((changed_flags & new_flags &
I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
(new_flags & I40E_FLAG_MFP_ENABLED))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 228cd502bb48..c8ff5675b29d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2896,15 +2896,35 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
}
/**
- * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
+ *
+ * @vsi: VSI to calculate rx_buf_len from
+ */
+static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
+{
+	/* A VSI with a netdev and without legacy Rx picks by page size */
+	if (vsi->netdev && !(vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+		if (PAGE_SIZE < 8192)
+			return I40E_RXBUFFER_3072;
+
+		return I40E_RXBUFFER_2048;
+	}
+
+	/* No netdev, or legacy Rx: a 2K buffer less the skb overhead */
+	return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
+}
+
+/**
+ * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
* @vsi: the vsi
+ * @xdp_prog: XDP program
**/
-static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
+static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi,
+ struct bpf_prog *xdp_prog)
{
- if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
- return I40E_RXBUFFER_2048;
+ u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
+ u16 chain_len;
+
+ if (xdp_prog && !xdp_prog->aux->xdp_has_frags)
+ chain_len = 1;
else
- return I40E_RXBUFFER_3072;
+ chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
+
+ return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER);
}
/**
@@ -2919,12 +2939,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
+ int frame_size;
- if (i40e_enabled_xdp_vsi(vsi)) {
- int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
-
- if (frame_size > i40e_max_xdp_frame_size(vsi))
- return -EINVAL;
+ frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
+ if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) {
+ netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
+ new_mtu, frame_size - I40E_PACKET_HDR_PAD);
+ return -EINVAL;
}
netdev_dbg(netdev, "changing MTU from %d to %d\n",
@@ -3595,6 +3616,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
}
+ xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3640,10 +3663,16 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
/* configure Rx buffer alignment */
- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
+ if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+ if (I40E_2K_TOO_SMALL_WITH_PADDING) {
+ dev_info(&vsi->back->pdev->dev,
+ "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
+ return -EOPNOTSUPP;
+ }
clear_ring_build_skb_enabled(ring);
- else
+ } else {
set_ring_build_skb_enabled(ring);
+ }
ring->rx_offset = i40e_rx_offset(ring);
@@ -3694,24 +3723,6 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
}
/**
- * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
- *
- * @vsi: VSI to calculate rx_buf_len from
- */
-static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
-{
- if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
- return I40E_RXBUFFER_2048;
-
-#if (PAGE_SIZE < 8192)
- if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN)
- return I40E_RXBUFFER_1536 - NET_IP_ALIGN;
-#endif
-
- return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
-}
-
-/**
* i40e_vsi_configure_rx - Configure the VSI for Rx
* @vsi: the VSI being configured
*
@@ -3722,13 +3733,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
int err = 0;
u16 i;
- vsi->max_frame = I40E_MAX_RXBUFFER;
+ vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
#if (PAGE_SIZE < 8192)
if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
- vsi->netdev->mtu <= ETH_DATA_LEN)
- vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+ vsi->netdev->mtu <= ETH_DATA_LEN) {
+ vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+ vsi->max_frame = vsi->rx_buf_len;
+ }
#endif
/* set up individual rings */
@@ -13316,15 +13329,15 @@ out_err:
static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
- int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ int frame_size = i40e_max_vsi_frame_size(vsi, prog);
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;
/* Don't allow frames that span over multiple buffers */
- if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
- NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
+ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
+ NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
return -EINVAL;
}
@@ -13810,7 +13823,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
- NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ NETDEV_XDP_ACT_XSK_ZEROCOPY |
+ NETDEV_XDP_ACT_RX_SG;
} else {
/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
* are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
index 79d587ad5409..33b4e30f5e00 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_trace.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -162,45 +162,45 @@ DECLARE_EVENT_CLASS(
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
- struct sk_buff *skb),
+ struct xdp_buff *xdp),
- TP_ARGS(ring, desc, skb),
+ TP_ARGS(ring, desc, xdp),
TP_STRUCT__entry(
__field(void*, ring)
__field(void*, desc)
- __field(void*, skb)
+ __field(void*, xdp)
__string(devname, ring->netdev->name)
),
TP_fast_assign(
__entry->ring = ring;
__entry->desc = desc;
- __entry->skb = skb;
+ __entry->xdp = xdp;
__assign_str(devname, ring->netdev->name);
),
TP_printk(
- "netdev: %s ring: %p desc: %p skb %p",
+ "netdev: %s ring: %p desc: %p xdp %p",
__get_str(devname), __entry->ring,
- __entry->desc, __entry->skb)
+ __entry->desc, __entry->xdp)
);
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
- struct sk_buff *skb),
+ struct xdp_buff *xdp),
- TP_ARGS(ring, desc, skb));
+ TP_ARGS(ring, desc, xdp));
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq_rx,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
- struct sk_buff *skb),
+ struct xdp_buff *xdp),
- TP_ARGS(ring, desc, skb));
+ TP_ARGS(ring, desc, xdp));
DECLARE_EVENT_CLASS(
i40e_xmit_template,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 924f972b91fa..32cce90abbb4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1477,9 +1477,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
-
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
@@ -1524,6 +1521,7 @@ skip_free:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->next_to_process = 0;
rx_ring->next_to_use = 0;
}
@@ -1576,6 +1574,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
+ rx_ring->next_to_process = 0;
rx_ring->next_to_use = 0;
/* XDP RX-queue info only needed for RX rings exposed to XDP */
@@ -1617,21 +1616,19 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
+#if (PAGE_SIZE >= 8192)
static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
-#if (PAGE_SIZE < 8192)
- truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
-#else
truesize = rx_ring->rx_offset ?
SKB_DATA_ALIGN(size + rx_ring->rx_offset) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
-#endif
return truesize;
}
+#endif
/**
* i40e_alloc_mapped_page - recycle or make a new page
@@ -1970,7 +1967,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* i40e_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buffer: buffer containing the page
* @rx_stats: rx stats structure for the rx ring
- * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling i40e_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
@@ -1981,8 +1977,7 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* or busy if it could not be reused.
*/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
- struct i40e_rx_queue_stats *rx_stats,
- int rx_buffer_pgcnt)
+ struct i40e_rx_queue_stats *rx_stats)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
@@ -1995,7 +1990,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) {
+ if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) {
rx_stats->page_busy_count++;
return false;
}
@@ -2021,33 +2016,14 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
}
/**
- * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_buffer: buffer containing page to add
- * @skb: sk_buff to place the data into
- * @size: packet length from rx_desc
- *
- * This function will add the data contained in rx_buffer->page to the skb.
- * It will just attach the page as a frag to the skb.
- *
- * The function will then update the page offset.
+ * i40e_rx_buffer_flip - adjust rx_buffer to point to an unused region
+ * @rx_buffer: Rx buffer to adjust
+ * @truesize: Size of adjustment
**/
-static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- struct sk_buff *skb,
- unsigned int size)
+static void i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer,
+ unsigned int truesize)
{
#if (PAGE_SIZE < 8192)
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
-#endif
-
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
- rx_buffer->page_offset, size, truesize);
-
- /* page is being used so we must update the page offset */
-#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
@@ -2058,19 +2034,17 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
- * @rx_buffer_pgcnt: buffer page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
- const unsigned int size,
- int *rx_buffer_pgcnt)
+ const unsigned int size)
{
struct i40e_rx_buffer *rx_buffer;
- rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
- *rx_buffer_pgcnt =
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_process);
+ rx_buffer->page_count =
#if (PAGE_SIZE < 8192)
page_count(rx_buffer->page);
#else
@@ -2092,25 +2066,82 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
}
/**
- * i40e_construct_skb - Allocate skb and populate it
+ * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer. It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *rx_buffer)
+{
+ if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) {
+ /* hand second half of page back to the ring */
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ i40e_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+ }
+}
+
+/**
+ * i40e_process_rx_buffs - Processing of buffers post XDP prog or on error
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp_res: Result of the XDP program
+ * @xdp: xdp_buff pointing to the data
+ **/
+static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
+ struct xdp_buff *xdp)
+{
+ u32 next = rx_ring->next_to_clean;
+ struct i40e_rx_buffer *rx_buffer;
+
+ xdp->flags = 0;
+
+ while (1) {
+ rx_buffer = i40e_rx_bi(rx_ring, next);
+ if (++next == rx_ring->count)
+ next = 0;
+
+ if (!rx_buffer->page)
+ continue;
+
+ if (xdp_res == I40E_XDP_CONSUMED)
+ rx_buffer->pagecnt_bias++;
+ else
+ i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+
+ /* EOP buffer will be put in i40e_clean_rx_irq() */
+ if (next == rx_ring->next_to_process)
+ return;
+
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ }
+}
+
+/**
+ * i40e_construct_skb - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
+ * @nr_frags: number of buffers for the packet
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- struct xdp_buff *xdp)
+ struct xdp_buff *xdp,
+ u32 nr_frags)
{
unsigned int size = xdp->data_end - xdp->data;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(size);
-#endif
+ struct i40e_rx_buffer *rx_buffer;
unsigned int headlen;
struct sk_buff *skb;
@@ -2150,48 +2181,60 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
/* update all of the pointers */
size -= headlen;
if (size) {
+ if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
+ dev_kfree_skb(skb);
+ return NULL;
+ }
skb_add_rx_frag(skb, 0, rx_buffer->page,
rx_buffer->page_offset + headlen,
- size, truesize);
-
+ size, xdp->frame_sz);
/* buffer is used by skb, update page_offset */
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
+ i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
} else {
/* buffer is unused, reset bias back to rx_buffer */
rx_buffer->pagecnt_bias++;
}
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
+
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+ sizeof(skb_frag_t) * nr_frags);
+
+ xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
+
+ /* First buffer has already been processed, so bump ntc */
+ if (++rx_ring->next_to_clean == rx_ring->count)
+ rx_ring->next_to_clean = 0;
+
+ i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
+ }
+
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
- * @rx_buffer: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
+ * @nr_frags: number of buffers for the packet
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- struct xdp_buff *xdp)
+ struct xdp_buff *xdp,
+ u32 nr_frags)
{
unsigned int metasize = xdp->data - xdp->data_meta;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(xdp->data_end -
- xdp->data_hard_start);
-#endif
struct sk_buff *skb;
/* Prefetch first cache line of first page. If xdp->data_meta
@@ -2202,7 +2245,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
- skb = napi_build_skb(xdp->data_hard_start, truesize);
+ skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
@@ -2212,42 +2255,25 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
if (metasize)
skb_metadata_set(skb, metasize);
- /* buffer is used by skb, update page_offset */
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ struct skb_shared_info *sinfo;
- return skb;
-}
+ sinfo = xdp_get_shared_info_from_buff(xdp);
+ xdp_update_skb_shared_info(skb, nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_is_frag_pfmemalloc(xdp));
-/**
- * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
- * @rx_ring: rx descriptor ring to transact packets on
- * @rx_buffer: rx buffer to pull data from
- * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
- *
- * This function will clean up the contents of the rx_buffer. It will
- * either recycle the buffer or unmap it and free the associated resources.
- */
-static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- int rx_buffer_pgcnt)
-{
- if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) {
- /* hand second half of page back to the ring */
- i40e_reuse_rx_page(rx_ring, rx_buffer);
+ i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
} else {
- /* we are not reusing the buffer so unmap it */
- dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
- i40e_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
- __page_frag_cache_drain(rx_buffer->page,
- rx_buffer->pagecnt_bias);
- /* clear contents of buffer_info */
- rx_buffer->page = NULL;
+ struct i40e_rx_buffer *rx_buffer;
+
+ rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+ /* buffer is used by skb, update page_offset */
+ i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
}
+
+ return skb;
}
/**
@@ -2333,25 +2359,6 @@ xdp_out:
}
/**
- * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
- * @rx_ring: Rx ring
- * @rx_buffer: Rx buffer to adjust
- * @size: Size of adjustment
- **/
-static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *rx_buffer,
- unsigned int size)
-{
- unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
-
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
-}
-
-/**
* i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
* @xdp_ring: XDP Tx ring
*
@@ -2409,16 +2416,65 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
}
/**
- * i40e_inc_ntc: Advance the next_to_clean index
+ * i40e_inc_ntp: Advance the next_to_process index
* @rx_ring: Rx ring
**/
-static void i40e_inc_ntc(struct i40e_ring *rx_ring)
+static void i40e_inc_ntp(struct i40e_ring *rx_ring)
+{
+ u32 ntp = rx_ring->next_to_process + 1;
+
+ ntp = (ntp < rx_ring->count) ? ntp : 0;
+ rx_ring->next_to_process = ntp;
+ prefetch(I40E_RX_DESC(rx_ring, ntp));
+}
+
+/**
+ * i40e_add_xdp_frag: Add a frag to xdp_buff
+ * @xdp: xdp_buff pointing to the data
+ * @nr_frags: return number of buffers for the packet
+ * @rx_buffer: rx_buffer holding data of the current frag
+ * @size: size of data of current frag
+ */
+static int i40e_add_xdp_frag(struct xdp_buff *xdp, u32 *nr_frags,
+ struct i40e_rx_buffer *rx_buffer, u32 size)
{
- u32 ntc = rx_ring->next_to_clean + 1;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+ if (!xdp_buff_has_frags(xdp)) {
+ sinfo->nr_frags = 0;
+ sinfo->xdp_frags_size = 0;
+ xdp_buff_set_frags_flag(xdp);
+ } else if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) {
+ /* Overflowing packet: All frags need to be dropped */
+ return -ENOMEM;
+ }
+
+ __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buffer->page,
+ rx_buffer->page_offset, size);
+
+ sinfo->xdp_frags_size += size;
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(I40E_RX_DESC(rx_ring, ntc));
+ if (page_is_pfmemalloc(rx_buffer->page))
+ xdp_buff_set_frag_pfmemalloc(xdp);
+ *nr_frags = sinfo->nr_frags;
+
+ return 0;
+}
+
+/**
+ * i40e_consume_xdp_buff - Consume all the buffers of the packet and update ntc
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ * @rx_buffer: rx_buffer of eop desc
+ */
+static void i40e_consume_xdp_buff(struct i40e_ring *rx_ring,
+ struct xdp_buff *xdp,
+ struct i40e_rx_buffer *rx_buffer)
+{
+ i40e_process_rx_buffs(rx_ring, I40E_XDP_CONSUMED, xdp);
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ rx_ring->next_to_clean = rx_ring->next_to_process;
+ xdp->data = NULL;
}
/**
@@ -2437,38 +2493,36 @@ static void i40e_inc_ntc(struct i40e_ring *rx_ring)
static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
unsigned int *rx_cleaned)
{
- unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
+ unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+ u16 clean_threshold = rx_ring->count / 2;
unsigned int offset = rx_ring->rx_offset;
- struct sk_buff *skb = rx_ring->skb;
+ struct xdp_buff *xdp = &rx_ring->xdp;
unsigned int xdp_xmit = 0;
struct bpf_prog *xdp_prog;
bool failure = false;
- struct xdp_buff xdp;
int xdp_res = 0;
-#if (PAGE_SIZE < 8192)
- frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
-#endif
- xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
-
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
while (likely(total_rx_packets < (unsigned int)budget)) {
+ u16 ntp = rx_ring->next_to_process;
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
- int rx_buffer_pgcnt;
+ struct sk_buff *skb;
unsigned int size;
+ u32 nfrags = 0;
+ bool neop;
u64 qword;
/* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
+ if (cleaned_count >= clean_threshold) {
failure = failure ||
i40e_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
- rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+ rx_desc = I40E_RX_DESC(rx_ring, ntp);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
@@ -2487,8 +2541,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
- rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
- i40e_inc_ntc(rx_ring);
+ rx_buffer = i40e_rx_bi(rx_ring, ntp);
+ i40e_inc_ntp(rx_ring);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
@@ -2499,76 +2553,84 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
if (!size)
break;
- i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
- rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
-
+ i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp);
/* retrieve a buffer from the ring */
- if (!skb) {
+ rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+ neop = i40e_is_non_eop(rx_ring, rx_desc);
+ i40e_inc_ntp(rx_ring);
+
+ if (!xdp->data) {
unsigned char *hard_start;
hard_start = page_address(rx_buffer->page) +
rx_buffer->page_offset - offset;
- xdp_prepare_buff(&xdp, hard_start, offset, size, true);
- xdp_buff_clear_frags_flag(&xdp);
+ xdp_prepare_buff(xdp, hard_start, offset, size, true);
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
- xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
+ xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size);
#endif
- xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog);
+ } else if (i40e_add_xdp_frag(xdp, &nfrags, rx_buffer, size) &&
+ !neop) {
+ /* Overflowing packet: Drop all frags on EOP */
+ i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
+ break;
}
+ if (neop)
+ continue;
+
+ xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog);
+
if (xdp_res) {
- if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
- xdp_xmit |= xdp_res;
- i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
+ xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
+
+ if (unlikely(xdp_buff_has_frags(xdp))) {
+ i40e_process_rx_buffs(rx_ring, xdp_res, xdp);
+ size = xdp_get_buff_len(xdp);
+ } else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+ i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
} else {
rx_buffer->pagecnt_bias++;
}
total_rx_bytes += size;
- total_rx_packets++;
- } else if (skb) {
- i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
- } else if (ring_uses_build_skb(rx_ring)) {
- skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
} else {
- skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
- }
+ if (ring_uses_build_skb(rx_ring))
+ skb = i40e_build_skb(rx_ring, xdp, nfrags);
+ else
+ skb = i40e_construct_skb(rx_ring, xdp, nfrags);
+
+ /* drop if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_buff_failed++;
+ i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
+ break;
+ }
- /* exit if we failed to retrieve a buffer */
- if (!xdp_res && !skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- rx_buffer->pagecnt_bias++;
- break;
- }
+ if (i40e_cleanup_headers(rx_ring, skb, rx_desc))
+ goto process_next;
- i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
- cleaned_count++;
+ /* probably a little skewed due to removing CRC */
+ total_rx_bytes += skb->len;
- i40e_inc_ntc(rx_ring);
- if (i40e_is_non_eop(rx_ring, rx_desc))
- continue;
+ /* populate checksum, VLAN, and protocol */
+ i40e_process_skb_fields(rx_ring, rx_desc, skb);
- if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
- skb = NULL;
- continue;
+ i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp);
+ napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
- /* probably a little skewed due to removing CRC */
- total_rx_bytes += skb->len;
-
- /* populate checksum, VLAN, and protocol */
- i40e_process_skb_fields(rx_ring, rx_desc, skb);
-
- i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
- napi_gro_receive(&rx_ring->q_vector->napi, skb);
- skb = NULL;
-
/* update budget accounting */
total_rx_packets++;
+process_next:
+ cleaned_count += nfrags + 1;
+ i40e_put_rx_buffer(rx_ring, rx_buffer);
+ rx_ring->next_to_clean = rx_ring->next_to_process;
+
+ xdp->data = NULL;
}
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
- rx_ring->skb = skb;
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 768290dc6f48..8c3d24012c54 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -277,6 +277,7 @@ struct i40e_rx_buffer {
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
+ __u32 page_count;
};
struct i40e_queue_stats {
@@ -336,6 +337,17 @@ struct i40e_ring {
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
+ /* Storing xdp_buff on ring helps in saving the state of a partially built
+ * packet when i40e_clean_rx_irq() must return before it sees EOP
+ * and to resume packet building for this ring in the next call to
+ * i40e_clean_rx_irq().
+ */
+ struct xdp_buff xdp;
+
+ /* Next descriptor to be processed; next_to_clean is updated only on
+ * processing EOP descriptor
+ */
+ u16 next_to_process;
/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
@@ -380,14 +392,6 @@ struct i40e_ring {
struct rcu_head rcu; /* to avoid race on free */
u16 next_to_alloc;
- struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must
- * return before it sees the EOP for
- * the current packet, we save that skb
- * here and resume receiving this
- * packet the next time
- * i40e_clean_rx_ring_irq() is called
- * for this ring.
- */
struct i40e_channel *ch;
u16 rx_offset;
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 232bc61d9eee..2cdce251472c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -6,7 +6,6 @@
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e809249500e1..aa32111afd6e 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -20,7 +20,6 @@
#include <linux/pci.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
-#include <linux/aer.h>
#include <linux/interrupt.h>
#include <linux/ethtool.h>
#include <linux/timer.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c233464b8f6b..8dc1ae3f8e53 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1393,6 +1393,8 @@ static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
wake_up(&pf->aq_wait_queue);
}
+#define ICE_MBX_OVERFLOW_WATERMARK 64
+
/**
* __ice_clean_ctrlq - helper function to clean controlq rings
* @pf: ptr to struct ice_pf
@@ -1483,6 +1485,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
return 0;
do {
+ struct ice_mbx_data data = {};
u16 opcode;
int ret;
@@ -1509,8 +1512,12 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
ice_vf_lan_overflow_event(pf, &event);
break;
case ice_mbx_opc_send_msg_to_pf:
- if (!ice_is_malicious_vf(pf, &event, i, pending))
- ice_vc_process_vf_msg(pf, &event);
+ data.num_msg_proc = i;
+ data.num_pending_arq = pending;
+ data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries;
+ data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
+
+ ice_vc_process_vf_msg(pf, &event, &data);
break;
case ice_aqc_opc_fw_logging:
ice_output_fw_log(hw, &event.desc, event.msg_buf);
@@ -3888,6 +3895,7 @@ static int ice_init_pf(struct ice_pf *pf)
mutex_init(&pf->vfs.table_lock);
hash_init(pf->vfs.table);
+ ice_mbx_init_snapshot(&pf->hw);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 96a64c25e2ef..6fa62c3cedb0 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -204,10 +204,7 @@ void ice_free_vfs(struct ice_pf *pf)
}
/* clear malicious info since the VF is getting released */
- if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
- ICE_MAX_SRIOV_VFS, vf->vf_id))
- dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
- vf->vf_id);
+ list_del(&vf->mbx_info.list_entry);
mutex_unlock(&vf->cfg_lock);
}
@@ -1017,7 +1014,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!num_vfs) {
if (!pci_vfs_assigned(pdev)) {
ice_free_vfs(pf);
- ice_mbx_deinit_snapshot(&pf->hw);
if (pf->lag)
ice_enable_lag(pf->lag);
return 0;
@@ -1027,15 +1023,9 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
return -EBUSY;
}
- err = ice_mbx_init_snapshot(&pf->hw, num_vfs);
- if (err)
- return err;
-
err = ice_pci_sriov_ena(pf, num_vfs);
- if (err) {
- ice_mbx_deinit_snapshot(&pf->hw);
+ if (err)
return err;
- }
if (pf->lag)
ice_disable_lag(pf->lag);
@@ -1787,66 +1777,3 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
}
}
}
-
-/**
- * ice_is_malicious_vf - helper function to detect a malicious VF
- * @pf: ptr to struct ice_pf
- * @event: pointer to the AQ event
- * @num_msg_proc: the number of messages processed so far
- * @num_msg_pending: the number of messages peinding in admin queue
- */
-bool
-ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
- u16 num_msg_proc, u16 num_msg_pending)
-{
- s16 vf_id = le16_to_cpu(event->desc.retval);
- struct device *dev = ice_pf_to_dev(pf);
- struct ice_mbx_data mbxdata;
- bool malvf = false;
- struct ice_vf *vf;
- int status;
-
- vf = ice_get_vf_by_id(pf, vf_id);
- if (!vf)
- return false;
-
- if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
- goto out_put_vf;
-
- mbxdata.num_msg_proc = num_msg_proc;
- mbxdata.num_pending_arq = num_msg_pending;
- mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries;
-#define ICE_MBX_OVERFLOW_WATERMARK 64
- mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
-
- /* check to see if we have a malicious VF */
- status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf);
- if (status)
- goto out_put_vf;
-
- if (malvf) {
- bool report_vf = false;
-
- /* if the VF is malicious and we haven't let the user
- * know about it, then let them know now
- */
- status = ice_mbx_report_malvf(&pf->hw, pf->vfs.malvfs,
- ICE_MAX_SRIOV_VFS, vf_id,
- &report_vf);
- if (status)
- dev_dbg(dev, "Error reporting malicious VF\n");
-
- if (report_vf) {
- struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
-
- if (pf_vsi)
- dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
- &vf->dev_lan_addr[0],
- pf_vsi->netdev->dev_addr);
- }
- }
-
-out_put_vf:
- ice_put_vf(vf);
- return malvf;
-}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
index 955ab810a198..346cb2666f3a 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.h
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -33,11 +33,7 @@ int
ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
void ice_free_vfs(struct ice_pf *pf);
-void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event);
void ice_restore_all_vfs_msi_state(struct pci_dev *pdev);
-bool
-ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
- u16 num_msg_proc, u16 num_msg_pending);
int
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
@@ -68,22 +64,11 @@ ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto);
static inline void ice_process_vflr_event(struct ice_pf *pf) { }
static inline void ice_free_vfs(struct ice_pf *pf) { }
static inline
-void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { }
-static inline
void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { }
static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { }
static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { }
static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { }
-static inline bool
-ice_is_malicious_vf(struct ice_pf __always_unused *pf,
- struct ice_rq_event_info __always_unused *event,
- u16 __always_unused num_msg_proc,
- u16 __always_unused num_msg_pending)
-{
- return false;
-}
-
static inline int
ice_sriov_configure(struct pci_dev __always_unused *pdev,
int __always_unused num_vfs)
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index e3f622cad425..a09556e57803 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -784,14 +784,15 @@ struct ice_mbx_snap_buffer_data {
u16 max_num_msgs_mbx;
};
-/* Structure to track messages sent by VFs on mailbox:
- * 1. vf_cntr: a counter array of VFs to track the number of
- * asynchronous messages sent by each VF
- * 2. vfcntr_len: number of entries in VF counter array
+/* Structure used to track a single VF's messages on the mailbox:
+ * 1. list_entry: linked list entry node
+ * 2. msg_count: the number of asynchronous messages sent by this VF
+ * 3. malicious: whether this VF has been detected as malicious before
*/
-struct ice_mbx_vf_counter {
- u32 *vf_cntr;
- u32 vfcntr_len;
+struct ice_mbx_vf_info {
+ struct list_head list_entry;
+ u32 msg_count;
+ u8 malicious : 1;
};
/* Structure to hold data relevant to the captured static snapshot
@@ -799,7 +800,7 @@ struct ice_mbx_vf_counter {
*/
struct ice_mbx_snapshot {
struct ice_mbx_snap_buffer_data mbx_buf;
- struct ice_mbx_vf_counter mbx_vf;
+ struct list_head mbx_vf;
};
/* Structure to hold data to be used for capturing or updating a
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 0e57bd1b85fd..89fd6982df09 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -496,10 +496,7 @@ void ice_reset_all_vfs(struct ice_pf *pf)
/* clear all malicious info if the VFs are getting reset */
ice_for_each_vf(pf, bkt, vf)
- if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
- ICE_MAX_SRIOV_VFS, vf->vf_id))
- dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
- vf->vf_id);
+ ice_mbx_clear_malvf(&vf->mbx_info);
/* If VFs have been disabled, there is no need to reset */
if (test_and_set_bit(ICE_VF_DIS, pf->state)) {
@@ -601,12 +598,10 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
struct device *dev;
- struct ice_hw *hw;
int err = 0;
bool rsd;
dev = ice_pf_to_dev(pf);
- hw = &pf->hw;
if (flags & ICE_VF_RESET_NOTIFY)
ice_notify_vf_reset(vf);
@@ -705,10 +700,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
ice_eswitch_replay_vf_mac_rule(vf);
/* if the VF has been reset allow it to come up again */
- if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
- ICE_MAX_SRIOV_VFS, vf->vf_id))
- dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
- vf->vf_id);
+ ice_mbx_clear_malvf(&vf->mbx_info);
out_unlock:
if (flags & ICE_VF_RESET_LOCK)
@@ -764,6 +756,9 @@ void ice_initialize_vf_entry(struct ice_vf *vf)
ice_vf_ctrl_invalidate_vsi(vf);
ice_vf_fdir_init(vf);
+ /* Initialize mailbox info for this VF */
+ ice_mbx_init_vf_info(&pf->hw, &vf->mbx_info);
+
mutex_init(&vf->cfg_lock);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index ef30f05b5d02..e3cda6fb71ab 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -74,7 +74,6 @@ struct ice_vfs {
u16 num_qps_per; /* number of queue pairs per VF */
u16 num_msix_per; /* number of MSI-X vectors per VF */
unsigned long last_printed_mdd_jiffies; /* MDD message rate limit */
- DECLARE_BITMAP(malvfs, ICE_MAX_SRIOV_VFS); /* malicious VF indicator */
};
/* VF information structure */
@@ -105,6 +104,7 @@ struct ice_vf {
DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
struct ice_vlan port_vlan_info; /* Port VLAN ID, QoS, and TPID */
struct virtchnl_vlan_caps vlan_v2_caps;
+ struct ice_mbx_vf_info mbx_info;
u8 pf_set_mac:1; /* VF MAC address set by VMM admin */
u8 trusted:1;
u8 spoofchk:1;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
index f56fa94ff3d0..40cb4ba0789c 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
@@ -93,36 +93,31 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
*
* 2. When the caller starts processing its mailbox queue in response to an
* interrupt, the structure ice_mbx_snapshot is expected to be cleared before
- * the algorithm can be run for the first time for that interrupt. This can be
- * done via ice_mbx_reset_snapshot().
+ * the algorithm can be run for the first time for that interrupt. This is
+ * done via ice_mbx_reset_snapshot(), which also zeroes the message count of
+ * every VF tracking structure on the snapshot list.
*
* 3. For every message read by the caller from the MBX Queue, the caller must
* call the detection algorithm's entry function ice_mbx_vf_state_handler().
* Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
* filled as it is required to be passed to the algorithm.
*
- * 4. Every time a message is read from the MBX queue, a VFId is received which
- * is passed to the state handler. The boolean output is_malvf of the state
- * handler ice_mbx_vf_state_handler() serves as an indicator to the caller
- * whether this VF is malicious or not.
+ * 4. Every time a message is read from the MBX queue, a tracking structure
+ * for the VF must be passed to the state handler. The boolean output
+ * report_malvf from ice_mbx_vf_state_handler() serves as an indicator to the
+ * caller whether it must report this VF as malicious or not.
*
* 5. When a VF is identified to be malicious, the caller can send a message
- * to the system administrator. The caller can invoke ice_mbx_report_malvf()
- * to help determine if a malicious VF is to be reported or not. This function
- * requires the caller to maintain a global bitmap to track all malicious VFs
- * and pass that to ice_mbx_report_malvf() along with the VFID which was identified
- * to be malicious by ice_mbx_vf_state_handler().
+ * to the system administrator.
*
- * 6. The global bitmap maintained by PF can be cleared completely if PF is in
- * reset or the bit corresponding to a VF can be cleared if that VF is in reset.
- * When a VF is shut down and brought back up, we assume that the new VF
- * brought up is not malicious and hence report it if found malicious.
+ * 6. The PF is responsible for maintaining the struct ice_mbx_vf_info
+ * structure for each VF. The PF should clear the VF tracking structure if the
+ * VF is reset. When a VF is shut down and brought back up, we will then
+ * assume that the new VF is not malicious and may report it again if we
+ * detect it again.
*
* 7. The function ice_mbx_reset_snapshot() is called to reset the information
* in ice_mbx_snapshot for every new mailbox interrupt handled.
- *
- * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated
- * when driver is unloaded.
*/
#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
@@ -131,6 +126,25 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF
/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
+ * @snap: pointer to the mailbox snapshot
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
+{
+ struct ice_mbx_vf_info *vf_info;
+
+ /* Clear mbx_buf in the mailbox snapshot structure and set the
+ * mailbox snapshot state to a new capture.
+ */
+ memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+ snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+
+ /* Reset message counts for all VFs to zero */
+ list_for_each_entry(vf_info, &snap->mbx_vf, list_entry)
+ vf_info->msg_count = 0;
+}
+
+/**
* ice_mbx_traverse - Pass through mailbox snapshot
* @hw: pointer to the HW struct
* @new_state: new algorithm state
@@ -171,7 +185,7 @@ ice_mbx_traverse(struct ice_hw *hw,
/**
* ice_mbx_detect_malvf - Detect malicious VF in snapshot
* @hw: pointer to the HW struct
- * @vf_id: relative virtual function ID
+ * @vf_info: mailbox tracking structure for a VF
* @new_state: new algorithm state
* @is_malvf: boolean output to indicate if VF is malicious
*
@@ -180,19 +194,14 @@ ice_mbx_traverse(struct ice_hw *hw,
* the permissible number of messages to send.
*/
static int
-ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
+ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info,
enum ice_mbx_snapshot_state *new_state,
bool *is_malvf)
{
- struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
-
- if (vf_id >= snap->mbx_vf.vfcntr_len)
- return -EIO;
-
- /* increment the message count in the VF array */
- snap->mbx_vf.vf_cntr[vf_id]++;
+ /* increment the message count for this VF */
+ vf_info->msg_count++;
- if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD)
+ if (vf_info->msg_count >= ICE_ASYNC_VF_MSG_THRESHOLD)
*is_malvf = true;
/* continue to iterate through the mailbox snapshot */
@@ -202,35 +211,11 @@ ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
}
/**
- * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
- * @snap: pointer to mailbox snapshot structure in the ice_hw struct
- *
- * Reset the mailbox snapshot structure and clear VF counter array.
- */
-static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
-{
- u32 vfcntr_len;
-
- if (!snap || !snap->mbx_vf.vf_cntr)
- return;
-
- /* Clear VF counters. */
- vfcntr_len = snap->mbx_vf.vfcntr_len;
- if (vfcntr_len)
- memset(snap->mbx_vf.vf_cntr, 0,
- (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr)));
-
- /* Reset mailbox snapshot for a new capture. */
- memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
- snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
-}
-
-/**
* ice_mbx_vf_state_handler - Handle states of the overflow algorithm
* @hw: pointer to the HW struct
* @mbx_data: pointer to structure containing mailbox data
- * @vf_id: relative virtual function (VF) ID
- * @is_malvf: boolean output to indicate if VF is malicious
+ * @vf_info: mailbox tracking structure for the VF in question
+ * @report_malvf: boolean output to indicate whether VF should be reported
*
* The function serves as an entry point for the malicious VF
* detection algorithm by handling the different states and state
@@ -249,24 +234,24 @@ static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
* the static snapshot and look for a malicious VF.
*/
int
-ice_mbx_vf_state_handler(struct ice_hw *hw,
- struct ice_mbx_data *mbx_data, u16 vf_id,
- bool *is_malvf)
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+ struct ice_mbx_vf_info *vf_info, bool *report_malvf)
{
struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
struct ice_mbx_snap_buffer_data *snap_buf;
struct ice_ctl_q_info *cq = &hw->mailboxq;
enum ice_mbx_snapshot_state new_state;
+ bool is_malvf = false;
int status = 0;
- if (!is_malvf || !mbx_data)
+ if (!report_malvf || !mbx_data || !vf_info)
return -EINVAL;
+ *report_malvf = false;
+
/* When entering the mailbox state machine assume that the VF
* is not malicious until detected.
*/
- *is_malvf = false;
-
/* Checking if max messages allowed to be processed while servicing current
* interrupt is not less than the defined AVF message threshold.
*/
@@ -315,7 +300,7 @@ ice_mbx_vf_state_handler(struct ice_hw *hw,
if (snap_buf->num_pending_arq >=
mbx_data->async_watermark_val) {
new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
- status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+ status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
} else {
new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
ice_mbx_traverse(hw, &new_state);
@@ -329,7 +314,7 @@ ice_mbx_vf_state_handler(struct ice_hw *hw,
case ICE_MAL_VF_DETECT_STATE_DETECT:
new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
- status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+ status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
break;
default:
@@ -339,145 +324,57 @@ ice_mbx_vf_state_handler(struct ice_hw *hw,
snap_buf->state = new_state;
- return status;
-}
-
-/**
- * ice_mbx_report_malvf - Track and note malicious VF
- * @hw: pointer to the HW struct
- * @all_malvfs: all malicious VFs tracked by PF
- * @bitmap_len: length of bitmap in bits
- * @vf_id: relative virtual function ID of the malicious VF
- * @report_malvf: boolean to indicate if malicious VF must be reported
- *
- * This function will update a bitmap that keeps track of the malicious
- * VFs attached to the PF. A malicious VF must be reported only once if
- * discovered between VF resets or loading so the function checks
- * the input vf_id against the bitmap to verify if the VF has been
- * detected in any previous mailbox iterations.
- */
-int
-ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
- u16 bitmap_len, u16 vf_id, bool *report_malvf)
-{
- if (!all_malvfs || !report_malvf)
- return -EINVAL;
-
- *report_malvf = false;
-
- if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len)
- return -EINVAL;
-
- if (vf_id >= bitmap_len)
- return -EIO;
-
- /* If the vf_id is found in the bitmap set bit and boolean to true */
- if (!test_and_set_bit(vf_id, all_malvfs))
+ /* Only report a VF as malicious the first time we detect it */
+ if (is_malvf && !vf_info->malicious) {
+ vf_info->malicious = 1;
*report_malvf = true;
+ }
- return 0;
+ return status;
}
/**
- * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID
- * @snap: pointer to the mailbox snapshot structure
- * @all_malvfs: all malicious VFs tracked by PF
- * @bitmap_len: length of bitmap in bits
- * @vf_id: relative virtual function ID of the malicious VF
+ * ice_mbx_clear_malvf - Clear VF mailbox info
+ * @vf_info: the mailbox tracking structure for a VF
*
- * In case of a VF reset, this function can be called to clear
- * the bit corresponding to the VF ID in the bitmap tracking all
- * malicious VFs attached to the PF. The function also clears the
- * VF counter array at the index of the VF ID. This is to ensure
- * that the new VF loaded is not considered malicious before going
- * through the overflow detection algorithm.
+ * In case of a VF reset, this function shall be called to clear the VF's
+ * current mailbox tracking state.
*/
-int
-ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
- u16 bitmap_len, u16 vf_id)
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info)
{
- if (!snap || !all_malvfs)
- return -EINVAL;
-
- if (bitmap_len < snap->mbx_vf.vfcntr_len)
- return -EINVAL;
-
- /* Ensure VF ID value is not larger than bitmap or VF counter length */
- if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len)
- return -EIO;
-
- /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */
- clear_bit(vf_id, all_malvfs);
-
- /* Clear the VF counter in the mailbox snapshot structure for that VF ID.
- * This is to ensure that if a VF is unloaded and a new one brought back
- * up with the same VF ID for a snapshot currently in traversal or detect
- * state the counter for that VF ID does not increment on top of existing
- * values in the mailbox overflow detection algorithm.
- */
- snap->mbx_vf.vf_cntr[vf_id] = 0;
-
- return 0;
+ vf_info->malicious = 0;
+ vf_info->msg_count = 0;
}
/**
- * ice_mbx_init_snapshot - Initialize mailbox snapshot structure
+ * ice_mbx_init_vf_info - Initialize a new VF mailbox tracking info
* @hw: pointer to the hardware structure
- * @vf_count: number of VFs allocated on a PF
+ * @vf_info: the mailbox tracking info structure for a VF
*
- * Clear the mailbox snapshot structure and allocate memory
- * for the VF counter array based on the number of VFs allocated
- * on that PF.
+ * Initialize a VF mailbox tracking info structure and insert it into the
+ * snapshot list.
*
- * Assumption: This function will assume ice_get_caps() has already been
- * called to ensure that the vf_count can be compared against the number
- * of VFs supported as defined in the functional capabilities of the device.
+ * If you remove the VF, you must also delete the associated VF info structure
+ * from the linked list.
*/
-int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count)
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info)
{
struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
- /* Ensure that the number of VFs allocated is non-zero and
- * is not greater than the number of supported VFs defined in
- * the functional capabilities of the PF.
- */
- if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs)
- return -EINVAL;
-
- snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count,
- sizeof(*snap->mbx_vf.vf_cntr),
- GFP_KERNEL);
- if (!snap->mbx_vf.vf_cntr)
- return -ENOMEM;
-
- /* Setting the VF counter length to the number of allocated
- * VFs for given PF's functional capabilities.
- */
- snap->mbx_vf.vfcntr_len = vf_count;
-
- /* Clear mbx_buf in the mailbox snaphot structure and setting the
- * mailbox snapshot state to a new capture.
- */
- memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
- snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
-
- return 0;
+ ice_mbx_clear_malvf(vf_info);
+ list_add(&vf_info->list_entry, &snap->mbx_vf);
}
/**
- * ice_mbx_deinit_snapshot - Free mailbox snapshot structure
+ * ice_mbx_init_snapshot - Initialize mailbox snapshot data
* @hw: pointer to the hardware structure
*
- * Clear the mailbox snapshot structure and free the VF counter array.
+ * Clear the mailbox snapshot structure and initialize the VF mailbox list.
*/
-void ice_mbx_deinit_snapshot(struct ice_hw *hw)
+void ice_mbx_init_snapshot(struct ice_hw *hw)
{
struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
- /* Free VF counter array and reset VF counter length */
- devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr);
- snap->mbx_vf.vfcntr_len = 0;
-
- /* Clear mbx_buf in the mailbox snaphot structure */
- memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+ INIT_LIST_HEAD(&snap->mbx_vf);
+ ice_mbx_reset_snapshot(snap);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
index 582716e6d5f9..44bc030d17e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
@@ -21,15 +21,10 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
int
ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
- u16 vf_id, bool *is_mal_vf);
-int
-ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
- u16 bitmap_len, u16 vf_id);
-int ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count);
-void ice_mbx_deinit_snapshot(struct ice_hw *hw);
-int
-ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
- u16 bitmap_len, u16 vf_id, bool *report_malvf);
+ struct ice_mbx_vf_info *vf_info, bool *report_malvf);
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_snapshot(struct ice_hw *hw);
#else /* CONFIG_PCI_IOV */
static inline int
ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
@@ -48,5 +43,9 @@ ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support,
return 0;
}
+static inline void ice_mbx_init_snapshot(struct ice_hw *hw)
+{
+}
+
#endif /* CONFIG_PCI_IOV */
#endif /* _ICE_VF_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index e24e3f5017ca..97243c616d5d 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -3834,14 +3834,57 @@ void ice_virtchnl_set_repr_ops(struct ice_vf *vf)
}
/**
+ * ice_is_malicious_vf - check if this VF might be overflowing the mailbox
+ * @vf: the VF to check
+ * @mbxdata: data about the state of the mailbox
+ *
+ * Detect if a given VF might be malicious and attempting to overflow the PF
+ * mailbox. If so, log a warning message and ignore this event.
+ */
+static bool
+ice_is_malicious_vf(struct ice_vf *vf, struct ice_mbx_data *mbxdata)
+{
+ bool report_malvf = false;
+ struct device *dev;
+ struct ice_pf *pf;
+ int status;
+
+ pf = vf->pf;
+ dev = ice_pf_to_dev(pf);
+
+ if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+ return vf->mbx_info.malicious;
+
+ /* check to see if we have a newly malicious VF */
+ status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info,
+ &report_malvf);
+ if (status)
+ dev_warn_ratelimited(dev, "Unable to check status of mailbox overflow for VF %u MAC %pM, status %d\n",
+ vf->vf_id, vf->dev_lan_addr, status);
+
+ if (report_malvf) {
+ struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+ u8 zero_addr[ETH_ALEN] = {};
+
+ dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
+ vf->dev_lan_addr,
+ pf_vsi ? pf_vsi->netdev->dev_addr : zero_addr);
+ }
+
+ return vf->mbx_info.malicious;
+}
+
+/**
* ice_vc_process_vf_msg - Process request from VF
* @pf: pointer to the PF structure
* @event: pointer to the AQ event
+ * @mbxdata: information used to detect VF attempting mailbox overflow
*
* called from the common asq/arq handler to
* process request from VF
*/
-void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+ struct ice_mbx_data *mbxdata)
{
u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
s16 vf_id = le16_to_cpu(event->desc.retval);
@@ -3863,6 +3906,10 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
mutex_lock(&vf->cfg_lock);
+ /* Check if the VF is trying to overflow the mailbox */
+ if (ice_is_malicious_vf(vf, mbxdata))
+ goto finish;
+
/* Check if VF is disabled. */
if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
err = -EPERM;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
index b454654d7b0c..cd747718de73 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
@@ -63,6 +63,8 @@ int
ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+ struct ice_mbx_data *mbxdata);
#else /* CONFIG_PCI_IOV */
static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { }
static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { }
@@ -81,6 +83,12 @@ static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
{
return false;
}
+
+static inline void
+ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+ struct ice_mbx_data *mbxdata)
+{
+}
#endif /* !CONFIG_PCI_IOV */
#endif /* _ICE_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 03bc1e8af575..a2914298dd69 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -28,7 +28,6 @@
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
-#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index df3e26c0cf01..34aebf00a512 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -99,6 +99,7 @@ struct igc_ring {
u32 start_time;
u32 end_time;
+ u32 max_sdu;
/* CBS parameters */
bool cbs_enable; /* indicates if CBS is enabled */
@@ -185,6 +186,7 @@ struct igc_adapter {
ktime_t base_time;
ktime_t cycle_time;
bool qbv_enable;
+ u32 qbv_config_change_errors;
/* OS defined structs */
struct pci_dev *pdev;
@@ -292,8 +294,6 @@ extern char igc_driver_name[];
#define IGC_FLAG_PTP BIT(8)
#define IGC_FLAG_WOL_SUPPORTED BIT(8)
#define IGC_FLAG_NEED_LINK_UPDATE BIT(9)
-#define IGC_FLAG_MEDIA_RESET BIT(10)
-#define IGC_FLAG_MAS_ENABLE BIT(12)
#define IGC_FLAG_HAS_MSIX BIT(13)
#define IGC_FLAG_EEE BIT(14)
#define IGC_FLAG_VLAN_PROMISC BIT(15)
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 5a26a7805ef8..0e2cb00622d1 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -67,6 +67,7 @@ static const struct igc_stats igc_gstrings_stats[] = {
IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
IGC_STAT("tx_lpi_counter", stats.tlpic),
IGC_STAT("rx_lpi_counter", stats.rlpic),
+ IGC_STAT("qbv_config_change_errors", qbv_config_change_errors),
};
#define IGC_NETDEV_STAT(_net_stat) { \
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index 88680e3d613d..e1c572e0d4ef 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -273,6 +273,7 @@ struct igc_hw_stats {
u64 o2bspc;
u64 b2ospc;
u64 b2ogprc;
+ u64 txdrop;
};
struct net_device *igc_get_hw_dev(struct igc_hw *hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 2928a6c73692..0a1570129e5b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4,7 +4,6 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/if_vlan.h>
-#include <linux/aer.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ip.h>
@@ -1501,6 +1500,7 @@ static int igc_tso(struct igc_ring *tx_ring,
static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
struct igc_ring *tx_ring)
{
+ struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
bool first_flag = false, insert_empty = false;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
__be16 protocol = vlan_get_protocol(skb);
@@ -1563,9 +1563,19 @@ done:
first->bytecount = skb->len;
first->gso_segs = 1;
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
+ if (tx_ring->max_sdu > 0) {
+ u32 max_sdu = 0;
+
+ max_sdu = tx_ring->max_sdu +
+ (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0);
+
+ if (first->bytecount > max_sdu) {
+ adapter->stats.txdrop++;
+ goto out_drop;
+ }
+ }
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
/* FIXME: add support for retrieving timestamps from
* the other timer registers before skipping the
* timestamping request.
@@ -4920,7 +4930,8 @@ void igc_update_stats(struct igc_adapter *adapter)
net_stats->tx_window_errors = adapter->stats.latecol;
net_stats->tx_carrier_errors = adapter->stats.tncrs;
- /* Tx Dropped needs to be maintained elsewhere */
+ /* Tx Dropped */
+ net_stats->tx_dropped = adapter->stats.txdrop;
/* Management Stats */
adapter->stats.mgptc += rd32(IGC_MGTPTC);
@@ -5566,25 +5577,8 @@ no_wait:
mod_timer(&adapter->phy_info_timer,
round_jiffies(jiffies + 2 * HZ));
- /* link is down, time to check for alternate media */
- if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
- if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
- schedule_work(&adapter->reset_task);
- /* return immediately */
- return;
- }
- }
pm_schedule_suspend(netdev->dev.parent,
MSEC_PER_SEC * 5);
-
- /* also check for alternate media here */
- } else if (!netif_carrier_ok(netdev) &&
- (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
- if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
- schedule_work(&adapter->reset_task);
- /* return immediately */
- return;
- }
}
}
@@ -6049,12 +6043,14 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
adapter->base_time = 0;
adapter->cycle_time = NSEC_PER_SEC;
+ adapter->qbv_config_change_errors = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
ring->start_time = 0;
ring->end_time = NSEC_PER_SEC;
+ ring->max_sdu = 0;
}
return 0;
@@ -6138,6 +6134,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
}
}
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+ struct net_device *dev = adapter->netdev;
+
+ if (qopt->max_sdu[i])
+ ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len;
+ else
+ ring->max_sdu = 0;
+ }
+
return 0;
}
@@ -6236,8 +6242,10 @@ static int igc_tc_query_caps(struct igc_adapter *adapter,
caps->broken_mqprio = true;
- if (hw->mac.type == igc_i225)
+ if (hw->mac.type == igc_i225) {
+ caps->supports_queue_max_sdu = true;
caps->gate_mask_per_txq = true;
+ }
return 0;
}
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index a386c8d61dbf..94a2b0dfb54d 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -114,6 +114,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
static int igc_tsn_enable_offload(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ bool tsn_mode_reconfig = false;
u32 tqavctrl, baset_l, baset_h;
u32 sec, nsec, cycle;
ktime_t base_time, systim;
@@ -226,6 +227,10 @@ skip_cbs:
}
tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS;
+
+ if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN)
+ tsn_mode_reconfig = true;
+
tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
cycle = adapter->cycle_time;
@@ -239,6 +244,13 @@ skip_cbs:
s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle);
base_time = ktime_add_ns(base_time, (n + 1) * cycle);
+
+ /* Increase the counter if scheduling into the past while
+ * Gate Control List (GCL) is running.
+ */
+ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
+ tsn_mode_reconfig)
+ adapter->qbv_config_change_errors++;
} else {
/* According to datasheet section 7.5.2.9.3.3, FutScdDis bit
* has to be configured before the cycle time and base time.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 8736ca4b2628..63d4e32df029 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -9,7 +9,6 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/cpumask.h>
-#include <linux/aer.h>
#include <linux/if_vlan.h>
#include <linux/jiffies.h>
#include <linux/phy.h>
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 3ea00bc9b91c..adc953611913 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6089,18 +6089,19 @@ static bool mvpp2_port_has_irqs(struct mvpp2 *priv,
return true;
}
-static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
- struct fwnode_handle *fwnode,
- char **mac_from)
+static int mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
+ struct fwnode_handle *fwnode,
+ char **mac_from)
{
struct mvpp2_port *port = netdev_priv(dev);
char hw_mac_addr[ETH_ALEN] = {0};
char fw_mac_addr[ETH_ALEN];
+ int ret;
if (!fwnode_get_mac_address(fwnode, fw_mac_addr)) {
*mac_from = "firmware node";
eth_hw_addr_set(dev, fw_mac_addr);
- return;
+ return 0;
}
if (priv->hw_version == MVPP21) {
@@ -6108,19 +6109,24 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
if (is_valid_ether_addr(hw_mac_addr)) {
*mac_from = "hardware";
eth_hw_addr_set(dev, hw_mac_addr);
- return;
+ return 0;
}
}
/* Only valid on OF enabled platforms */
- if (!of_get_mac_address_nvmem(to_of_node(fwnode), fw_mac_addr)) {
+ ret = of_get_mac_address_nvmem(to_of_node(fwnode), fw_mac_addr);
+ if (ret == -EPROBE_DEFER)
+ return ret;
+ if (!ret) {
*mac_from = "nvmem cell";
eth_hw_addr_set(dev, fw_mac_addr);
- return;
+ return 0;
}
*mac_from = "random";
eth_hw_addr_random(dev);
+
+ return 0;
}
static struct mvpp2_port *mvpp2_phylink_to_port(struct phylink_config *config)
@@ -6823,7 +6829,9 @@ static int mvpp2_port_probe(struct platform_device *pdev,
mutex_init(&port->gather_stats_lock);
INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics);
- mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from);
+ err = mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from);
+ if (err < 0)
+ goto err_free_stats;
port->tx_ring_size = MVPP2_MAX_TXD_DFLT;
port->rx_ring_size = MVPP2_MAX_RXD_DFLT;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 5a898fb88e37..fdce78ceea87 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -8,7 +8,6 @@
#include <linux/types.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/aer.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
@@ -1050,7 +1049,6 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_pci_regions;
}
- pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
netdev = alloc_etherdev_mq(sizeof(struct octep_device),
@@ -1106,7 +1104,6 @@ register_dev_err:
err_octep_config:
free_netdev(netdev);
err_alloc_netdev:
- pci_disable_pcie_error_reporting(pdev);
pci_release_mem_regions(pdev);
err_pci_regions:
err_dma_mask:
@@ -1139,7 +1136,6 @@ static void octep_remove(struct pci_dev *pdev)
octep_device_cleanup(oct);
pci_release_mem_regions(pdev);
free_netdev(netdev);
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
}
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 87fff539d39d..d5691b6a2bc5 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1586,7 +1586,7 @@ static struct platform_driver pxa168_eth_driver = {
.suspend = pxa168_eth_suspend,
.driver = {
.name = DRIVER_NAME,
- .of_match_table = of_match_ptr(pxa168_eth_of_match),
+ .of_match_table = pxa168_eth_of_match,
},
};
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3cb43623d3db..52aa71f0c499 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -374,17 +374,6 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
{
u32 val;
- /* Check DDR memory type.
- * Currently TRGMII mode with DDR2 memory is not supported.
- */
- regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
- if (interface == PHY_INTERFACE_MODE_TRGMII &&
- val & SYSCFG_DRAM_TYPE_DDR2) {
- dev_err(eth->dev,
- "TRGMII mode with DDR2 memory is not supported!\n");
- return -EOPNOTSUPP;
- }
-
val = (interface == PHY_INTERFACE_MODE_TRGMII) ?
ETHSYS_TRGMII_MT7621_DDR_PLL : 0;
@@ -397,38 +386,42 @@ static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth,
phy_interface_t interface, int speed)
{
- u32 val;
+ unsigned long rate;
+ u32 tck, rck, intf;
int ret;
if (interface == PHY_INTERFACE_MODE_TRGMII) {
mtk_w32(eth, TRGMII_MODE, INTF_MODE);
- val = 500000000;
- ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+ ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], 500000000);
if (ret)
dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
return;
}
- val = (speed == SPEED_1000) ?
- INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100;
- mtk_w32(eth, val, INTF_MODE);
+ if (speed == SPEED_1000) {
+ intf = INTF_MODE_RGMII_1000;
+ rate = 250000000;
+ rck = RCK_CTRL_RGMII_1000;
+ tck = TCK_CTRL_RGMII_1000;
+ } else {
+ intf = INTF_MODE_RGMII_10_100;
+ rate = 500000000;
+ rck = RCK_CTRL_RGMII_10_100;
+ tck = TCK_CTRL_RGMII_10_100;
+ }
+
+ mtk_w32(eth, intf, INTF_MODE);
regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
ETHSYS_TRGMII_CLK_SEL362_5,
ETHSYS_TRGMII_CLK_SEL362_5);
- val = (speed == SPEED_1000) ? 250000000 : 500000000;
- ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+ ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], rate);
if (ret)
dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
- val = (speed == SPEED_1000) ?
- RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100;
- mtk_w32(eth, val, TRGMII_RCK_CTRL);
-
- val = (speed == SPEED_1000) ?
- TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100;
- mtk_w32(eth, val, TRGMII_TCK_CTRL);
+ mtk_w32(eth, rck, TRGMII_RCK_CTRL);
+ mtk_w32(eth, tck, TRGMII_TCK_CTRL);
}
static struct phylink_pcs *mtk_mac_select_pcs(struct phylink_config *config,
@@ -465,19 +458,11 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
/* Setup soc pin functions */
switch (state->interface) {
case PHY_INTERFACE_MODE_TRGMII:
- if (mac->id)
- goto err_phy;
- if (!MTK_HAS_CAPS(mac->hw->soc->caps,
- MTK_GMAC1_TRGMII))
- goto err_phy;
- fallthrough;
case PHY_INTERFACE_MODE_RGMII_TXID:
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_MII:
- case PHY_INTERFACE_MODE_REVMII:
- case PHY_INTERFACE_MODE_RMII:
if (MTK_HAS_CAPS(eth->soc->caps, MTK_RGMII)) {
err = mtk_gmac_rgmii_path_setup(eth, mac->id);
if (err)
@@ -487,11 +472,9 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
case PHY_INTERFACE_MODE_1000BASEX:
case PHY_INTERFACE_MODE_2500BASEX:
case PHY_INTERFACE_MODE_SGMII:
- if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
- err = mtk_gmac_sgmii_path_setup(eth, mac->id);
- if (err)
- goto init_err;
- }
+ err = mtk_gmac_sgmii_path_setup(eth, mac->id);
+ if (err)
+ goto init_err;
break;
case PHY_INTERFACE_MODE_GMII:
if (MTK_HAS_CAPS(eth->soc->caps, MTK_GEPHY)) {
@@ -539,21 +522,13 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
}
}
- ge_mode = 0;
switch (state->interface) {
case PHY_INTERFACE_MODE_MII:
case PHY_INTERFACE_MODE_GMII:
ge_mode = 1;
break;
- case PHY_INTERFACE_MODE_REVMII:
- ge_mode = 2;
- break;
- case PHY_INTERFACE_MODE_RMII:
- if (mac->id)
- goto err_phy;
- ge_mode = 3;
- break;
default:
+ ge_mode = 0;
break;
}
@@ -4330,6 +4305,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
struct mtk_mac *mac;
int id, err;
int txqs = 1;
+ u32 val;
if (!_id) {
dev_err(eth->dev, "missing mac id\n");
@@ -4406,6 +4382,15 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
__set_bit(PHY_INTERFACE_MODE_TRGMII,
mac->phylink_config.supported_interfaces);
+ /* TRGMII is not permitted on MT7621 if using DDR2 */
+ if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
+ MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII_MT7621_CLK)) {
+ regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
+ if (val & SYSCFG_DRAM_TYPE_DDR2)
+ __clear_bit(PHY_INTERFACE_MODE_TRGMII,
+ mac->phylink_config.supported_interfaces);
+ }
+
if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_SGMII)) {
__set_bit(PHY_INTERFACE_MODE_SGMII,
mac->phylink_config.supported_interfaces);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 544e09b97483..034733b13b1a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -323,7 +323,7 @@ struct mlx4_en_tx_ring {
struct mlx4_en_rx_desc {
/* actual number of entries depends on rx ring stride */
- struct mlx4_wqe_data_seg data[0];
+ DECLARE_FLEX_ARRAY(struct mlx4_wqe_data_seg, data);
};
struct mlx4_en_rx_ring {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8d4e25cc54ea..6c2f1d4a58ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -77,6 +77,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
+mlx5_core-$(CONFIG_THERMAL) += thermal.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index c5d2fdcabd56..1ee2a472e1d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -494,6 +494,61 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
}
+static int
+mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ return val.vu32 ? 0 : -EINVAL;
+}
+
+static int
+mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ u32 val32 = val.vu32;
+
+ if (!is_power_of_2(val32)) {
+ NL_SET_ERR_MSG_MOD(extack, "Value is not power of two");
+ return -EINVAL;
+ }
+
+ if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) {
+ NL_SET_ERR_MSG_FMT_MOD(
+ extack, "Maximum hairpin queue size is %lu",
+ BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ union devlink_param_value value;
+ u64 link_speed64;
+ u32 link_speed;
+
+ /* set one hairpin pair for each 50Gbps share of the link */
+ mlx5_port_max_linkspeed(dev, &link_speed);
+ link_speed = max_t(u32, link_speed, 50000);
+ link_speed64 = link_speed;
+ do_div(link_speed64, 50000);
+
+ value.vu32 = link_speed64;
+ devl_param_driverinit_value_set(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value);
+
+ value.vu32 =
+ BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev),
+ MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+ devl_param_driverinit_value_set(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value);
+}
+
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_enable_roce_validate),
@@ -547,6 +602,14 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
static const struct devlink_param mlx5_devlink_eth_params[] = {
DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, NULL),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+ "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_hairpin_num_queues_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
+ "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+ mlx5_devlink_hairpin_queue_size_validate),
};
static int mlx5_devlink_eth_params_register(struct devlink *devlink)
@@ -567,6 +630,9 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)
devl_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
value);
+
+ mlx5_devlink_hairpin_params_init_values(devlink);
+
return 0;
}
@@ -805,6 +871,11 @@ int mlx5_devlink_params_register(struct devlink *devlink)
{
int err;
+ /* Here only the driver init params should be registered.
+ * Runtime params should be registered by the code whose
+ * behaviour they configure.
+ */
+
err = devl_params_register(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 212b12424146..5dcfb4d86d8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -12,6 +12,8 @@ enum mlx5_devlink_param_id {
MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
+ MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+ MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
};
struct mlx5_trap_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4a19ef4a9811..02237e630d13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -335,15 +335,20 @@ static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
params->mqprio.num_tc : 1;
}
+/* Keep this enum consistent with the corresponding strings array
+ * declared in en/reporter_rx.c
+ */
enum {
- MLX5E_RQ_STATE_ENABLED,
+ MLX5E_RQ_STATE_ENABLED = 0,
MLX5E_RQ_STATE_RECOVERING,
- MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_DIM,
MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, /* set when mini_cqe_resp_stride_index cap is used */
MLX5E_RQ_STATE_SHAMPO, /* set when SHAMPO cap is used */
MLX5E_RQ_STATE_MINI_CQE_ENHANCED, /* set when enhanced mini_cqe_cap is used */
+ MLX5E_RQ_STATE_XSK, /* set to indicate an xsk rq */
+ MLX5E_NUM_RQ_STATES, /* Must be kept last */
};
struct mlx5e_cq {
@@ -384,16 +389,20 @@ struct mlx5e_sq_dma {
enum mlx5e_dma_map_type type;
};
+/* Keep this enum consistent with the corresponding strings array
+ * declared in en/reporter_tx.c
+ */
enum {
- MLX5E_SQ_STATE_ENABLED,
+ MLX5E_SQ_STATE_ENABLED = 0,
MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
- MLX5E_SQ_STATE_AM,
+ MLX5E_SQ_STATE_DIM,
MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
MLX5E_SQ_STATE_PENDING_XSK_TX,
MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
MLX5E_SQ_STATE_XDP_MULTIBUF,
+ MLX5E_NUM_SQ_STATES, /* Must be kept last */
};
struct mlx5e_tx_mpwqe {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index a21bd1179477..561da78d3b5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -553,7 +553,7 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
u32 link_speed = 0;
u32 pci_bw = 0;
- mlx5e_port_max_linkspeed(mdev, &link_speed);
+ mlx5_port_max_linkspeed(mdev, &link_speed);
pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
link_speed, pci_bw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 505ba41195b9..dbe2b19a9570 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -32,101 +32,6 @@
#include "port.h"
-/* speed in units of 1Mb */
-static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
- [MLX5E_1000BASE_CX_SGMII] = 1000,
- [MLX5E_1000BASE_KX] = 1000,
- [MLX5E_10GBASE_CX4] = 10000,
- [MLX5E_10GBASE_KX4] = 10000,
- [MLX5E_10GBASE_KR] = 10000,
- [MLX5E_20GBASE_KR2] = 20000,
- [MLX5E_40GBASE_CR4] = 40000,
- [MLX5E_40GBASE_KR4] = 40000,
- [MLX5E_56GBASE_R4] = 56000,
- [MLX5E_10GBASE_CR] = 10000,
- [MLX5E_10GBASE_SR] = 10000,
- [MLX5E_10GBASE_ER] = 10000,
- [MLX5E_40GBASE_SR4] = 40000,
- [MLX5E_40GBASE_LR4] = 40000,
- [MLX5E_50GBASE_SR2] = 50000,
- [MLX5E_100GBASE_CR4] = 100000,
- [MLX5E_100GBASE_SR4] = 100000,
- [MLX5E_100GBASE_KR4] = 100000,
- [MLX5E_100GBASE_LR4] = 100000,
- [MLX5E_100BASE_TX] = 100,
- [MLX5E_1000BASE_T] = 1000,
- [MLX5E_10GBASE_T] = 10000,
- [MLX5E_25GBASE_CR] = 25000,
- [MLX5E_25GBASE_KR] = 25000,
- [MLX5E_25GBASE_SR] = 25000,
- [MLX5E_50GBASE_CR2] = 50000,
- [MLX5E_50GBASE_KR2] = 50000,
-};
-
-static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
- [MLX5E_SGMII_100M] = 100,
- [MLX5E_1000BASE_X_SGMII] = 1000,
- [MLX5E_5GBASE_R] = 5000,
- [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
- [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
- [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
- [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
- [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
- [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
- [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
- [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
- [MLX5E_400GAUI_8] = 400000,
- [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
- [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
- [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
-};
-
-bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev)
-{
- struct mlx5e_port_eth_proto eproto;
- int err;
-
- if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
- return true;
-
- err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
- if (err)
- return false;
-
- return !!eproto.cap;
-}
-
-static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
- const u32 **arr, u32 *size,
- bool force_legacy)
-{
- bool ext = force_legacy ? false : mlx5e_ptys_ext_supported(mdev);
-
- *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
- ARRAY_SIZE(mlx5e_link_speed);
- *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
-}
-
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
- struct mlx5e_port_eth_proto *eproto)
-{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- int err;
-
- if (!eproto)
- return -EINVAL;
-
- err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
- if (err)
- return err;
-
- eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
- eth_proto_capability);
- eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
- eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
- return 0;
-}
-
void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
u8 *an_disable_cap, u8 *an_disable_admin)
{
@@ -172,30 +77,14 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
sizeof(out), MLX5_REG_PTYS, 0, 1);
}
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
- bool force_legacy)
-{
- unsigned long temp = eth_proto_oper;
- const u32 *table;
- u32 speed = 0;
- u32 max_size;
- int i;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
- i = find_first_bit(&temp, max_size);
- if (i < max_size)
- speed = table[i];
- return speed;
-}
-
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
{
- struct mlx5e_port_eth_proto eproto;
+ struct mlx5_port_eth_proto eproto;
bool force_legacy = false;
bool ext;
int err;
- ext = mlx5e_ptys_ext_supported(mdev);
+ ext = mlx5_ptys_ext_supported(mdev);
err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
if (err)
goto out;
@@ -205,7 +94,7 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
if (err)
goto out;
}
- *speed = mlx5e_port_ptys2speed(mdev, eproto.oper, force_legacy);
+ *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy);
if (!(*speed))
err = -EINVAL;
@@ -213,46 +102,6 @@ out:
return err;
}
-int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
-{
- struct mlx5e_port_eth_proto eproto;
- u32 max_speed = 0;
- const u32 *table;
- u32 max_size;
- bool ext;
- int err;
- int i;
-
- ext = mlx5e_ptys_ext_supported(mdev);
- err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
- if (err)
- return err;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
- for (i = 0; i < max_size; ++i)
- if (eproto.cap & MLX5E_PROT_MASK(i))
- max_speed = max(max_speed, table[i]);
-
- *speed = max_speed;
- return 0;
-}
-
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
- bool force_legacy)
-{
- u32 link_modes = 0;
- const u32 *table;
- u32 max_size;
- int i;
-
- mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
- for (i = 0; i < max_size; ++i) {
- if (table[i] == speed)
- link_modes |= MLX5E_PROT_MASK(i);
- }
- return link_modes;
-}
-
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out)
{
int sz = MLX5_ST_SZ_BYTES(pbmc_reg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 3f474e370828..d1da225f35da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -36,25 +36,11 @@
#include <linux/mlx5/driver.h>
#include "en.h"
-struct mlx5e_port_eth_proto {
- u32 cap;
- u32 admin;
- u32 oper;
-};
-
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
- struct mlx5e_port_eth_proto *eproto);
void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
u8 *an_disable_cap, u8 *an_disable_admin);
int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
u32 proto_admin, bool ext);
-u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
- bool force_legacy);
int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
- bool force_legacy);
-bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev);
int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 8f7452dc00ee..19c4a83982ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -426,39 +426,58 @@ static bool mlx5e_rep_macvlan_mode_supported(const struct net_device *dev)
return macvlan->mode == MACVLAN_MODE_PASSTHRU;
}
-static int
-mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
- struct mlx5e_rep_priv *rpriv,
- struct flow_block_offload *f,
- flow_setup_cb_t *setup_cb,
- void *data,
- void (*cleanup)(struct flow_block_cb *block_cb))
+static bool
+mlx5e_rep_check_indr_block_supported(struct mlx5e_rep_priv *rpriv,
+ struct net_device *netdev,
+ struct flow_block_offload *f)
{
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- bool is_ovs_int_port = netif_is_ovs_master(netdev);
- struct mlx5e_rep_indr_block_priv *indr_priv;
- struct flow_block_cb *block_cb;
+ struct net_device *macvlan_real_dev;
- if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
- !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev) &&
- !is_ovs_int_port) {
- if (!(netif_is_macvlan(netdev) && macvlan_dev_real_dev(netdev) == rpriv->netdev))
- return -EOPNOTSUPP;
+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
+ f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ return false;
+
+ if (mlx5e_tc_tun_device_to_offload(priv, netdev))
+ return true;
+
+ if (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)
+ return true;
+
+ if (netif_is_macvlan(netdev)) {
if (!mlx5e_rep_macvlan_mode_supported(netdev)) {
netdev_warn(netdev, "Offloading ingress filter is supported only with macvlan passthru mode");
- return -EOPNOTSUPP;
+ return false;
}
+
+ macvlan_real_dev = macvlan_dev_real_dev(netdev);
+
+ if (macvlan_real_dev == rpriv->netdev)
+ return true;
+ if (netif_is_bond_master(macvlan_real_dev))
+ return true;
}
- if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
- f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
- return -EOPNOTSUPP;
+ if (netif_is_ovs_master(netdev) && f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS &&
+ mlx5e_tc_int_port_supported(esw))
+ return true;
- if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && !is_ovs_int_port)
- return -EOPNOTSUPP;
+ return false;
+}
+
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
+ struct mlx5e_rep_priv *rpriv,
+ struct flow_block_offload *f,
+ flow_setup_cb_t *setup_cb,
+ void *data,
+ void (*cleanup)(struct flow_block_cb *block_cb))
+{
+ struct mlx5e_rep_indr_block_priv *indr_priv;
+ struct flow_block_cb *block_cb;
- if (is_ovs_int_port && !mlx5e_tc_int_port_supported(esw))
+ if (!mlx5e_rep_check_indr_block_supported(rpriv, netdev, f))
return -EOPNOTSUPP;
f->unlocked_driver_cb = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index c462fe76495b..b621f735cdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -8,6 +8,19 @@
#include "ptp.h"
#include "lib/tout.h"
+/* Keep this string array consistent with the MLX5E_RQ_STATE_* enums in en.h */
+static const char * const rq_sw_state_type_name[] = {
+ [MLX5E_RQ_STATE_ENABLED] = "enabled",
+ [MLX5E_RQ_STATE_RECOVERING] = "recovering",
+ [MLX5E_RQ_STATE_DIM] = "dim",
+ [MLX5E_RQ_STATE_NO_CSUM_COMPLETE] = "no_csum_complete",
+ [MLX5E_RQ_STATE_CSUM_FULL] = "csum_full",
+ [MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX] = "mini_cqe_hw_stridx",
+ [MLX5E_RQ_STATE_SHAMPO] = "shampo",
+ [MLX5E_RQ_STATE_MINI_CQE_ENHANCED] = "mini_cqe_enhanced",
+ [MLX5E_RQ_STATE_XSK] = "xsk",
+};
+
static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
{
int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
@@ -239,6 +252,35 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_health_rq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_rq *rq)
+{
+ int err;
+ int i;
+
+ BUILD_BUG_ON_MSG(ARRAY_SIZE(rq_sw_state_type_name) != MLX5E_NUM_RQ_STATES,
+ "rq_sw_state_type_name string array must be consistent with MLX5E_RQ_STATE_* enum in en.h");
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(rq_sw_state_type_name); ++i) {
+ err = devlink_fmsg_u32_pair_put(fmsg, rq_sw_state_type_name[i],
+ test_bit(i, &rq->state));
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
+
static int
mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
struct devlink_fmsg *fmsg)
@@ -265,10 +307,6 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
if (err)
return err;
- err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
- if (err)
- return err;
-
err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter);
if (err)
return err;
@@ -281,6 +319,10 @@ mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
if (err)
return err;
+ err = mlx5e_health_rq_put_sw_state(fmsg, rq);
+ if (err)
+ return err;
+
err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 34666e2b3871..44c1926843a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -6,6 +6,19 @@
#include "en/devlink.h"
#include "lib/tout.h"
+/* Keep this string array consistent with the MLX5E_SQ_STATE_* enums in en.h */
+static const char * const sq_sw_state_type_name[] = {
+ [MLX5E_SQ_STATE_ENABLED] = "enabled",
+ [MLX5E_SQ_STATE_MPWQE] = "mpwqe",
+ [MLX5E_SQ_STATE_RECOVERING] = "recovering",
+ [MLX5E_SQ_STATE_IPSEC] = "ipsec",
+ [MLX5E_SQ_STATE_DIM] = "dim",
+ [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline",
+ [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx",
+ [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync",
+ [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf",
+};
+
static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
{
struct mlx5_core_dev *dev = sq->mdev;
@@ -37,6 +50,35 @@ static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
sq->pc = 0;
}
+static int mlx5e_health_sq_put_sw_state(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *sq)
+{
+ int err;
+ int i;
+
+ BUILD_BUG_ON_MSG(ARRAY_SIZE(sq_sw_state_type_name) != MLX5E_NUM_SQ_STATES,
+ "sq_sw_state_type_name string array must be consistent with MLX5E_SQ_STATE_* enum in en.h");
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SW State");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(sq_sw_state_type_name); ++i) {
+ err = devlink_fmsg_u32_pair_put(fmsg, sq_sw_state_type_name[i],
+ test_bit(i, &sq->state));
+ if (err)
+ return err;
+ }
+
+ err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return devlink_fmsg_obj_nest_end(fmsg);
+}
+
static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
{
struct mlx5_core_dev *mdev;
@@ -190,6 +232,10 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
if (err)
return err;
+ err = mlx5e_health_sq_put_sw_state(fmsg, sq);
+ if (err)
+ return err;
+
err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index 07cc65596f89..291193f7120d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -234,6 +234,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack))
return -EOPNOTSUPP;
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
out_dev = get_fdb_out_dev(uplink_dev, out_dev);
if (!out_dev)
return -ENODEV;
@@ -250,9 +253,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
return err;
}
- if (netif_is_macvlan(out_dev))
- out_dev = macvlan_dev_real_dev(out_dev);
-
err = verify_uplink_forwarding(priv, attr, out_dev, extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index b9c2f67d3794..816ea83e6413 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -86,7 +86,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
static inline bool
mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
{
- return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
+ return (u16)(*fifo->pc - *fifo->cc) <= fifo->mask;
}
static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 81a567e17264..ed279f450976 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -93,13 +93,19 @@ static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *param
struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
struct mlx5e_xsk_param *xsk)
{
+ struct mlx5e_rq *xskrq = &c->xskrq;
int err;
- err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+ err = mlx5e_init_xsk_rq(c, params, pool, xsk, xskrq);
if (err)
return err;
- return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
+ err = mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), xskrq);
+ if (err)
+ return err;
+
+ __set_bit(MLX5E_RQ_STATE_XSK, &xskrq->state);
+ return 0;
}
int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 79fd21ecb9cb..1f5a2110d31f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -220,7 +220,7 @@ static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev,
struct ptys2ethtool_config **arr,
u32 *size)
{
- bool ext = mlx5e_ptys_ext_supported(mdev);
+ bool ext = mlx5_ptys_ext_supported(mdev);
*arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table;
*size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) :
@@ -895,7 +895,7 @@ static void get_speed_duplex(struct net_device *netdev,
if (!netif_carrier_ok(netdev))
goto out;
- speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
+ speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy);
if (!speed) {
if (data_rate_oper)
speed = 100 * data_rate_oper;
@@ -980,7 +980,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp,
struct ethtool_link_ksettings *link_ksettings)
{
unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising;
- bool ext = mlx5e_ptys_ext_supported(mdev);
+ bool ext = mlx5_ptys_ext_supported(mdev);
ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext);
}
@@ -1160,7 +1160,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
const struct ethtool_link_ksettings *link_ksettings)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_port_eth_proto eproto;
+ struct mlx5_port_eth_proto eproto;
const unsigned long *adver;
bool an_changes = false;
u8 an_disable_admin;
@@ -1180,7 +1180,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
autoneg = link_ksettings->base.autoneg;
speed = link_ksettings->base.speed;
- ext_supported = mlx5e_ptys_ext_supported(mdev);
+ ext_supported = mlx5_ptys_ext_supported(mdev);
ext = ext_requested(autoneg, adver, ext_supported);
if (!ext_supported && ext)
return -EOPNOTSUPP;
@@ -1194,7 +1194,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
goto out;
}
link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) :
- mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+ mlx5_port_speed2linkmodes(mdev, speed, !ext);
err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a7f2ab22cc40..212e9a16fcf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1188,7 +1188,7 @@ int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled)
- __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
+ __set_bit(MLX5E_RQ_STATE_DIM, &rq->state);
/* We disable csum_complete when XDP is enabled since
* XDP programs might manipulate packets which will render
@@ -1664,7 +1664,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
if (params->tx_dim_enabled)
- sq->state |= BIT(MLX5E_SQ_STATE_AM);
+ sq->state |= BIT(MLX5E_SQ_STATE_DIM);
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 6bfed633343a..f002a1fbb4e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -44,6 +44,7 @@
#include <net/bareudp.h>
#include <net/bonding.h>
#include <net/dst_metadata.h>
+#include "devlink.h"
#include "en.h"
#include "en/tc/post_act.h"
#include "en/tc/act_stats.h"
@@ -73,12 +74,6 @@
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
-struct mlx5e_hairpin_params {
- struct mlx5_core_dev *mdev;
- u32 num_queues;
- u32 queue_size;
-};
-
struct mlx5e_tc_table {
/* Protects the dynamic assignment of the t parameter
* which is the nic tc root table.
@@ -101,7 +96,6 @@ struct mlx5e_tc_table {
struct mlx5_tc_ct_priv *ct;
struct mapping_ctx *mapping;
- struct mlx5e_hairpin_params hairpin_params;
struct dentry *dfs_root;
/* tc action stats */
@@ -589,6 +583,7 @@ struct mlx5e_hairpin {
struct mlx5e_tir direct_tir;
int num_channels;
+ u8 log_num_packets;
struct mlx5e_rqt indir_rqt;
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
struct mlx5_ttc_table *ttc;
@@ -935,6 +930,7 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
hp->func_mdev = func_mdev;
hp->func_priv = priv;
hp->num_channels = params->num_channels;
+ hp->log_num_packets = params->log_num_packets;
err = mlx5e_hairpin_create_transport(hp);
if (err)
@@ -1076,9 +1072,11 @@ static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
mutex_lock(&tc->hairpin_tbl_lock);
hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
- seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n",
+ seq_printf(file,
+ "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
hpe->peer_vhca_id, hpe->prio,
- refcount_read(&hpe->refcnt));
+ refcount_read(&hpe->refcnt), hpe->hp->num_channels,
+ BIT(hpe->hp->log_num_packets));
mutex_unlock(&tc->hairpin_tbl_lock);
return 0;
@@ -1099,33 +1097,15 @@ static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
&debugfs_hairpin_table_dump_fops);
}
-static void
-mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params,
- struct mlx5_core_dev *mdev)
-{
- u64 link_speed64;
- u32 link_speed;
-
- hairpin_params->mdev = mdev;
- /* set hairpin pair per each 50Gbs share of the link */
- mlx5e_port_max_linkspeed(mdev, &link_speed);
- link_speed = max_t(u32, link_speed, 50000);
- link_speed64 = link_speed;
- do_div(link_speed64, 50000);
- hairpin_params->num_queues = link_speed64;
-
- hairpin_params->queue_size =
- BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev),
- MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets)));
-}
-
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5e_tc_flow_parse_attr *parse_attr,
struct netlink_ext_ack *extack)
{
struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
int peer_ifindex = parse_attr->mirred_ifindex[0];
+ union devlink_param_value val = {};
struct mlx5_hairpin_params params;
struct mlx5_core_dev *peer_mdev;
struct mlx5e_hairpin_entry *hpe;
@@ -1182,7 +1162,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
hash_hairpin_info(peer_id, match_prio));
mutex_unlock(&tc->hairpin_tbl_lock);
- params.log_num_packets = ilog2(tc->hairpin_params.queue_size);
+ err = devl_param_driverinit_value_get(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
+ if (err) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ params.log_num_packets = ilog2(val.vu32);
params.log_data_size =
clamp_t(u32,
params.log_num_packets +
@@ -1191,7 +1178,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
params.q_counter = priv->q_counter;
- params.num_channels = tc->hairpin_params.num_queues;
+ err = devl_param_driverinit_value_get(
+ devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
+ if (err) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ params.num_channels = val.vu32;
hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
hpe->hp = hp;
@@ -5290,8 +5284,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
- mlx5e_hairpin_params_init(&tc->hairpin_params, dev);
-
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
&tc->netdevice_nb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 9a458a5d9853..a50bfda18e96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -51,7 +51,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
struct mlx5e_sq_stats *stats = sq->stats;
struct dim_sample dim_sample = {};
- if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
+ if (unlikely(!test_bit(MLX5E_SQ_STATE_DIM, &sq->state)))
return;
dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
@@ -63,7 +63,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
struct mlx5e_rq_stats *stats = rq->stats;
struct dim_sample dim_sample = {};
- if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
+ if (unlikely(!test_bit(MLX5E_RQ_STATE_DIM, &rq->state)))
return;
dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 75015d370922..7c79476cc5f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -744,7 +744,7 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
u64 value;
int err;
- err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ err = mlx5_port_max_linkspeed(mdev, &link_speed_max);
if (err) {
NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index f9438d4e43ca..016c5f99c470 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -325,6 +325,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
while (sensor_pci_not_working(dev)) {
if (time_after(jiffies, end))
return -ETIMEDOUT;
+ if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
+ return -ENODEV;
+ }
msleep(100);
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f1de152a6113..d39c3476b6d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -52,6 +52,7 @@
#include <linux/version.h>
#include <net/devlink.h>
#include "mlx5_core.h"
+#include "thermal.h"
#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
@@ -191,7 +192,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
if (!(fw_initializing >> 31))
break;
if (time_after(jiffies, end) ||
- test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
+ test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
err = -EBUSY;
break;
}
@@ -1768,6 +1769,10 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+ err = mlx5_thermal_init(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_thermal_init failed with error code %d\n", err);
+
pci_save_state(pdev);
devlink_register(devlink);
return 0;
@@ -1796,6 +1801,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_drain_fw_reset(dev);
devlink_unregister(devlink);
mlx5_sriov_disable(pdev);
+ mlx5_thermal_uninit(dev);
mlx5_crdump_disable(dev);
mlx5_drain_health_wq(dev);
mlx5_uninit_one(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index a1548e6bfb35..0daeb4b72cca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -1054,3 +1054,154 @@ out:
kfree(out);
return err;
}
+
+/* speed in units of 1 Mb/s */
+static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
+ [MLX5E_1000BASE_CX_SGMII] = 1000,
+ [MLX5E_1000BASE_KX] = 1000,
+ [MLX5E_10GBASE_CX4] = 10000,
+ [MLX5E_10GBASE_KX4] = 10000,
+ [MLX5E_10GBASE_KR] = 10000,
+ [MLX5E_20GBASE_KR2] = 20000,
+ [MLX5E_40GBASE_CR4] = 40000,
+ [MLX5E_40GBASE_KR4] = 40000,
+ [MLX5E_56GBASE_R4] = 56000,
+ [MLX5E_10GBASE_CR] = 10000,
+ [MLX5E_10GBASE_SR] = 10000,
+ [MLX5E_10GBASE_ER] = 10000,
+ [MLX5E_40GBASE_SR4] = 40000,
+ [MLX5E_40GBASE_LR4] = 40000,
+ [MLX5E_50GBASE_SR2] = 50000,
+ [MLX5E_100GBASE_CR4] = 100000,
+ [MLX5E_100GBASE_SR4] = 100000,
+ [MLX5E_100GBASE_KR4] = 100000,
+ [MLX5E_100GBASE_LR4] = 100000,
+ [MLX5E_100BASE_TX] = 100,
+ [MLX5E_1000BASE_T] = 1000,
+ [MLX5E_10GBASE_T] = 10000,
+ [MLX5E_25GBASE_CR] = 25000,
+ [MLX5E_25GBASE_KR] = 25000,
+ [MLX5E_25GBASE_SR] = 25000,
+ [MLX5E_50GBASE_CR2] = 50000,
+ [MLX5E_50GBASE_KR2] = 50000,
+};
+
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+ [MLX5E_SGMII_100M] = 100,
+ [MLX5E_1000BASE_X_SGMII] = 1000,
+ [MLX5E_5GBASE_R] = 5000,
+ [MLX5E_10GBASE_XFI_XAUI_1] = 10000,
+ [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000,
+ [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000,
+ [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000,
+ [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000,
+ [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000,
+ [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000,
+ [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000,
+ [MLX5E_400GAUI_8] = 400000,
+ [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000,
+ [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000,
+ [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000,
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5_port_eth_proto *eproto)
+{
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+ int err;
+
+ if (!eproto)
+ return -EINVAL;
+
+ err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+ if (err)
+ return err;
+
+ eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+ eth_proto_capability);
+ eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+ eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+ return 0;
+}
+
+bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_port_eth_proto eproto;
+ int err;
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet))
+ return true;
+
+ err = mlx5_port_query_eth_proto(mdev, 1, true, &eproto);
+ if (err)
+ return false;
+
+ return !!eproto.cap;
+}
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+ const u32 **arr, u32 *size,
+ bool force_legacy)
+{
+ bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev);
+
+ *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+ ARRAY_SIZE(mlx5e_link_speed);
+ *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy)
+{
+ unsigned long temp = eth_proto_oper;
+ const u32 *table;
+ u32 speed = 0;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ i = find_first_bit(&temp, max_size);
+ if (i < max_size)
+ speed = table[i];
+ return speed;
+}
+
+u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy)
+{
+ u32 link_modes = 0;
+ const u32 *table;
+ u32 max_size;
+ int i;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy);
+ for (i = 0; i < max_size; ++i) {
+ if (table[i] == speed)
+ link_modes |= MLX5E_PROT_MASK(i);
+ }
+ return link_modes;
+}
+
+int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+ struct mlx5_port_eth_proto eproto;
+ u32 max_speed = 0;
+ const u32 *table;
+ u32 max_size;
+ bool ext;
+ int err;
+ int i;
+
+ ext = mlx5_ptys_ext_supported(mdev);
+ err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
+ if (err)
+ return err;
+
+ mlx5e_port_get_speed_arr(mdev, &table, &max_size, false);
+ for (i = 0; i < max_size; ++i)
+ if (eproto.cap & MLX5E_PROT_MASK(i))
+ max_speed = max(max_speed, table[i]);
+
+ *speed = max_speed;
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
new file mode 100644
index 000000000000..e47fa6fb836f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "thermal.h"
+
+#define MLX5_THERMAL_POLL_INT_MSEC 1000
+#define MLX5_THERMAL_NUM_TRIPS 0
+#define MLX5_THERMAL_ASIC_SENSOR_INDEX 0
+
+/* Bit string indicating the writability of trip points if any */
+#define MLX5_THERMAL_TRIP_MASK (BIT(MLX5_THERMAL_NUM_TRIPS) - 1)
+
+struct mlx5_thermal {
+ struct mlx5_core_dev *mdev;
+ struct thermal_zone_device *tzdev;
+};
+
+static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p_temp)
+{
+ u32 mtmp_out[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+ u32 mtmp_in[MLX5_ST_SZ_DW(mtmp_reg)] = {};
+ int err;
+
+ MLX5_SET(mtmp_reg, mtmp_in, sensor_index, id);
+
+ err = mlx5_core_access_reg(mdev, mtmp_in, sizeof(mtmp_in),
+ mtmp_out, sizeof(mtmp_out),
+ MLX5_REG_MTMP, 0, 0);
+
+ if (err)
+ return err;
+
+ *p_temp = MLX5_GET(mtmp_reg, mtmp_out, temperature);
+
+ return 0;
+}
+
+static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev,
+ int *p_temp)
+{
+ struct mlx5_thermal *thermal = tzdev->devdata;
+ struct mlx5_core_dev *mdev = thermal->mdev;
+ int err;
+
+ err = mlx5_thermal_get_mtmp_temp(mdev, MLX5_THERMAL_ASIC_SENSOR_INDEX, p_temp);
+
+ if (err)
+ return err;
+
+ /* The unit of temp returned is in 0.125 C. The thermal
+ * framework expects the value in 0.001 C.
+ */
+ *p_temp *= 125;
+
+ return 0;
+}
+
+static struct thermal_zone_device_ops mlx5_thermal_ops = {
+ .get_temp = mlx5_thermal_get_temp,
+};
+
+int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_thermal *thermal;
+ struct thermal_zone_device *tzd;
+ const char *data = "mlx5";
+
+ tzd = thermal_zone_get_zone_by_name(data);
+ if (!IS_ERR(tzd))
+ return 0;
+
+ thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
+ if (!thermal)
+ return -ENOMEM;
+
+ thermal->mdev = mdev;
+ thermal->tzdev = thermal_zone_device_register(data,
+ MLX5_THERMAL_NUM_TRIPS,
+ MLX5_THERMAL_TRIP_MASK,
+ thermal,
+ &mlx5_thermal_ops,
+ NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
+ if (IS_ERR(thermal->tzdev)) {
+ dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
+ data, PTR_ERR(thermal->tzdev));
+ kfree(thermal);
+ return -EINVAL;
+ }
+
+ mdev->thermal = thermal;
+ return 0;
+}
+
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->thermal)
+ return;
+
+ thermal_zone_device_unregister(mdev->thermal->tzdev);
+ kfree(mdev->thermal);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.h b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
new file mode 100644
index 000000000000..7d752c122192
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __MLX5_THERMAL_DRIVER_H
+#define __MLX5_THERMAL_DRIVER_H
+
+#if IS_ENABLED(CONFIG_THERMAL)
+int mlx5_thermal_init(struct mlx5_core_dev *mdev);
+void mlx5_thermal_uninit(struct mlx5_core_dev *mdev);
+#else
+static inline int mlx5_thermal_init(struct mlx5_core_dev *mdev)
+{
+ mdev->thermal = NULL;
+ return 0;
+}
+
+static inline void mlx5_thermal_uninit(struct mlx5_core_dev *mdev) { }
+#endif
+
+#endif /* __MLX5_THERMAL_DRIVER_H */
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 49f5159afbf3..cbdae0ab8bb6 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -92,6 +92,11 @@
#define SE_IDX_QUEUE 0 /* 0-79 : Queue scheduler elements */
#define SE_IDX_PORT 80 /* 80-89 : Port scheduler elements */
+#define LAN966X_VCAP_CID_IS1_L0 VCAP_CID_INGRESS_L0 /* IS1 lookup 0 */
+#define LAN966X_VCAP_CID_IS1_L1 VCAP_CID_INGRESS_L1 /* IS1 lookup 1 */
+#define LAN966X_VCAP_CID_IS1_L2 VCAP_CID_INGRESS_L2 /* IS1 lookup 2 */
+#define LAN966X_VCAP_CID_IS1_MAX (VCAP_CID_INGRESS_L3 - 1) /* IS1 Max */
+
#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */
#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */
#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */
@@ -139,6 +144,39 @@ enum vcap_is2_port_sel_ipv6 {
VCAP_IS2_PS_IPV6_MAC_ETYPE,
};
+enum vcap_is1_port_sel_other {
+ VCAP_IS1_PS_OTHER_NORMAL,
+ VCAP_IS1_PS_OTHER_7TUPLE,
+ VCAP_IS1_PS_OTHER_DBL_VID,
+ VCAP_IS1_PS_OTHER_DMAC_VID,
+};
+
+enum vcap_is1_port_sel_ipv4 {
+ VCAP_IS1_PS_IPV4_NORMAL,
+ VCAP_IS1_PS_IPV4_7TUPLE,
+ VCAP_IS1_PS_IPV4_5TUPLE_IP4,
+ VCAP_IS1_PS_IPV4_DBL_VID,
+ VCAP_IS1_PS_IPV4_DMAC_VID,
+};
+
+enum vcap_is1_port_sel_ipv6 {
+ VCAP_IS1_PS_IPV6_NORMAL,
+ VCAP_IS1_PS_IPV6_7TUPLE,
+ VCAP_IS1_PS_IPV6_5TUPLE_IP4,
+ VCAP_IS1_PS_IPV6_NORMAL_IP6,
+ VCAP_IS1_PS_IPV6_5TUPLE_IP6,
+ VCAP_IS1_PS_IPV6_DBL_VID,
+ VCAP_IS1_PS_IPV6_DMAC_VID,
+};
+
+enum vcap_is1_port_sel_rt {
+ VCAP_IS1_PS_RT_NORMAL,
+ VCAP_IS1_PS_RT_7TUPLE,
+ VCAP_IS1_PS_RT_DBL_VID,
+ VCAP_IS1_PS_RT_DMAC_VID,
+ VCAP_IS1_PS_RT_FOLLOW_OTHER = 7,
+};
+
struct lan966x_port;
struct lan966x_db {
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
index 7d66fe75cd3b..7302df2300fd 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_police.c
@@ -49,8 +49,7 @@ static int lan966x_police_add(struct lan966x_port *port,
return 0;
}
-static int lan966x_police_del(struct lan966x_port *port,
- u16 pol_idx)
+static void lan966x_police_del(struct lan966x_port *port, u16 pol_idx)
{
struct lan966x *lan966x = port->lan966x;
@@ -67,8 +66,6 @@ static int lan966x_police_del(struct lan966x_port *port,
lan_wr(ANA_POL_PIR_CFG_PIR_RATE_SET(GENMASK(14, 0)) |
ANA_POL_PIR_CFG_PIR_BURST_SET(0),
lan966x, ANA_POL_PIR_CFG(pol_idx));
-
- return 0;
}
static int lan966x_police_validate(struct lan966x_port *port,
@@ -186,7 +183,6 @@ int lan966x_police_port_del(struct lan966x_port *port,
struct netlink_ext_ack *extack)
{
struct lan966x *lan966x = port->lan966x;
- int err;
if (port->tc.police_id != police_id) {
NL_SET_ERR_MSG_MOD(extack,
@@ -194,12 +190,7 @@ int lan966x_police_port_del(struct lan966x_port *port,
return -EINVAL;
}
- err = lan966x_police_del(port, POL_IDX_PORT + port->chip_port);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to add policer to port");
- return err;
- }
+ lan966x_police_del(port, POL_IDX_PORT + port->chip_port);
lan_rmw(ANA_POL_CFG_PORT_POL_ENA_SET(0) |
ANA_POL_CFG_POL_ORDER_SET(POL_ORDER),
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 9767b5a1c958..f99f88b5caa8 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -316,6 +316,42 @@ enum lan966x_target {
#define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\
FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x)
+/* ANA:PORT:VCAP_CFG */
+#define ANA_VCAP_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 12, 0, 1, 4)
+
+#define ANA_VCAP_CFG_S1_ENA BIT(14)
+#define ANA_VCAP_CFG_S1_ENA_SET(x)\
+ FIELD_PREP(ANA_VCAP_CFG_S1_ENA, x)
+#define ANA_VCAP_CFG_S1_ENA_GET(x)\
+ FIELD_GET(ANA_VCAP_CFG_S1_ENA, x)
+
+/* ANA:PORT:VCAP_S1_KEY_CFG */
+#define ANA_VCAP_S1_CFG(g, r) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 16, r, 3, 4)
+
+#define ANA_VCAP_S1_CFG_KEY_RT_CFG GENMASK(11, 9)
+#define ANA_VCAP_S1_CFG_KEY_RT_CFG_SET(x)\
+ FIELD_PREP(ANA_VCAP_S1_CFG_KEY_RT_CFG, x)
+#define ANA_VCAP_S1_CFG_KEY_RT_CFG_GET(x)\
+ FIELD_GET(ANA_VCAP_S1_CFG_KEY_RT_CFG, x)
+
+#define ANA_VCAP_S1_CFG_KEY_IP6_CFG GENMASK(8, 6)
+#define ANA_VCAP_S1_CFG_KEY_IP6_CFG_SET(x)\
+ FIELD_PREP(ANA_VCAP_S1_CFG_KEY_IP6_CFG, x)
+#define ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(x)\
+ FIELD_GET(ANA_VCAP_S1_CFG_KEY_IP6_CFG, x)
+
+#define ANA_VCAP_S1_CFG_KEY_IP4_CFG GENMASK(5, 3)
+#define ANA_VCAP_S1_CFG_KEY_IP4_CFG_SET(x)\
+ FIELD_PREP(ANA_VCAP_S1_CFG_KEY_IP4_CFG, x)
+#define ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(x)\
+ FIELD_GET(ANA_VCAP_S1_CFG_KEY_IP4_CFG, x)
+
+#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG GENMASK(2, 0)
+#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG_SET(x)\
+ FIELD_PREP(ANA_VCAP_S1_CFG_KEY_OTHER_CFG, x)
+#define ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(x)\
+ FIELD_GET(ANA_VCAP_S1_CFG_KEY_OTHER_CFG, x)
+
/* ANA:PORT:VCAP_S2_CFG */
#define ANA_VCAP_S2_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 28, 0, 1, 4)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
index f960727ecaee..47b2f7579dd2 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c
@@ -5,14 +5,34 @@
#include "vcap_api_client.h"
#include "vcap_tc.h"
-static bool lan966x_tc_is_known_etype(u16 etype)
+static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st,
+ u16 etype)
{
- switch (etype) {
- case ETH_P_ALL:
- case ETH_P_ARP:
- case ETH_P_IP:
- case ETH_P_IPV6:
- return true;
+ switch (st->admin->vtype) {
+ case VCAP_TYPE_IS1:
+ switch (etype) {
+ case ETH_P_ALL:
+ case ETH_P_ARP:
+ case ETH_P_IP:
+ case ETH_P_IPV6:
+ return true;
+ }
+ break;
+ case VCAP_TYPE_IS2:
+ switch (etype) {
+ case ETH_P_ALL:
+ case ETH_P_ARP:
+ case ETH_P_IP:
+ case ETH_P_IPV6:
+ case ETH_P_SNAP:
+ case ETH_P_802_2:
+ return true;
+ }
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+ "VCAP type not supported");
+ return false;
}
return false;
@@ -69,7 +89,7 @@ lan966x_tc_flower_handler_basic_usage(struct vcap_tc_flower_parse_usage *st)
flow_rule_match_basic(st->frule, &match);
if (match.mask->n_proto) {
st->l3_proto = be16_to_cpu(match.key->n_proto);
- if (!lan966x_tc_is_known_etype(st->l3_proto)) {
+ if (!lan966x_tc_is_known_etype(st, st->l3_proto)) {
err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_ETYPE,
st->l3_proto, ~0);
if (err)
@@ -79,18 +99,61 @@ lan966x_tc_flower_handler_basic_usage(struct vcap_tc_flower_parse_usage *st)
VCAP_BIT_1);
if (err)
goto out;
+ } else if (st->l3_proto == ETH_P_IPV6 &&
+ st->admin->vtype == VCAP_TYPE_IS1) {
+ /* Don't set any keys in this case */
+ } else if (st->l3_proto == ETH_P_SNAP &&
+ st->admin->vtype == VCAP_TYPE_IS1) {
+ err = vcap_rule_add_key_bit(st->vrule,
+ VCAP_KF_ETYPE_LEN_IS,
+ VCAP_BIT_0);
+ if (err)
+ goto out;
+
+ err = vcap_rule_add_key_bit(st->vrule,
+ VCAP_KF_IP_SNAP_IS,
+ VCAP_BIT_1);
+ if (err)
+ goto out;
+ } else if (st->admin->vtype == VCAP_TYPE_IS1) {
+ err = vcap_rule_add_key_bit(st->vrule,
+ VCAP_KF_ETYPE_LEN_IS,
+ VCAP_BIT_1);
+ if (err)
+ goto out;
+
+ err = vcap_rule_add_key_u32(st->vrule, VCAP_KF_ETYPE,
+ st->l3_proto, ~0);
+ if (err)
+ goto out;
}
}
if (match.mask->ip_proto) {
st->l4_proto = match.key->ip_proto;
if (st->l4_proto == IPPROTO_TCP) {
+ if (st->admin->vtype == VCAP_TYPE_IS1) {
+ err = vcap_rule_add_key_bit(st->vrule,
+ VCAP_KF_TCP_UDP_IS,
+ VCAP_BIT_1);
+ if (err)
+ goto out;
+ }
+
err = vcap_rule_add_key_bit(st->vrule,
VCAP_KF_TCP_IS,
VCAP_BIT_1);
if (err)
goto out;
} else if (st->l4_proto == IPPROTO_UDP) {
+ if (st->admin->vtype == VCAP_TYPE_IS1) {
+ err = vcap_rule_add_key_bit(st->vrule,
+ VCAP_KF_TCP_UDP_IS,
+ VCAP_BIT_1);
+ if (err)
+ goto out;
+ }
+
err = vcap_rule_add_key_bit(st->vrule,
VCAP_KF_TCP_IS,
VCAP_BIT_0);
@@ -113,11 +176,29 @@ out:
}
static int
+lan966x_tc_flower_handler_cvlan_usage(struct vcap_tc_flower_parse_usage *st)
+{
+ if (st->admin->vtype != VCAP_TYPE_IS1) {
+ NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+ "cvlan not supported in this VCAP");
+ return -EINVAL;
+ }
+
+ return vcap_tc_flower_handler_cvlan_usage(st);
+}
+
+static int
lan966x_tc_flower_handler_vlan_usage(struct vcap_tc_flower_parse_usage *st)
{
- return vcap_tc_flower_handler_vlan_usage(st,
- VCAP_KF_8021Q_VID_CLS,
- VCAP_KF_8021Q_PCP_CLS);
+ enum vcap_key_field vid_key = VCAP_KF_8021Q_VID_CLS;
+ enum vcap_key_field pcp_key = VCAP_KF_8021Q_PCP_CLS;
+
+ if (st->admin->vtype == VCAP_TYPE_IS1) {
+ vid_key = VCAP_KF_8021Q_VID0;
+ pcp_key = VCAP_KF_8021Q_PCP0;
+ }
+
+ return vcap_tc_flower_handler_vlan_usage(st, vid_key, pcp_key);
}
static int
@@ -128,6 +209,7 @@ static int
[FLOW_DISSECTOR_KEY_CONTROL] = lan966x_tc_flower_handler_control_usage,
[FLOW_DISSECTOR_KEY_PORTS] = vcap_tc_flower_handler_portnum_usage,
[FLOW_DISSECTOR_KEY_BASIC] = lan966x_tc_flower_handler_basic_usage,
+ [FLOW_DISSECTOR_KEY_CVLAN] = lan966x_tc_flower_handler_cvlan_usage,
[FLOW_DISSECTOR_KEY_VLAN] = lan966x_tc_flower_handler_vlan_usage,
[FLOW_DISSECTOR_KEY_TCP] = vcap_tc_flower_handler_tcp_usage,
[FLOW_DISSECTOR_KEY_ARP] = vcap_tc_flower_handler_arp_usage,
@@ -143,6 +225,7 @@ static int lan966x_tc_flower_use_dissectors(struct flow_cls_offload *f,
.fco = f,
.vrule = vrule,
.l3_proto = ETH_P_ALL,
+ .admin = admin,
};
int err = 0;
@@ -221,6 +304,100 @@ static int lan966x_tc_flower_action_check(struct vcap_control *vctrl,
return 0;
}
+/* Add the actionset that is the default for the VCAP type */
+static int lan966x_tc_set_actionset(struct vcap_admin *admin,
+ struct vcap_rule *vrule)
+{
+ enum vcap_actionfield_set aset;
+ int err = 0;
+
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS1:
+ aset = VCAP_AFS_S1;
+ break;
+ case VCAP_TYPE_IS2:
+ aset = VCAP_AFS_BASE_TYPE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Do not overwrite any current actionset */
+ if (vrule->actionset == VCAP_AFS_NO_VALUE)
+ err = vcap_set_rule_set_actionset(vrule, aset);
+
+ return err;
+}
+
+static int lan966x_tc_add_rule_link_target(struct vcap_admin *admin,
+ struct vcap_rule *vrule,
+ int target_cid)
+{
+ int link_val = target_cid % VCAP_CID_LOOKUP_SIZE;
+ int err;
+
+ if (!link_val)
+ return 0;
+
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS1:
+ /* Choose IS1 specific NXT_IDX key (for chaining rules from IS1) */
+ err = vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_GEN_IDX_SEL,
+ 1, ~0);
+ if (err)
+ return err;
+
+ return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_GEN_IDX,
+ link_val, ~0);
+ case VCAP_TYPE_IS2:
+ /* Add IS2 specific PAG key (for chaining rules from IS1) */
+ return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_PAG,
+ link_val, ~0);
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int lan966x_tc_add_rule_link(struct vcap_control *vctrl,
+ struct vcap_admin *admin,
+ struct vcap_rule *vrule,
+ struct flow_cls_offload *f,
+ int to_cid)
+{
+ struct vcap_admin *to_admin = vcap_find_admin(vctrl, to_cid);
+ int diff, err = 0;
+
+ if (!to_admin) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "Unknown destination chain");
+ return -EINVAL;
+ }
+
+ diff = vcap_chain_offset(vctrl, f->common.chain_index, to_cid);
+ if (!diff)
+ return 0;
+
+ /* Between IS1 and IS2 the PAG value is used */
+ if (admin->vtype == VCAP_TYPE_IS1 && to_admin->vtype == VCAP_TYPE_IS2) {
+ /* This works for IS1->IS2 */
+ err = vcap_rule_add_action_u32(vrule, VCAP_AF_PAG_VAL, diff);
+ if (err)
+ return err;
+
+ err = vcap_rule_add_action_u32(vrule, VCAP_AF_PAG_OVERRIDE_MASK,
+ 0xff);
+ if (err)
+ return err;
+ } else {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "Unsupported chain destination");
+ return -EOPNOTSUPP;
+ }
+
+ return err;
+}
+
static int lan966x_tc_flower_add(struct lan966x_port *port,
struct flow_cls_offload *f,
struct vcap_admin *admin,
@@ -248,11 +425,23 @@ static int lan966x_tc_flower_add(struct lan966x_port *port,
if (err)
goto out;
+ err = lan966x_tc_add_rule_link_target(admin, vrule,
+ f->common.chain_index);
+ if (err)
+ goto out;
+
frule = flow_cls_offload_flow_rule(f);
flow_action_for_each(idx, act, &frule->action) {
switch (act->id) {
case FLOW_ACTION_TRAP:
+ if (admin->vtype != VCAP_TYPE_IS2) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "Trap action not supported in this VCAP");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
err = vcap_rule_add_action_bit(vrule,
VCAP_AF_CPU_COPY_ENA,
VCAP_BIT_1);
@@ -266,6 +455,16 @@ static int lan966x_tc_flower_add(struct lan966x_port *port,
break;
case FLOW_ACTION_GOTO:
+ err = lan966x_tc_set_actionset(admin, vrule);
+ if (err)
+ goto out;
+
+ err = lan966x_tc_add_rule_link(port->lan966x->vcap_ctrl,
+ admin, vrule,
+ f, act->chain_index);
+ if (err)
+ goto out;
+
break;
default:
NL_SET_ERR_MSG_MOD(f->common.extack,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c
index 928e711960e6..66400a082d02 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c
@@ -6,6 +6,965 @@
#include "lan966x_vcap_ag_api.h"
/* keyfields */
+static const struct vcap_field is1_normal_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 0,
+ .width = 1,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 1,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 3,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 12,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 19,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 31,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 32,
+ .width = 3,
+ },
+ [VCAP_KF_L2_SMAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 35,
+ .width = 48,
+ },
+ [VCAP_KF_ETYPE_LEN_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 83,
+ .width = 1,
+ },
+ [VCAP_KF_ETYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 84,
+ .width = 16,
+ },
+ [VCAP_KF_IP_SNAP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 100,
+ .width = 1,
+ },
+ [VCAP_KF_IP4_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 101,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAGMENT] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 102,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAG_OFS_GT0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 103,
+ .width = 1,
+ },
+ [VCAP_KF_L3_OPTIONS_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 104,
+ .width = 1,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 105,
+ .width = 6,
+ },
+ [VCAP_KF_L3_IP4_SIP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 111,
+ .width = 32,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 143,
+ .width = 1,
+ },
+ [VCAP_KF_TCP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 144,
+ .width = 1,
+ },
+ [VCAP_KF_L4_SPORT] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 145,
+ .width = 16,
+ },
+ [VCAP_KF_L4_RNG] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 161,
+ .width = 8,
+ },
+};
+
+static const struct vcap_field is1_5tuple_ip4_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 0,
+ .width = 1,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 1,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 3,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 12,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 19,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 31,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 32,
+ .width = 3,
+ },
+ [VCAP_KF_8021Q_TPID1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 35,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 36,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 48,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 49,
+ .width = 3,
+ },
+ [VCAP_KF_IP4_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 52,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAGMENT] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 53,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAG_OFS_GT0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 54,
+ .width = 1,
+ },
+ [VCAP_KF_L3_OPTIONS_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 55,
+ .width = 1,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 56,
+ .width = 6,
+ },
+ [VCAP_KF_L3_IP4_DIP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 62,
+ .width = 32,
+ },
+ [VCAP_KF_L3_IP4_SIP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 94,
+ .width = 32,
+ },
+ [VCAP_KF_L3_IP_PROTO] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 126,
+ .width = 8,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 134,
+ .width = 1,
+ },
+ [VCAP_KF_TCP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 135,
+ .width = 1,
+ },
+ [VCAP_KF_L4_RNG] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 136,
+ .width = 8,
+ },
+ [VCAP_KF_IP_PAYLOAD_5TUPLE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 144,
+ .width = 32,
+ },
+};
+
+static const struct vcap_field is1_normal_ip6_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 4,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 19,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 20,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 32,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 33,
+ .width = 3,
+ },
+ [VCAP_KF_8021Q_TPID1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 36,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 37,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 49,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 50,
+ .width = 3,
+ },
+ [VCAP_KF_L2_SMAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 53,
+ .width = 48,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 101,
+ .width = 6,
+ },
+ [VCAP_KF_L3_IP6_SIP] = {
+ .type = VCAP_FIELD_U128,
+ .offset = 107,
+ .width = 128,
+ },
+ [VCAP_KF_L3_IP_PROTO] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 235,
+ .width = 8,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 243,
+ .width = 1,
+ },
+ [VCAP_KF_L4_RNG] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 244,
+ .width = 8,
+ },
+ [VCAP_KF_IP_PAYLOAD_S1_IP6] = {
+ .type = VCAP_FIELD_U112,
+ .offset = 252,
+ .width = 112,
+ },
+};
+
+static const struct vcap_field is1_7tuple_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 4,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 19,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 20,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 32,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 33,
+ .width = 3,
+ },
+ [VCAP_KF_8021Q_TPID1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 36,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 37,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 49,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 50,
+ .width = 3,
+ },
+ [VCAP_KF_L2_DMAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 53,
+ .width = 48,
+ },
+ [VCAP_KF_L2_SMAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 101,
+ .width = 48,
+ },
+ [VCAP_KF_ETYPE_LEN_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 149,
+ .width = 1,
+ },
+ [VCAP_KF_ETYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 150,
+ .width = 16,
+ },
+ [VCAP_KF_IP_SNAP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 166,
+ .width = 1,
+ },
+ [VCAP_KF_IP4_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 167,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAGMENT] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 168,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAG_OFS_GT0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 169,
+ .width = 1,
+ },
+ [VCAP_KF_L3_OPTIONS_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 170,
+ .width = 1,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 171,
+ .width = 6,
+ },
+ [VCAP_KF_L3_IP6_DIP_MSB] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 177,
+ .width = 16,
+ },
+ [VCAP_KF_L3_IP6_DIP] = {
+ .type = VCAP_FIELD_U64,
+ .offset = 193,
+ .width = 64,
+ },
+ [VCAP_KF_L3_IP6_SIP_MSB] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 257,
+ .width = 16,
+ },
+ [VCAP_KF_L3_IP6_SIP] = {
+ .type = VCAP_FIELD_U64,
+ .offset = 273,
+ .width = 64,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 337,
+ .width = 1,
+ },
+ [VCAP_KF_TCP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 338,
+ .width = 1,
+ },
+ [VCAP_KF_L4_SPORT] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 339,
+ .width = 16,
+ },
+ [VCAP_KF_L4_RNG] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 355,
+ .width = 8,
+ },
+};
+
+static const struct vcap_field is1_5tuple_ip6_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 4,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 19,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 20,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 32,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 33,
+ .width = 3,
+ },
+ [VCAP_KF_8021Q_TPID1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 36,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 37,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 49,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 50,
+ .width = 3,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 53,
+ .width = 6,
+ },
+ [VCAP_KF_L3_IP6_DIP] = {
+ .type = VCAP_FIELD_U128,
+ .offset = 59,
+ .width = 128,
+ },
+ [VCAP_KF_L3_IP6_SIP] = {
+ .type = VCAP_FIELD_U128,
+ .offset = 187,
+ .width = 128,
+ },
+ [VCAP_KF_L3_IP_PROTO] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 315,
+ .width = 8,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 323,
+ .width = 1,
+ },
+ [VCAP_KF_L4_RNG] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 324,
+ .width = 8,
+ },
+ [VCAP_KF_IP_PAYLOAD_5TUPLE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 332,
+ .width = 32,
+ },
+};
+
+static const struct vcap_field is1_dbl_vid_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 4,
+ .width = 9,
+ },
+ [VCAP_KF_L2_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_L2_BC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_IP_MC_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 17,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 18,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 19,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 20,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 32,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 33,
+ .width = 3,
+ },
+ [VCAP_KF_8021Q_TPID1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 36,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 37,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI1] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 49,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP1] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 50,
+ .width = 3,
+ },
+ [VCAP_KF_ETYPE_LEN_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 53,
+ .width = 1,
+ },
+ [VCAP_KF_ETYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 54,
+ .width = 16,
+ },
+ [VCAP_KF_IP_SNAP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 70,
+ .width = 1,
+ },
+ [VCAP_KF_IP4_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 71,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAGMENT] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 72,
+ .width = 1,
+ },
+ [VCAP_KF_L3_FRAG_OFS_GT0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 73,
+ .width = 1,
+ },
+ [VCAP_KF_L3_OPTIONS_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 74,
+ .width = 1,
+ },
+ [VCAP_KF_L3_DSCP] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 75,
+ .width = 6,
+ },
+ [VCAP_KF_TCP_UDP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 81,
+ .width = 1,
+ },
+ [VCAP_KF_TCP_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 82,
+ .width = 1,
+ },
+};
+
+static const struct vcap_field is1_rt_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_FIRST_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 2,
+ .width = 1,
+ },
+ [VCAP_KF_IF_IGR_PORT] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 3,
+ .width = 3,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 6,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 7,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 8,
+ .width = 1,
+ },
+ [VCAP_KF_L2_MAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 9,
+ .width = 48,
+ },
+ [VCAP_KF_RT_VLAN_IDX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 57,
+ .width = 3,
+ },
+ [VCAP_KF_RT_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 60,
+ .width = 2,
+ },
+ [VCAP_KF_RT_FRMID] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 62,
+ .width = 32,
+ },
+};
+
+static const struct vcap_field is1_dmac_vid_keyfield[] = {
+ [VCAP_KF_TYPE] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 0,
+ .width = 2,
+ },
+ [VCAP_KF_LOOKUP_INDEX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 2,
+ },
+ [VCAP_KF_IF_IGR_PORT_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 4,
+ .width = 9,
+ },
+ [VCAP_KF_8021CB_R_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 14,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 15,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_TPID0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 16,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_VID0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 17,
+ .width = 12,
+ },
+ [VCAP_KF_8021Q_DEI0] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 29,
+ .width = 1,
+ },
+ [VCAP_KF_8021Q_PCP0] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 30,
+ .width = 3,
+ },
+ [VCAP_KF_L2_DMAC] = {
+ .type = VCAP_FIELD_U48,
+ .offset = 33,
+ .width = 48,
+ },
+};
+
static const struct vcap_field is2_mac_etype_keyfield[] = {
[VCAP_KF_TYPE] = {
.type = VCAP_FIELD_U32,
@@ -1163,6 +2122,49 @@ static const struct vcap_field is2_smac_sip6_keyfield[] = {
};
/* keyfield_set */
+static const struct vcap_set is1_keyfield_set[] = {
+ [VCAP_KFS_NORMAL] = {
+ .type_id = 0,
+ .sw_per_item = 2,
+ .sw_cnt = 2,
+ },
+ [VCAP_KFS_5TUPLE_IP4] = {
+ .type_id = 1,
+ .sw_per_item = 2,
+ .sw_cnt = 2,
+ },
+ [VCAP_KFS_NORMAL_IP6] = {
+ .type_id = 0,
+ .sw_per_item = 4,
+ .sw_cnt = 1,
+ },
+ [VCAP_KFS_7TUPLE] = {
+ .type_id = 1,
+ .sw_per_item = 4,
+ .sw_cnt = 1,
+ },
+ [VCAP_KFS_5TUPLE_IP6] = {
+ .type_id = 2,
+ .sw_per_item = 4,
+ .sw_cnt = 1,
+ },
+ [VCAP_KFS_DBL_VID] = {
+ .type_id = 0,
+ .sw_per_item = 1,
+ .sw_cnt = 4,
+ },
+ [VCAP_KFS_RT] = {
+ .type_id = 1,
+ .sw_per_item = 1,
+ .sw_cnt = 4,
+ },
+ [VCAP_KFS_DMAC_VID] = {
+ .type_id = 2,
+ .sw_per_item = 1,
+ .sw_cnt = 4,
+ },
+};
+
static const struct vcap_set is2_keyfield_set[] = {
[VCAP_KFS_MAC_ETYPE] = {
.type_id = 0,
@@ -1227,6 +2229,17 @@ static const struct vcap_set is2_keyfield_set[] = {
};
/* keyfield_set map */
+static const struct vcap_field *is1_keyfield_set_map[] = {
+ [VCAP_KFS_NORMAL] = is1_normal_keyfield,
+ [VCAP_KFS_5TUPLE_IP4] = is1_5tuple_ip4_keyfield,
+ [VCAP_KFS_NORMAL_IP6] = is1_normal_ip6_keyfield,
+ [VCAP_KFS_7TUPLE] = is1_7tuple_keyfield,
+ [VCAP_KFS_5TUPLE_IP6] = is1_5tuple_ip6_keyfield,
+ [VCAP_KFS_DBL_VID] = is1_dbl_vid_keyfield,
+ [VCAP_KFS_RT] = is1_rt_keyfield,
+ [VCAP_KFS_DMAC_VID] = is1_dmac_vid_keyfield,
+};
+
static const struct vcap_field *is2_keyfield_set_map[] = {
[VCAP_KFS_MAC_ETYPE] = is2_mac_etype_keyfield,
[VCAP_KFS_MAC_LLC] = is2_mac_llc_keyfield,
@@ -1243,6 +2256,17 @@ static const struct vcap_field *is2_keyfield_set_map[] = {
};
/* keyfield_set map sizes */
+static int is1_keyfield_set_map_size[] = {
+ [VCAP_KFS_NORMAL] = ARRAY_SIZE(is1_normal_keyfield),
+ [VCAP_KFS_5TUPLE_IP4] = ARRAY_SIZE(is1_5tuple_ip4_keyfield),
+ [VCAP_KFS_NORMAL_IP6] = ARRAY_SIZE(is1_normal_ip6_keyfield),
+ [VCAP_KFS_7TUPLE] = ARRAY_SIZE(is1_7tuple_keyfield),
+ [VCAP_KFS_5TUPLE_IP6] = ARRAY_SIZE(is1_5tuple_ip6_keyfield),
+ [VCAP_KFS_DBL_VID] = ARRAY_SIZE(is1_dbl_vid_keyfield),
+ [VCAP_KFS_RT] = ARRAY_SIZE(is1_rt_keyfield),
+ [VCAP_KFS_DMAC_VID] = ARRAY_SIZE(is1_dmac_vid_keyfield),
+};
+
static int is2_keyfield_set_map_size[] = {
[VCAP_KFS_MAC_ETYPE] = ARRAY_SIZE(is2_mac_etype_keyfield),
[VCAP_KFS_MAC_LLC] = ARRAY_SIZE(is2_mac_llc_keyfield),
@@ -1259,6 +2283,154 @@ static int is2_keyfield_set_map_size[] = {
};
/* actionfields */
+static const struct vcap_field is1_s1_actionfield[] = {
+ [VCAP_AF_TYPE] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 0,
+ .width = 1,
+ },
+ [VCAP_AF_DSCP_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 1,
+ .width = 1,
+ },
+ [VCAP_AF_DSCP_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 2,
+ .width = 6,
+ },
+ [VCAP_AF_QOS_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 8,
+ .width = 1,
+ },
+ [VCAP_AF_QOS_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 9,
+ .width = 3,
+ },
+ [VCAP_AF_DP_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 12,
+ .width = 1,
+ },
+ [VCAP_AF_DP_VAL] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 13,
+ .width = 1,
+ },
+ [VCAP_AF_PAG_OVERRIDE_MASK] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 14,
+ .width = 8,
+ },
+ [VCAP_AF_PAG_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 22,
+ .width = 8,
+ },
+ [VCAP_AF_ISDX_REPLACE_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 30,
+ .width = 1,
+ },
+ [VCAP_AF_ISDX_ADD_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 31,
+ .width = 8,
+ },
+ [VCAP_AF_VID_REPLACE_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 39,
+ .width = 1,
+ },
+ [VCAP_AF_VID_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 40,
+ .width = 12,
+ },
+ [VCAP_AF_PCP_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 67,
+ .width = 1,
+ },
+ [VCAP_AF_PCP_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 68,
+ .width = 3,
+ },
+ [VCAP_AF_DEI_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 71,
+ .width = 1,
+ },
+ [VCAP_AF_DEI_VAL] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 72,
+ .width = 1,
+ },
+ [VCAP_AF_VLAN_POP_CNT_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 73,
+ .width = 1,
+ },
+ [VCAP_AF_VLAN_POP_CNT] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 74,
+ .width = 2,
+ },
+ [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 76,
+ .width = 4,
+ },
+ [VCAP_AF_SFID_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 80,
+ .width = 1,
+ },
+ [VCAP_AF_SFID_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 81,
+ .width = 8,
+ },
+ [VCAP_AF_SGID_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 89,
+ .width = 1,
+ },
+ [VCAP_AF_SGID_VAL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 90,
+ .width = 8,
+ },
+ [VCAP_AF_POLICE_ENA] = {
+ .type = VCAP_FIELD_BIT,
+ .offset = 98,
+ .width = 1,
+ },
+ [VCAP_AF_POLICE_IDX] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 99,
+ .width = 9,
+ },
+ [VCAP_AF_OAM_SEL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 108,
+ .width = 3,
+ },
+ [VCAP_AF_MRP_SEL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 111,
+ .width = 2,
+ },
+ [VCAP_AF_DLR_SEL] = {
+ .type = VCAP_FIELD_U32,
+ .offset = 113,
+ .width = 2,
+ },
+};
+
static const struct vcap_field is2_base_type_actionfield[] = {
[VCAP_AF_HIT_ME_ONCE] = {
.type = VCAP_FIELD_BIT,
@@ -1351,6 +2523,14 @@ static const struct vcap_field is2_smac_sip_actionfield[] = {
};
/* actionfield_set */
+static const struct vcap_set is1_actionfield_set[] = {
+ [VCAP_AFS_S1] = {
+ .type_id = 0,
+ .sw_per_item = 1,
+ .sw_cnt = 4,
+ },
+};
+
static const struct vcap_set is2_actionfield_set[] = {
[VCAP_AFS_BASE_TYPE] = {
.type_id = -1,
@@ -1365,18 +2545,73 @@ static const struct vcap_set is2_actionfield_set[] = {
};
/* actionfield_set map */
+static const struct vcap_field *is1_actionfield_set_map[] = {
+ [VCAP_AFS_S1] = is1_s1_actionfield,
+};
+
static const struct vcap_field *is2_actionfield_set_map[] = {
[VCAP_AFS_BASE_TYPE] = is2_base_type_actionfield,
[VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield,
};
/* actionfield_set map size */
+static int is1_actionfield_set_map_size[] = {
+ [VCAP_AFS_S1] = ARRAY_SIZE(is1_s1_actionfield),
+};
+
static int is2_actionfield_set_map_size[] = {
[VCAP_AFS_BASE_TYPE] = ARRAY_SIZE(is2_base_type_actionfield),
[VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield),
};
/* Type Groups */
+static const struct vcap_typegroup is1_x4_keyfield_set_typegroups[] = {
+ {
+ .offset = 0,
+ .width = 3,
+ .value = 4,
+ },
+ {
+ .offset = 96,
+ .width = 1,
+ .value = 0,
+ },
+ {
+ .offset = 192,
+ .width = 2,
+ .value = 0,
+ },
+ {
+ .offset = 288,
+ .width = 1,
+ .value = 0,
+ },
+ {}
+};
+
+static const struct vcap_typegroup is1_x2_keyfield_set_typegroups[] = {
+ {
+ .offset = 0,
+ .width = 2,
+ .value = 2,
+ },
+ {
+ .offset = 96,
+ .width = 1,
+ .value = 0,
+ },
+ {}
+};
+
+static const struct vcap_typegroup is1_x1_keyfield_set_typegroups[] = {
+ {
+ .offset = 0,
+ .width = 1,
+ .value = 1,
+ },
+ {}
+};
+
static const struct vcap_typegroup is2_x4_keyfield_set_typegroups[] = {
{
.offset = 0,
@@ -1424,6 +2659,13 @@ static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = {
{}
};
+static const struct vcap_typegroup *is1_keyfield_set_typegroups[] = {
+ [4] = is1_x4_keyfield_set_typegroups,
+ [2] = is1_x2_keyfield_set_typegroups,
+ [1] = is1_x1_keyfield_set_typegroups,
+ [5] = NULL,
+};
+
static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = {
[4] = is2_x4_keyfield_set_typegroups,
[2] = is2_x2_keyfield_set_typegroups,
@@ -1431,6 +2673,10 @@ static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = {
[5] = NULL,
};
+static const struct vcap_typegroup is1_x1_actionfield_set_typegroups[] = {
+ {}
+};
+
static const struct vcap_typegroup is2_x2_actionfield_set_typegroups[] = {
{
.offset = 0,
@@ -1454,6 +2700,11 @@ static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = {
{}
};
+static const struct vcap_typegroup *is1_actionfield_set_typegroups[] = {
+ [1] = is1_x1_actionfield_set_typegroups,
+ [5] = NULL,
+};
+
static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = {
[2] = is2_x2_actionfield_set_typegroups,
[1] = is2_x1_actionfield_set_typegroups,
@@ -1463,16 +2714,33 @@ static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = {
/* Keyfieldset names */
static const char * const vcap_keyfield_set_names[] = {
[VCAP_KFS_NO_VALUE] = "(None)",
+ [VCAP_KFS_5TUPLE_IP4] = "VCAP_KFS_5TUPLE_IP4",
+ [VCAP_KFS_5TUPLE_IP6] = "VCAP_KFS_5TUPLE_IP6",
+ [VCAP_KFS_7TUPLE] = "VCAP_KFS_7TUPLE",
[VCAP_KFS_ARP] = "VCAP_KFS_ARP",
+ [VCAP_KFS_DBL_VID] = "VCAP_KFS_DBL_VID",
+ [VCAP_KFS_DMAC_VID] = "VCAP_KFS_DMAC_VID",
+ [VCAP_KFS_ETAG] = "VCAP_KFS_ETAG",
[VCAP_KFS_IP4_OTHER] = "VCAP_KFS_IP4_OTHER",
[VCAP_KFS_IP4_TCP_UDP] = "VCAP_KFS_IP4_TCP_UDP",
+ [VCAP_KFS_IP4_VID] = "VCAP_KFS_IP4_VID",
[VCAP_KFS_IP6_OTHER] = "VCAP_KFS_IP6_OTHER",
[VCAP_KFS_IP6_STD] = "VCAP_KFS_IP6_STD",
[VCAP_KFS_IP6_TCP_UDP] = "VCAP_KFS_IP6_TCP_UDP",
+ [VCAP_KFS_IP6_VID] = "VCAP_KFS_IP6_VID",
+ [VCAP_KFS_IP_7TUPLE] = "VCAP_KFS_IP_7TUPLE",
+ [VCAP_KFS_ISDX] = "VCAP_KFS_ISDX",
+ [VCAP_KFS_LL_FULL] = "VCAP_KFS_LL_FULL",
[VCAP_KFS_MAC_ETYPE] = "VCAP_KFS_MAC_ETYPE",
[VCAP_KFS_MAC_LLC] = "VCAP_KFS_MAC_LLC",
[VCAP_KFS_MAC_SNAP] = "VCAP_KFS_MAC_SNAP",
+ [VCAP_KFS_NORMAL] = "VCAP_KFS_NORMAL",
+ [VCAP_KFS_NORMAL_5TUPLE_IP4] = "VCAP_KFS_NORMAL_5TUPLE_IP4",
+ [VCAP_KFS_NORMAL_7TUPLE] = "VCAP_KFS_NORMAL_7TUPLE",
+ [VCAP_KFS_NORMAL_IP6] = "VCAP_KFS_NORMAL_IP6",
[VCAP_KFS_OAM] = "VCAP_KFS_OAM",
+ [VCAP_KFS_PURE_5TUPLE_IP4] = "VCAP_KFS_PURE_5TUPLE_IP4",
+ [VCAP_KFS_RT] = "VCAP_KFS_RT",
[VCAP_KFS_SMAC_SIP4] = "VCAP_KFS_SMAC_SIP4",
[VCAP_KFS_SMAC_SIP6] = "VCAP_KFS_SMAC_SIP6",
};
@@ -1481,16 +2749,42 @@ static const char * const vcap_keyfield_set_names[] = {
static const char * const vcap_actionfield_set_names[] = {
[VCAP_AFS_NO_VALUE] = "(None)",
[VCAP_AFS_BASE_TYPE] = "VCAP_AFS_BASE_TYPE",
+ [VCAP_AFS_CLASSIFICATION] = "VCAP_AFS_CLASSIFICATION",
+ [VCAP_AFS_CLASS_REDUCED] = "VCAP_AFS_CLASS_REDUCED",
+ [VCAP_AFS_FULL] = "VCAP_AFS_FULL",
+ [VCAP_AFS_S1] = "VCAP_AFS_S1",
[VCAP_AFS_SMAC_SIP] = "VCAP_AFS_SMAC_SIP",
};
/* Keyfield names */
static const char * const vcap_keyfield_names[] = {
[VCAP_KF_NO_VALUE] = "(None)",
+ [VCAP_KF_8021BR_ECID_BASE] = "8021BR_ECID_BASE",
+ [VCAP_KF_8021BR_ECID_EXT] = "8021BR_ECID_EXT",
+ [VCAP_KF_8021BR_E_TAGGED] = "8021BR_E_TAGGED",
+ [VCAP_KF_8021BR_GRP] = "8021BR_GRP",
+ [VCAP_KF_8021BR_IGR_ECID_BASE] = "8021BR_IGR_ECID_BASE",
+ [VCAP_KF_8021BR_IGR_ECID_EXT] = "8021BR_IGR_ECID_EXT",
+ [VCAP_KF_8021CB_R_TAGGED_IS] = "8021CB_R_TAGGED_IS",
+ [VCAP_KF_8021Q_DEI0] = "8021Q_DEI0",
+ [VCAP_KF_8021Q_DEI1] = "8021Q_DEI1",
+ [VCAP_KF_8021Q_DEI2] = "8021Q_DEI2",
[VCAP_KF_8021Q_DEI_CLS] = "8021Q_DEI_CLS",
+ [VCAP_KF_8021Q_PCP0] = "8021Q_PCP0",
+ [VCAP_KF_8021Q_PCP1] = "8021Q_PCP1",
+ [VCAP_KF_8021Q_PCP2] = "8021Q_PCP2",
[VCAP_KF_8021Q_PCP_CLS] = "8021Q_PCP_CLS",
+ [VCAP_KF_8021Q_TPID0] = "8021Q_TPID0",
+ [VCAP_KF_8021Q_TPID1] = "8021Q_TPID1",
+ [VCAP_KF_8021Q_TPID2] = "8021Q_TPID2",
+ [VCAP_KF_8021Q_VID0] = "8021Q_VID0",
+ [VCAP_KF_8021Q_VID1] = "8021Q_VID1",
+ [VCAP_KF_8021Q_VID2] = "8021Q_VID2",
[VCAP_KF_8021Q_VID_CLS] = "8021Q_VID_CLS",
+ [VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS] = "8021Q_VLAN_DBL_TAGGED_IS",
[VCAP_KF_8021Q_VLAN_TAGGED_IS] = "8021Q_VLAN_TAGGED_IS",
+ [VCAP_KF_8021Q_VLAN_TAGS] = "8021Q_VLAN_TAGS",
+ [VCAP_KF_ACL_GRP_ID] = "ACL_GRP_ID",
[VCAP_KF_ARP_ADDR_SPACE_OK_IS] = "ARP_ADDR_SPACE_OK_IS",
[VCAP_KF_ARP_LEN_OK_IS] = "ARP_LEN_OK_IS",
[VCAP_KF_ARP_OPCODE] = "ARP_OPCODE",
@@ -1498,32 +2792,57 @@ static const char * const vcap_keyfield_names[] = {
[VCAP_KF_ARP_PROTO_SPACE_OK_IS] = "ARP_PROTO_SPACE_OK_IS",
[VCAP_KF_ARP_SENDER_MATCH_IS] = "ARP_SENDER_MATCH_IS",
[VCAP_KF_ARP_TGT_MATCH_IS] = "ARP_TGT_MATCH_IS",
+ [VCAP_KF_COSID_CLS] = "COSID_CLS",
+ [VCAP_KF_ES0_ISDX_KEY_ENA] = "ES0_ISDX_KEY_ENA",
[VCAP_KF_ETYPE] = "ETYPE",
+ [VCAP_KF_ETYPE_LEN_IS] = "ETYPE_LEN_IS",
[VCAP_KF_HOST_MATCH] = "HOST_MATCH",
+ [VCAP_KF_IF_EGR_PORT_MASK] = "IF_EGR_PORT_MASK",
+ [VCAP_KF_IF_EGR_PORT_MASK_RNG] = "IF_EGR_PORT_MASK_RNG",
[VCAP_KF_IF_IGR_PORT] = "IF_IGR_PORT",
[VCAP_KF_IF_IGR_PORT_MASK] = "IF_IGR_PORT_MASK",
+ [VCAP_KF_IF_IGR_PORT_MASK_L3] = "IF_IGR_PORT_MASK_L3",
+ [VCAP_KF_IF_IGR_PORT_MASK_RNG] = "IF_IGR_PORT_MASK_RNG",
+ [VCAP_KF_IF_IGR_PORT_MASK_SEL] = "IF_IGR_PORT_MASK_SEL",
+ [VCAP_KF_IF_IGR_PORT_SEL] = "IF_IGR_PORT_SEL",
[VCAP_KF_IP4_IS] = "IP4_IS",
+ [VCAP_KF_IP_MC_IS] = "IP_MC_IS",
+ [VCAP_KF_IP_PAYLOAD_5TUPLE] = "IP_PAYLOAD_5TUPLE",
+ [VCAP_KF_IP_PAYLOAD_S1_IP6] = "IP_PAYLOAD_S1_IP6",
+ [VCAP_KF_IP_SNAP_IS] = "IP_SNAP_IS",
+ [VCAP_KF_ISDX_CLS] = "ISDX_CLS",
[VCAP_KF_ISDX_GT0_IS] = "ISDX_GT0_IS",
[VCAP_KF_L2_BC_IS] = "L2_BC_IS",
[VCAP_KF_L2_DMAC] = "L2_DMAC",
[VCAP_KF_L2_FRM_TYPE] = "L2_FRM_TYPE",
+ [VCAP_KF_L2_FWD_IS] = "L2_FWD_IS",
[VCAP_KF_L2_LLC] = "L2_LLC",
+ [VCAP_KF_L2_MAC] = "L2_MAC",
[VCAP_KF_L2_MC_IS] = "L2_MC_IS",
[VCAP_KF_L2_PAYLOAD0] = "L2_PAYLOAD0",
[VCAP_KF_L2_PAYLOAD1] = "L2_PAYLOAD1",
[VCAP_KF_L2_PAYLOAD2] = "L2_PAYLOAD2",
+ [VCAP_KF_L2_PAYLOAD_ETYPE] = "L2_PAYLOAD_ETYPE",
[VCAP_KF_L2_SMAC] = "L2_SMAC",
[VCAP_KF_L2_SNAP] = "L2_SNAP",
[VCAP_KF_L3_DIP_EQ_SIP_IS] = "L3_DIP_EQ_SIP_IS",
+ [VCAP_KF_L3_DPL_CLS] = "L3_DPL_CLS",
+ [VCAP_KF_L3_DSCP] = "L3_DSCP",
+ [VCAP_KF_L3_DST_IS] = "L3_DST_IS",
[VCAP_KF_L3_FRAGMENT] = "L3_FRAGMENT",
+ [VCAP_KF_L3_FRAGMENT_TYPE] = "L3_FRAGMENT_TYPE",
+ [VCAP_KF_L3_FRAG_INVLD_L4_LEN] = "L3_FRAG_INVLD_L4_LEN",
[VCAP_KF_L3_FRAG_OFS_GT0] = "L3_FRAG_OFS_GT0",
[VCAP_KF_L3_IP4_DIP] = "L3_IP4_DIP",
[VCAP_KF_L3_IP4_SIP] = "L3_IP4_SIP",
[VCAP_KF_L3_IP6_DIP] = "L3_IP6_DIP",
+ [VCAP_KF_L3_IP6_DIP_MSB] = "L3_IP6_DIP_MSB",
[VCAP_KF_L3_IP6_SIP] = "L3_IP6_SIP",
+ [VCAP_KF_L3_IP6_SIP_MSB] = "L3_IP6_SIP_MSB",
[VCAP_KF_L3_IP_PROTO] = "L3_IP_PROTO",
[VCAP_KF_L3_OPTIONS_IS] = "L3_OPTIONS_IS",
[VCAP_KF_L3_PAYLOAD] = "L3_PAYLOAD",
+ [VCAP_KF_L3_RT_IS] = "L3_RT_IS",
[VCAP_KF_L3_TOS] = "L3_TOS",
[VCAP_KF_L3_TTL_GT0] = "L3_TTL_GT0",
[VCAP_KF_L4_1588_DOM] = "L4_1588_DOM",
@@ -1531,6 +2850,7 @@ static const char * const vcap_keyfield_names[] = {
[VCAP_KF_L4_ACK] = "L4_ACK",
[VCAP_KF_L4_DPORT] = "L4_DPORT",
[VCAP_KF_L4_FIN] = "L4_FIN",
+ [VCAP_KF_L4_PAYLOAD] = "L4_PAYLOAD",
[VCAP_KF_L4_PSH] = "L4_PSH",
[VCAP_KF_L4_RNG] = "L4_RNG",
[VCAP_KF_L4_RST] = "L4_RST",
@@ -1540,7 +2860,11 @@ static const char * const vcap_keyfield_names[] = {
[VCAP_KF_L4_SYN] = "L4_SYN",
[VCAP_KF_L4_URG] = "L4_URG",
[VCAP_KF_LOOKUP_FIRST_IS] = "LOOKUP_FIRST_IS",
+ [VCAP_KF_LOOKUP_GEN_IDX] = "LOOKUP_GEN_IDX",
+ [VCAP_KF_LOOKUP_GEN_IDX_SEL] = "LOOKUP_GEN_IDX_SEL",
+ [VCAP_KF_LOOKUP_INDEX] = "LOOKUP_INDEX",
[VCAP_KF_LOOKUP_PAG] = "LOOKUP_PAG",
+ [VCAP_KF_MIRROR_PROBE] = "MIRROR_PROBE",
[VCAP_KF_OAM_CCM_CNTS_EQ0] = "OAM_CCM_CNTS_EQ0",
[VCAP_KF_OAM_DETECTED] = "OAM_DETECTED",
[VCAP_KF_OAM_FLAGS] = "OAM_FLAGS",
@@ -1549,7 +2873,12 @@ static const char * const vcap_keyfield_names[] = {
[VCAP_KF_OAM_OPCODE] = "OAM_OPCODE",
[VCAP_KF_OAM_VER] = "OAM_VER",
[VCAP_KF_OAM_Y1731_IS] = "OAM_Y1731_IS",
+ [VCAP_KF_PROT_ACTIVE] = "PROT_ACTIVE",
+ [VCAP_KF_RT_FRMID] = "RT_FRMID",
+ [VCAP_KF_RT_TYPE] = "RT_TYPE",
+ [VCAP_KF_RT_VLAN_IDX] = "RT_VLAN_IDX",
[VCAP_KF_TCP_IS] = "TCP_IS",
+ [VCAP_KF_TCP_UDP_IS] = "TCP_UDP_IS",
[VCAP_KF_TYPE] = "TYPE",
};
@@ -1557,24 +2886,95 @@ static const char * const vcap_keyfield_names[] = {
static const char * const vcap_actionfield_names[] = {
[VCAP_AF_NO_VALUE] = "(None)",
[VCAP_AF_ACL_ID] = "ACL_ID",
+ [VCAP_AF_CLS_VID_SEL] = "CLS_VID_SEL",
+ [VCAP_AF_CNT_ID] = "CNT_ID",
+ [VCAP_AF_COPY_PORT_NUM] = "COPY_PORT_NUM",
+ [VCAP_AF_COPY_QUEUE_NUM] = "COPY_QUEUE_NUM",
[VCAP_AF_CPU_COPY_ENA] = "CPU_COPY_ENA",
[VCAP_AF_CPU_QUEUE_NUM] = "CPU_QUEUE_NUM",
+ [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = "CUSTOM_ACE_TYPE_ENA",
+ [VCAP_AF_DEI_ENA] = "DEI_ENA",
+ [VCAP_AF_DEI_VAL] = "DEI_VAL",
+ [VCAP_AF_DLR_SEL] = "DLR_SEL",
+ [VCAP_AF_DP_ENA] = "DP_ENA",
+ [VCAP_AF_DP_VAL] = "DP_VAL",
+ [VCAP_AF_DSCP_ENA] = "DSCP_ENA",
+ [VCAP_AF_DSCP_VAL] = "DSCP_VAL",
+ [VCAP_AF_ES2_REW_CMD] = "ES2_REW_CMD",
[VCAP_AF_FWD_KILL_ENA] = "FWD_KILL_ENA",
+ [VCAP_AF_FWD_MODE] = "FWD_MODE",
[VCAP_AF_HIT_ME_ONCE] = "HIT_ME_ONCE",
[VCAP_AF_HOST_MATCH] = "HOST_MATCH",
+ [VCAP_AF_IGNORE_PIPELINE_CTRL] = "IGNORE_PIPELINE_CTRL",
+ [VCAP_AF_INTR_ENA] = "INTR_ENA",
+ [VCAP_AF_ISDX_ADD_REPLACE_SEL] = "ISDX_ADD_REPLACE_SEL",
+ [VCAP_AF_ISDX_ADD_VAL] = "ISDX_ADD_VAL",
[VCAP_AF_ISDX_ENA] = "ISDX_ENA",
+ [VCAP_AF_ISDX_REPLACE_ENA] = "ISDX_REPLACE_ENA",
+ [VCAP_AF_ISDX_VAL] = "ISDX_VAL",
[VCAP_AF_LRN_DIS] = "LRN_DIS",
+ [VCAP_AF_MAP_IDX] = "MAP_IDX",
+ [VCAP_AF_MAP_KEY] = "MAP_KEY",
+ [VCAP_AF_MAP_LOOKUP_SEL] = "MAP_LOOKUP_SEL",
[VCAP_AF_MASK_MODE] = "MASK_MODE",
+ [VCAP_AF_MATCH_ID] = "MATCH_ID",
+ [VCAP_AF_MATCH_ID_MASK] = "MATCH_ID_MASK",
[VCAP_AF_MIRROR_ENA] = "MIRROR_ENA",
+ [VCAP_AF_MIRROR_PROBE] = "MIRROR_PROBE",
+ [VCAP_AF_MIRROR_PROBE_ID] = "MIRROR_PROBE_ID",
+ [VCAP_AF_MRP_SEL] = "MRP_SEL",
+ [VCAP_AF_NXT_IDX] = "NXT_IDX",
+ [VCAP_AF_NXT_IDX_CTRL] = "NXT_IDX_CTRL",
+ [VCAP_AF_OAM_SEL] = "OAM_SEL",
+ [VCAP_AF_PAG_OVERRIDE_MASK] = "PAG_OVERRIDE_MASK",
+ [VCAP_AF_PAG_VAL] = "PAG_VAL",
+ [VCAP_AF_PCP_ENA] = "PCP_ENA",
+ [VCAP_AF_PCP_VAL] = "PCP_VAL",
+ [VCAP_AF_PIPELINE_FORCE_ENA] = "PIPELINE_FORCE_ENA",
+ [VCAP_AF_PIPELINE_PT] = "PIPELINE_PT",
[VCAP_AF_POLICE_ENA] = "POLICE_ENA",
[VCAP_AF_POLICE_IDX] = "POLICE_IDX",
+ [VCAP_AF_POLICE_REMARK] = "POLICE_REMARK",
[VCAP_AF_POLICE_VCAP_ONLY] = "POLICE_VCAP_ONLY",
[VCAP_AF_PORT_MASK] = "PORT_MASK",
+ [VCAP_AF_QOS_ENA] = "QOS_ENA",
+ [VCAP_AF_QOS_VAL] = "QOS_VAL",
[VCAP_AF_REW_OP] = "REW_OP",
+ [VCAP_AF_RT_DIS] = "RT_DIS",
+ [VCAP_AF_SFID_ENA] = "SFID_ENA",
+ [VCAP_AF_SFID_VAL] = "SFID_VAL",
+ [VCAP_AF_SGID_ENA] = "SGID_ENA",
+ [VCAP_AF_SGID_VAL] = "SGID_VAL",
+ [VCAP_AF_TYPE] = "TYPE",
+ [VCAP_AF_VID_REPLACE_ENA] = "VID_REPLACE_ENA",
+ [VCAP_AF_VID_VAL] = "VID_VAL",
+ [VCAP_AF_VLAN_POP_CNT] = "VLAN_POP_CNT",
+ [VCAP_AF_VLAN_POP_CNT_ENA] = "VLAN_POP_CNT_ENA",
};
/* VCAPs */
const struct vcap_info lan966x_vcaps[] = {
+ [VCAP_TYPE_IS1] = {
+ .name = "is1",
+ .rows = 192,
+ .sw_count = 4,
+ .sw_width = 96,
+ .sticky_width = 32,
+ .act_width = 123,
+ .default_cnt = 0,
+ .require_cnt_dis = 1,
+ .version = 1,
+ .keyfield_set = is1_keyfield_set,
+ .keyfield_set_size = ARRAY_SIZE(is1_keyfield_set),
+ .actionfield_set = is1_actionfield_set,
+ .actionfield_set_size = ARRAY_SIZE(is1_actionfield_set),
+ .keyfield_set_map = is1_keyfield_set_map,
+ .keyfield_set_map_size = is1_keyfield_set_map_size,
+ .actionfield_set_map = is1_actionfield_set_map,
+ .actionfield_set_map_size = is1_actionfield_set_map_size,
+ .keyfield_set_typegroups = is1_keyfield_set_typegroups,
+ .actionfield_set_typegroups = is1_actionfield_set_typegroups,
+ },
[VCAP_TYPE_IS2] = {
.name = "is2",
.rows = 64,
@@ -1600,7 +3000,7 @@ const struct vcap_info lan966x_vcaps[] = {
const struct vcap_statistics lan966x_vcap_stats = {
.name = "lan966x",
- .count = 1,
+ .count = 2,
.keyfield_set_names = vcap_keyfield_set_names,
.actionfield_set_names = vcap_actionfield_set_names,
.keyfield_names = vcap_keyfield_names,
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
index 7a0db58f5513..d90c08cfcf14 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c
@@ -5,9 +5,124 @@
#include "vcap_api.h"
#include "vcap_api_client.h"
-static void lan966x_vcap_port_keys(struct lan966x_port *port,
- struct vcap_admin *admin,
- struct vcap_output_print *out)
+/* Print the IS1 state of a port: whether IS1 is enabled and, for each
+ * lookup, which key is generated for the "other", IPv4, IPv6 and RT
+ * frame classes.
+ */
+static void lan966x_vcap_is1_port_keys(struct lan966x_port *port,
+				       struct vcap_admin *admin,
+				       struct vcap_output_print *out)
+{
+	struct lan966x *lan966x = port->lan966x;
+	u32 val;
+
+	out->prf(out->dst, " port[%d] (%s): ", port->chip_port,
+		 netdev_name(port->dev));
+
+	/* The enable bit lives in the per-port ANA_VCAP_CFG register */
+	val = lan_rd(lan966x, ANA_VCAP_CFG(port->chip_port));
+	out->prf(out->dst, "\n state: ");
+	if (ANA_VCAP_CFG_S1_ENA_GET(val))
+		out->prf(out->dst, "on");
+	else
+		out->prf(out->dst, "off");
+
+	for (int l = 0; l < admin->lookups; ++l) {
+		out->prf(out->dst, "\n Lookup %d: ", l);
+
+		/* The key selectors are per port and per lookup: they must
+		 * be decoded from ANA_VCAP_S1_CFG, not from the ANA_VCAP_CFG
+		 * value read above.
+		 */
+		val = lan_rd(lan966x, ANA_VCAP_S1_CFG(port->chip_port, l));
+
+		out->prf(out->dst, "\n other: ");
+		switch (ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(val)) {
+		case VCAP_IS1_PS_OTHER_NORMAL:
+			out->prf(out->dst, "normal");
+			break;
+		case VCAP_IS1_PS_OTHER_7TUPLE:
+			out->prf(out->dst, "7tuple");
+			break;
+		case VCAP_IS1_PS_OTHER_DBL_VID:
+			out->prf(out->dst, "dbl_vid");
+			break;
+		case VCAP_IS1_PS_OTHER_DMAC_VID:
+			out->prf(out->dst, "dmac_vid");
+			break;
+		default:
+			out->prf(out->dst, "-");
+			break;
+		}
+
+		out->prf(out->dst, "\n ipv4: ");
+		switch (ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(val)) {
+		case VCAP_IS1_PS_IPV4_NORMAL:
+			out->prf(out->dst, "normal");
+			break;
+		case VCAP_IS1_PS_IPV4_7TUPLE:
+			out->prf(out->dst, "7tuple");
+			break;
+		case VCAP_IS1_PS_IPV4_5TUPLE_IP4:
+			out->prf(out->dst, "5tuple_ipv4");
+			break;
+		case VCAP_IS1_PS_IPV4_DBL_VID:
+			out->prf(out->dst, "dbl_vid");
+			break;
+		case VCAP_IS1_PS_IPV4_DMAC_VID:
+			out->prf(out->dst, "dmac_vid");
+			break;
+		default:
+			out->prf(out->dst, "-");
+			break;
+		}
+
+		out->prf(out->dst, "\n ipv6: ");
+		switch (ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(val)) {
+		case VCAP_IS1_PS_IPV6_NORMAL:
+			out->prf(out->dst, "normal");
+			break;
+		case VCAP_IS1_PS_IPV6_7TUPLE:
+			out->prf(out->dst, "7tuple");
+			break;
+		case VCAP_IS1_PS_IPV6_5TUPLE_IP4:
+			out->prf(out->dst, "5tuple_ip4");
+			break;
+		case VCAP_IS1_PS_IPV6_NORMAL_IP6:
+			out->prf(out->dst, "normal_ip6");
+			break;
+		case VCAP_IS1_PS_IPV6_5TUPLE_IP6:
+			out->prf(out->dst, "5tuple_ip6");
+			break;
+		case VCAP_IS1_PS_IPV6_DBL_VID:
+			out->prf(out->dst, "dbl_vid");
+			break;
+		case VCAP_IS1_PS_IPV6_DMAC_VID:
+			out->prf(out->dst, "dmac_vid");
+			break;
+		default:
+			out->prf(out->dst, "-");
+			break;
+		}
+
+		out->prf(out->dst, "\n rt: ");
+		switch (ANA_VCAP_S1_CFG_KEY_RT_CFG_GET(val)) {
+		case VCAP_IS1_PS_RT_NORMAL:
+			out->prf(out->dst, "normal");
+			break;
+		case VCAP_IS1_PS_RT_7TUPLE:
+			out->prf(out->dst, "7tuple");
+			break;
+		case VCAP_IS1_PS_RT_DBL_VID:
+			out->prf(out->dst, "dbl_vid");
+			break;
+		case VCAP_IS1_PS_RT_DMAC_VID:
+			out->prf(out->dst, "dmac_vid");
+			break;
+		case VCAP_IS1_PS_RT_FOLLOW_OTHER:
+			out->prf(out->dst, "follow_other");
+			break;
+		default:
+			out->prf(out->dst, "-");
+			break;
+		}
+	}
+
+	out->prf(out->dst, "\n");
+}
+
+static void lan966x_vcap_is2_port_keys(struct lan966x_port *port,
+ struct vcap_admin *admin,
+ struct vcap_output_print *out)
{
struct lan966x *lan966x = port->lan966x;
u32 val;
@@ -88,7 +203,17 @@ int lan966x_vcap_port_info(struct net_device *dev,
vcap = &vctrl->vcaps[admin->vtype];
out->prf(out->dst, "%s:\n", vcap->name);
- lan966x_vcap_port_keys(port, admin, out);
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS2:
+ lan966x_vcap_is2_port_keys(port, admin, out);
+ break;
+ case VCAP_TYPE_IS1:
+ lan966x_vcap_is1_port_keys(port, admin, out);
+ break;
+ default:
+ out->prf(out->dst, " no info\n");
+ break;
+ }
return 0;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
index 68f9d69fd37b..7ea8e8633609 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c
@@ -8,6 +8,7 @@
#define STREAMSIZE (64 * 4)
+#define LAN966X_IS1_LOOKUPS 3
#define LAN966X_IS2_LOOKUPS 2
static struct lan966x_vcap_inst {
@@ -20,6 +21,15 @@ static struct lan966x_vcap_inst {
bool ingress; /* is vcap in the ingress path */
} lan966x_vcap_inst_cfg[] = {
{
+ .vtype = VCAP_TYPE_IS1, /* IS1-0 */
+ .tgt_inst = 1,
+ .lookups = LAN966X_IS1_LOOKUPS,
+ .first_cid = LAN966X_VCAP_CID_IS1_L0,
+ .last_cid = LAN966X_VCAP_CID_IS1_MAX,
+ .count = 768,
+ .ingress = true,
+ },
+ {
.vtype = VCAP_TYPE_IS2, /* IS2-0 */
.tgt_inst = 2,
.lookups = LAN966X_IS2_LOOKUPS,
@@ -72,7 +82,21 @@ static void __lan966x_vcap_range_init(struct lan966x *lan966x,
lan966x_vcap_wait_update(lan966x, admin->tgt_inst);
}
-static int lan966x_vcap_cid_to_lookup(int cid)
+/* Map an IS1 chain id onto its lookup index; chain ids outside the L1 and
+ * L2 ranges map to lookup 0.
+ */
+static int lan966x_vcap_is1_cid_to_lookup(int cid)
+{
+	if (cid >= LAN966X_VCAP_CID_IS1_L2 && cid < LAN966X_VCAP_CID_IS1_MAX)
+		return 2;
+
+	if (cid >= LAN966X_VCAP_CID_IS1_L1 && cid < LAN966X_VCAP_CID_IS1_L2)
+		return 1;
+
+	return 0;
+}
+
+static int lan966x_vcap_is2_cid_to_lookup(int cid)
{
if (cid >= LAN966X_VCAP_CID_IS2_L1 &&
cid < LAN966X_VCAP_CID_IS2_MAX)
@@ -81,6 +105,67 @@ static int lan966x_vcap_cid_to_lookup(int cid)
return 0;
}
+/* Fill @keysetlist with the keysets that the IS1 configuration of the port
+ * selects for @lookup, restricted to @l3_proto (ETH_P_ALL matches both the
+ * IPv4 and the IPv6 selector). The "other" selector is always consulted.
+ * Always returns 0.
+ */
+static int
+lan966x_vcap_is1_get_port_keysets(struct net_device *ndev, int lookup,
+				  struct vcap_keyset_list *keysetlist,
+				  u16 l3_proto)
+{
+	struct lan966x_port *port = netdev_priv(ndev);
+	bool any_proto = l3_proto == ETH_P_ALL;
+	u32 cfg;
+
+	cfg = lan_rd(port->lan966x, ANA_VCAP_S1_CFG(port->chip_port, lookup));
+
+	/* IPv4 frames */
+	if (any_proto || l3_proto == ETH_P_IP) {
+		switch (ANA_VCAP_S1_CFG_KEY_IP4_CFG_GET(cfg)) {
+		case VCAP_IS1_PS_IPV4_NORMAL:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL);
+			break;
+		case VCAP_IS1_PS_IPV4_5TUPLE_IP4:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP4);
+			break;
+		case VCAP_IS1_PS_IPV4_7TUPLE:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE);
+			break;
+		}
+	}
+
+	/* IPv6 frames */
+	if (any_proto || l3_proto == ETH_P_IPV6) {
+		switch (ANA_VCAP_S1_CFG_KEY_IP6_CFG_GET(cfg)) {
+		case VCAP_IS1_PS_IPV6_DMAC_VID:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_DMAC_VID);
+			break;
+		case VCAP_IS1_PS_IPV6_5TUPLE_IP4:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP4);
+			break;
+		case VCAP_IS1_PS_IPV6_7TUPLE:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE);
+			break;
+		case VCAP_IS1_PS_IPV6_5TUPLE_IP6:
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_5TUPLE_IP6);
+			break;
+		case VCAP_IS1_PS_IPV6_NORMAL:
+		case VCAP_IS1_PS_IPV6_NORMAL_IP6:
+			/* Both selectors contribute the same two keysets */
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL);
+			vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL_IP6);
+			break;
+		}
+	}
+
+	/* Frames that are neither IPv4 nor IPv6 */
+	switch (ANA_VCAP_S1_CFG_KEY_OTHER_CFG_GET(cfg)) {
+	case VCAP_IS1_PS_OTHER_NORMAL:
+		vcap_keyset_list_add(keysetlist, VCAP_KFS_NORMAL);
+		break;
+	case VCAP_IS1_PS_OTHER_7TUPLE:
+		vcap_keyset_list_add(keysetlist, VCAP_KFS_7TUPLE);
+		break;
+	}
+
+	return 0;
+}
+
static int
lan966x_vcap_is2_get_port_keysets(struct net_device *dev, int lookup,
struct vcap_keyset_list *keysetlist,
@@ -180,11 +265,26 @@ lan966x_vcap_validate_keyset(struct net_device *dev,
if (!kslist || kslist->cnt == 0)
return VCAP_KFS_NO_VALUE;
- lookup = lan966x_vcap_cid_to_lookup(rule->vcap_chain_id);
keysetlist.max = ARRAY_SIZE(keysets);
keysetlist.keysets = keysets;
- err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist,
- l3_proto);
+
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS1:
+ lookup = lan966x_vcap_is1_cid_to_lookup(rule->vcap_chain_id);
+ err = lan966x_vcap_is1_get_port_keysets(dev, lookup, &keysetlist,
+ l3_proto);
+ break;
+ case VCAP_TYPE_IS2:
+ lookup = lan966x_vcap_is2_cid_to_lookup(rule->vcap_chain_id);
+ err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist,
+ l3_proto);
+ break;
+ default:
+ pr_err("vcap type: %s not supported\n",
+ lan966x_vcaps[admin->vtype].name);
+ return VCAP_KFS_NO_VALUE;
+ }
+
if (err)
return VCAP_KFS_NO_VALUE;
@@ -197,17 +297,32 @@ lan966x_vcap_validate_keyset(struct net_device *dev,
return VCAP_KFS_NO_VALUE;
}
-static bool lan966x_vcap_is_first_chain(struct vcap_rule *rule)
+static bool lan966x_vcap_is2_is_first_chain(struct vcap_rule *rule)
{
return (rule->vcap_chain_id >= LAN966X_VCAP_CID_IS2_L0 &&
rule->vcap_chain_id < LAN966X_VCAP_CID_IS2_L1);
}
-static void lan966x_vcap_add_default_fields(struct net_device *dev,
- struct vcap_admin *admin,
- struct vcap_rule *rule)
+/* Add the keys every IS1 rule needs: the ingress port mask (unless the
+ * rule already provides one) and the lookup index derived from the chain id.
+ */
+static void lan966x_vcap_is1_add_default_fields(struct lan966x_port *port,
+						struct vcap_admin *admin,
+						struct vcap_rule *rule)
+{
+	u32 key_val, key_mask;
+	int missing;
+
+	/* A non-zero result means the port mask key could not be fetched
+	 * from the rule, so install one matching only this port.
+	 */
+	missing = vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK,
+					&key_val, &key_mask);
+	if (missing)
+		vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0,
+				      ~BIT(port->chip_port));
+
+	vcap_rule_add_key_u32(rule, VCAP_KF_LOOKUP_INDEX,
+			      lan966x_vcap_is1_cid_to_lookup(rule->vcap_chain_id),
+			      0x3);
+}
+
+static void lan966x_vcap_is2_add_default_fields(struct lan966x_port *port,
+ struct vcap_admin *admin,
+ struct vcap_rule *rule)
{
- struct lan966x_port *port = netdev_priv(dev);
u32 value, mask;
if (vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK,
@@ -215,7 +330,7 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev,
vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0,
~BIT(port->chip_port));
- if (lan966x_vcap_is_first_chain(rule))
+ if (lan966x_vcap_is2_is_first_chain(rule))
vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS,
VCAP_BIT_1);
else
@@ -223,6 +338,26 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev,
VCAP_BIT_0);
}
+/* Dispatch to the VCAP type specific helper that adds the default keys */
+static void lan966x_vcap_add_default_fields(struct net_device *dev,
+					    struct vcap_admin *admin,
+					    struct vcap_rule *rule)
+{
+	struct lan966x_port *port = netdev_priv(dev);
+
+	if (admin->vtype == VCAP_TYPE_IS1) {
+		lan966x_vcap_is1_add_default_fields(port, admin, rule);
+		return;
+	}
+
+	if (admin->vtype == VCAP_TYPE_IS2) {
+		lan966x_vcap_is2_add_default_fields(port, admin, rule);
+		return;
+	}
+
+	pr_err("vcap type: %s not supported\n",
+	       lan966x_vcaps[admin->vtype].name);
+}
+
static void lan966x_vcap_cache_erase(struct vcap_admin *admin)
{
memset(admin->cache.keystream, 0, STREAMSIZE);
@@ -464,8 +599,37 @@ static void lan966x_vcap_block_init(struct lan966x *lan966x,
static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x,
struct vcap_admin *admin)
{
- for (int p = 0; p < lan966x->num_phys_ports; ++p)
- lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p));
+ u32 val;
+
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS1:
+ val = ANA_VCAP_S1_CFG_KEY_IP6_CFG_SET(VCAP_IS1_PS_IPV6_5TUPLE_IP6) |
+ ANA_VCAP_S1_CFG_KEY_IP4_CFG_SET(VCAP_IS1_PS_IPV4_5TUPLE_IP4) |
+ ANA_VCAP_S1_CFG_KEY_OTHER_CFG_SET(VCAP_IS1_PS_OTHER_NORMAL);
+
+ for (int p = 0; p < lan966x->num_phys_ports; ++p) {
+ if (!lan966x->ports[p])
+ continue;
+
+ for (int l = 0; l < LAN966X_IS1_LOOKUPS; ++l)
+ lan_wr(val, lan966x, ANA_VCAP_S1_CFG(p, l));
+
+ lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true),
+ ANA_VCAP_CFG_S1_ENA, lan966x,
+ ANA_VCAP_CFG(p));
+ }
+
+ break;
+ case VCAP_TYPE_IS2:
+ for (int p = 0; p < lan966x->num_phys_ports; ++p)
+ lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p));
+
+ break;
+ default:
+ pr_err("vcap type: %s not supported\n",
+ lan966x_vcaps[admin->vtype].name);
+ break;
+ }
}
int lan966x_vcap_init(struct lan966x *lan966x)
@@ -506,6 +670,10 @@ int lan966x_vcap_init(struct lan966x *lan966x)
lan_rmw(ANA_VCAP_S2_CFG_ENA_SET(true),
ANA_VCAP_S2_CFG_ENA, lan966x,
ANA_VCAP_S2_CFG(lan966x->ports[p]->chip_port));
+
+ lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true),
+ ANA_VCAP_CFG_S1_ENA, lan966x,
+ ANA_VCAP_CFG(lan966x->ports[p]->chip_port));
}
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
index 42b77ba9b572..a7edf524eedb 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c
@@ -282,6 +282,7 @@ static int sparx5_create_port(struct sparx5 *sparx5,
spx5_port->phylink_pcs.poll = true;
spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops;
spx5_port->is_mrouter = false;
+ INIT_LIST_HEAD(&spx5_port->tc_templates);
sparx5->ports[config->portno] = spx5_port;
err = sparx5_port_init(sparx5, spx5_port, &config->conf);
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
index 72e7928912eb..62c85463b634 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h
@@ -192,6 +192,7 @@ struct sparx5_port {
u16 ts_id;
struct sk_buff_head tx_skbs;
bool is_mrouter;
+ struct list_head tc_templates; /* list of TC templates on this port */
};
enum sparx5_core_clockfreq {
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
index b36819aafaca..3f87a5285a6d 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
@@ -28,6 +28,14 @@ struct sparx5_multiple_rules {
struct sparx5_wildcard_rule rule[SPX5_MAX_RULE_SIZE];
};
+/* One TC flower template registered on a port; instances are linked into
+ * the port's tc_templates list and looked up by chain id.
+ */
+struct sparx5_tc_flower_template {
+ struct list_head list; /* for insertion in the list of templates */
+ int cid; /* chain id */
+ enum vcap_keyfield_set orig; /* keyset used before the template */
+ enum vcap_keyfield_set keyset; /* new keyset used by template */
+ u16 l3_proto; /* protocol specified in the template */
+};
+
static int
sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
{
@@ -382,7 +390,7 @@ static int sparx5_tc_select_protocol_keyset(struct net_device *ndev,
/* Find the keysets that the rule can use */
matches.keysets = keysets;
matches.max = ARRAY_SIZE(keysets);
- if (vcap_rule_find_keysets(vrule, &matches) == 0)
+ if (!vcap_rule_find_keysets(vrule, &matches))
return -EINVAL;
/* Find the keysets that the port configuration supports */
@@ -996,6 +1004,73 @@ static int sparx5_tc_action_vlan_push(struct vcap_admin *admin,
return err;
}
+/* Drop keys from @vrule that could keep a template from matching a keyset.
+ * Which keys are dropped depends on the VCAP type and on @l3_proto.
+ */
+static void sparx5_tc_flower_simplify_rule(struct vcap_admin *admin,
+					   struct vcap_rule *vrule,
+					   u16 l3_proto)
+{
+	bool is_ip4 = l3_proto == ETH_P_IP;
+	bool is_ip6 = l3_proto == ETH_P_IPV6;
+
+	switch (admin->vtype) {
+	case VCAP_TYPE_IS0:
+		vcap_rule_rem_key(vrule, VCAP_KF_ETYPE);
+		if (is_ip6)
+			vcap_rule_rem_key(vrule, VCAP_KF_IP_SNAP_IS);
+		break;
+	case VCAP_TYPE_ES2:
+		if (is_ip4) {
+			if (vrule->keyset == VCAP_KFS_IP4_OTHER)
+				vcap_rule_rem_key(vrule, VCAP_KF_TCP_IS);
+		} else if (is_ip6) {
+			if (vrule->keyset == VCAP_KFS_IP6_STD)
+				vcap_rule_rem_key(vrule, VCAP_KF_TCP_IS);
+			/* IP4_IS is removed for every IPv6 keyset */
+			vcap_rule_rem_key(vrule, VCAP_KF_IP4_IS);
+		}
+		break;
+	case VCAP_TYPE_IS2:
+		if (is_ip4 || is_ip6)
+			vcap_rule_rem_key(vrule, VCAP_KF_IP4_IS);
+		break;
+	default:
+		break;
+	}
+}
+
+/* If the port has a template for the chain of @fco, force the template
+ * keyset onto @vrule, simplify the rule accordingly and return true.
+ * Return false when no template is registered for that chain.
+ */
+static bool sparx5_tc_flower_use_template(struct net_device *ndev,
+					  struct flow_cls_offload *fco,
+					  struct vcap_admin *admin,
+					  struct vcap_rule *vrule)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5_tc_flower_template *tmpl;
+
+	list_for_each_entry(tmpl, &port->tc_templates, list) {
+		if (tmpl->cid == fco->common.chain_index) {
+			vcap_set_rule_set_keyset(vrule, tmpl->keyset);
+			sparx5_tc_flower_simplify_rule(admin, vrule,
+						       tmpl->l3_proto);
+			return true;
+		}
+	}
+
+	return false;
+}
+
static int sparx5_tc_flower_replace(struct net_device *ndev,
struct flow_cls_offload *fco,
struct vcap_admin *admin,
@@ -1122,12 +1197,14 @@ static int sparx5_tc_flower_replace(struct net_device *ndev,
goto out;
}
- err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin,
- state.l3_proto, &multi);
- if (err) {
- NL_SET_ERR_MSG_MOD(fco->common.extack,
- "No matching port keyset for filter protocol and keys");
- goto out;
+ if (!sparx5_tc_flower_use_template(ndev, fco, admin, vrule)) {
+ err = sparx5_tc_select_protocol_keyset(ndev, vrule, admin,
+ state.l3_proto, &multi);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(fco->common.extack,
+ "No matching port keyset for filter protocol and keys");
+ goto out;
+ }
}
/* provide the l3 protocol to guide the keyset selection */
@@ -1259,6 +1336,120 @@ static int sparx5_tc_flower_stats(struct net_device *ndev,
return err;
}
+/* Handle FLOW_CLS_TMPLT_CREATE: derive a keyset from the template's match
+ * keys, switch the port to that keyset for the chain and remember the
+ * template on the port's tc_templates list.
+ *
+ * Returns 0 on success, -EINVAL for ES0, -EBUSY when the chain already has
+ * rules, -ENOMEM on allocation failure, -ENOENT when no keyset fits, or the
+ * error from rule allocation / dissector parsing.
+ */
+static int sparx5_tc_flower_template_create(struct net_device *ndev,
+ struct flow_cls_offload *fco,
+ struct vcap_admin *admin)
+{
+ struct sparx5_port *port = netdev_priv(ndev);
+ struct vcap_tc_flower_parse_usage state = {
+ .fco = fco,
+ .l3_proto = ETH_P_ALL,
+ .admin = admin,
+ };
+ struct sparx5_tc_flower_template *ftp;
+ struct vcap_keyset_list kslist = {};
+ enum vcap_keyfield_set keysets[10];
+ struct vcap_control *vctrl;
+ struct vcap_rule *vrule;
+ int count, err;
+
+ if (admin->vtype == VCAP_TYPE_ES0) {
+ pr_err("%s:%d: %s\n", __func__, __LINE__,
+ "VCAP does not support templates");
+ return -EINVAL;
+ }
+
+ /* A template may only be installed on a chain without rules */
+ count = vcap_admin_rule_count(admin, fco->common.chain_index);
+ if (count > 0) {
+ pr_err("%s:%d: %s\n", __func__, __LINE__,
+ "Filters are already present");
+ return -EBUSY;
+ }
+
+ ftp = kzalloc(sizeof(*ftp), GFP_KERNEL);
+ if (!ftp)
+ return -ENOMEM;
+
+ ftp->cid = fco->common.chain_index;
+ ftp->orig = VCAP_KFS_NO_VALUE;
+ ftp->keyset = VCAP_KFS_NO_VALUE;
+
+ /* Scratch rule: only used to collect the template keys; it is freed
+ * on every exit path below.
+ */
+ vctrl = port->sparx5->vcap_ctrl;
+ vrule = vcap_alloc_rule(vctrl, ndev, fco->common.chain_index,
+ VCAP_USER_TC, fco->common.prio, 0);
+ if (IS_ERR(vrule)) {
+ err = PTR_ERR(vrule);
+ goto err_rule;
+ }
+
+ state.vrule = vrule;
+ state.frule = flow_cls_offload_flow_rule(fco);
+ err = sparx5_tc_use_dissectors(&state, admin, vrule);
+ if (err) {
+ pr_err("%s:%d: key error: %d\n", __func__, __LINE__, err);
+ goto out;
+ }
+
+ ftp->l3_proto = state.l3_proto;
+
+ sparx5_tc_flower_simplify_rule(admin, vrule, state.l3_proto);
+
+ /* Find the keysets that the rule can use */
+ kslist.keysets = keysets;
+ kslist.max = ARRAY_SIZE(keysets);
+ if (!vcap_rule_find_keysets(vrule, &kslist)) {
+ pr_err("%s:%d: %s\n", __func__, __LINE__,
+ "Could not find a suitable keyset");
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* NOTE(review): the selected keyset is not validated here — confirm
+ * vcap_select_min_rule_keyset() cannot fail for a non-empty list.
+ */
+ ftp->keyset = vcap_select_min_rule_keyset(vctrl, admin->vtype, &kslist);
+ /* Reuse the list: it now receives the keysets the port was using
+ * before the switch, so they can be restored on template destroy.
+ */
+ kslist.cnt = 0;
+ sparx5_vcap_set_port_keyset(ndev, admin, fco->common.chain_index,
+ state.l3_proto,
+ ftp->keyset,
+ &kslist);
+
+ if (kslist.cnt > 0)
+ ftp->orig = kslist.keysets[0];
+
+ /* Store new template */
+ list_add_tail(&ftp->list, &port->tc_templates);
+ vcap_free_rule(vrule);
+ return 0;
+
+out:
+ vcap_free_rule(vrule);
+err_rule:
+ kfree(ftp);
+ return err;
+}
+
+/* Handle FLOW_CLS_TMPLT_DESTROY: restore the port keyset that was active
+ * before the template for this chain was created and free the template.
+ *
+ * Returns 0 when a template for the chain was found and removed, -ENOENT
+ * when the port has no template for the chain.
+ */
+static int sparx5_tc_flower_template_destroy(struct net_device *ndev,
+					     struct flow_cls_offload *fco,
+					     struct vcap_admin *admin)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5_tc_flower_template *ftp, *tmp;
+	int err = -ENOENT;
+
+	/* Rules using the template are removed by the tc framework */
+	list_for_each_entry_safe(ftp, tmp, &port->tc_templates, list) {
+		if (ftp->cid != fco->common.chain_index)
+			continue;
+
+		sparx5_vcap_set_port_keyset(ndev, admin,
+					    fco->common.chain_index,
+					    ftp->l3_proto, ftp->orig,
+					    NULL);
+		list_del(&ftp->list);
+		kfree(ftp);
+		/* Template found and released: report success instead of
+		 * the initial -ENOENT.
+		 */
+		err = 0;
+		break;
+	}
+	return err;
+}
+
int sparx5_tc_flower(struct net_device *ndev, struct flow_cls_offload *fco,
bool ingress)
{
@@ -1282,6 +1473,10 @@ int sparx5_tc_flower(struct net_device *ndev, struct flow_cls_offload *fco,
return sparx5_tc_flower_destroy(ndev, fco, admin);
case FLOW_CLS_STATS:
return sparx5_tc_flower_stats(ndev, fco, admin);
+ case FLOW_CLS_TMPLT_CREATE:
+ return sparx5_tc_flower_template_create(ndev, fco, admin);
+ case FLOW_CLS_TMPLT_DESTROY:
+ return sparx5_tc_flower_template_destroy(ndev, fco, admin);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c
index 07b472c84a47..12722f728ef7 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_debugfs.c
@@ -198,7 +198,7 @@ static void sparx5_vcap_is2_port_keys(struct sparx5 *sparx5,
out->prf(out->dst, "ip6_std");
break;
case VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER:
- out->prf(out->dst, "ip4_tcp_udp ipv4_other");
+ out->prf(out->dst, "ip4_tcp_udp ip4_other");
break;
}
out->prf(out->dst, "\n ipv6_uc: ");
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c
index d0d4e0385ac7..187efa1fc904 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.c
@@ -1519,6 +1519,276 @@ static struct vcap_operations sparx5_vcap_ops = {
.port_info = sparx5_port_info,
};
+/* Translate a keyset into the IS0 port key selector value */
+static u32 sparx5_vcap_is0_keyset_to_etype_ps(enum vcap_keyfield_set keyset)
+{
+	if (keyset == VCAP_KFS_NORMAL_5TUPLE_IP4)
+		return VCAP_IS0_PS_ETYPE_NORMAL_5TUPLE_IP4;
+
+	/* NORMAL_7TUPLE and every other keyset use the 7-tuple selector */
+	return VCAP_IS0_PS_ETYPE_NORMAL_7TUPLE;
+}
+
+/* Program the IS0 key selector of the port for @lookup. @l3_proto picks
+ * the field that is updated: IPv4, IPv6, or the ETYPE selector for any
+ * other protocol.
+ */
+static void sparx5_vcap_is0_set_port_keyset(struct net_device *ndev, int lookup,
+					    enum vcap_keyfield_set keyset,
+					    int l3_proto)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	u32 sel = sparx5_vcap_is0_keyset_to_etype_ps(keyset);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+
+	if (l3_proto == ETH_P_IP)
+		spx5_rmw(ANA_CL_ADV_CL_CFG_IP4_CLM_KEY_SEL_SET(sel),
+			 ANA_CL_ADV_CL_CFG_IP4_CLM_KEY_SEL,
+			 sparx5,
+			 ANA_CL_ADV_CL_CFG(portno, lookup));
+	else if (l3_proto == ETH_P_IPV6)
+		spx5_rmw(ANA_CL_ADV_CL_CFG_IP6_CLM_KEY_SEL_SET(sel),
+			 ANA_CL_ADV_CL_CFG_IP6_CLM_KEY_SEL,
+			 sparx5,
+			 ANA_CL_ADV_CL_CFG(portno, lookup));
+	else
+		spx5_rmw(ANA_CL_ADV_CL_CFG_ETYPE_CLM_KEY_SEL_SET(sel),
+			 ANA_CL_ADV_CL_CFG_ETYPE_CLM_KEY_SEL,
+			 sparx5,
+			 ANA_CL_ADV_CL_CFG(portno, lookup));
+}
+
+/* Translate a keyset into the IS2 ARP port key selector value */
+static u32 sparx5_vcap_is2_keyset_to_arp_ps(enum vcap_keyfield_set keyset)
+{
+	return keyset == VCAP_KFS_ARP ? VCAP_IS2_PS_ARP_ARP :
+					VCAP_IS2_PS_ARP_MAC_ETYPE;
+}
+
+/* Translate a keyset into the IS2 IPv4 port key selector value.
+ *
+ * Keysets without a dedicated selector fall back to MAC_ETYPE, like the
+ * ARP and ES2 helpers in this file, rather than returning
+ * VCAP_KFS_NO_VALUE, which is a keyset identifier and not a selector.
+ */
+static u32 sparx5_vcap_is2_keyset_to_ipv4_ps(enum vcap_keyfield_set keyset)
+{
+	switch (keyset) {
+	case VCAP_KFS_MAC_ETYPE:
+		return VCAP_IS2_PS_IPV4_UC_MAC_ETYPE;
+	case VCAP_KFS_IP4_OTHER:
+	case VCAP_KFS_IP4_TCP_UDP:
+		return VCAP_IS2_PS_IPV4_UC_IP4_TCP_UDP_OTHER;
+	case VCAP_KFS_IP_7TUPLE:
+		return VCAP_IS2_PS_IPV4_UC_IP_7TUPLE;
+	default:
+		return VCAP_IS2_PS_IPV4_UC_MAC_ETYPE;
+	}
+}
+
+/* Translate a keyset into the IS2 IPv6 unicast port key selector value.
+ *
+ * Keysets without a dedicated selector fall back to MAC_ETYPE, like the
+ * ARP and ES2 helpers in this file, rather than returning
+ * VCAP_KFS_NO_VALUE, which is a keyset identifier and not a selector.
+ */
+static u32 sparx5_vcap_is2_keyset_to_ipv6_uc_ps(enum vcap_keyfield_set keyset)
+{
+	switch (keyset) {
+	case VCAP_KFS_MAC_ETYPE:
+		return VCAP_IS2_PS_IPV6_UC_MAC_ETYPE;
+	case VCAP_KFS_IP4_OTHER:
+	case VCAP_KFS_IP4_TCP_UDP:
+		return VCAP_IS2_PS_IPV6_UC_IP4_TCP_UDP_OTHER;
+	case VCAP_KFS_IP_7TUPLE:
+		return VCAP_IS2_PS_IPV6_UC_IP_7TUPLE;
+	default:
+		return VCAP_IS2_PS_IPV6_UC_MAC_ETYPE;
+	}
+}
+
+/* Translate a keyset into the IS2 IPv6 multicast port key selector value.
+ *
+ * Keysets without a dedicated selector fall back to MAC_ETYPE, like the
+ * ARP and ES2 helpers in this file, rather than returning
+ * VCAP_KFS_NO_VALUE, which is a keyset identifier and not a selector.
+ */
+static u32 sparx5_vcap_is2_keyset_to_ipv6_mc_ps(enum vcap_keyfield_set keyset)
+{
+	switch (keyset) {
+	case VCAP_KFS_MAC_ETYPE:
+		return VCAP_IS2_PS_IPV6_MC_MAC_ETYPE;
+	case VCAP_KFS_IP4_OTHER:
+	case VCAP_KFS_IP4_TCP_UDP:
+		return VCAP_IS2_PS_IPV6_MC_IP4_TCP_UDP_OTHER;
+	case VCAP_KFS_IP_7TUPLE:
+		return VCAP_IS2_PS_IPV6_MC_IP_7TUPLE;
+	default:
+		return VCAP_IS2_PS_IPV6_MC_MAC_ETYPE;
+	}
+}
+
+/* Program the IS2 key selectors of the port for @lookup. @l3_proto picks
+ * which selector fields of ANA_ACL_VCAP_S2_KEY_SEL are updated; each field
+ * is changed with a read-modify-write so the other fields are preserved.
+ */
+static void sparx5_vcap_is2_set_port_keyset(struct net_device *ndev, int lookup,
+ enum vcap_keyfield_set keyset,
+ int l3_proto)
+{
+ struct sparx5_port *port = netdev_priv(ndev);
+ struct sparx5 *sparx5 = port->sparx5;
+ int portno = port->portno;
+ u32 value;
+
+ switch (l3_proto) {
+ case ETH_P_ARP:
+ value = sparx5_vcap_is2_keyset_to_arp_ps(keyset);
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_ARP_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ break;
+ case ETH_P_IP:
+ /* NOTE(review): the same selector value is written to both the
+ * IPv4 unicast and multicast fields — confirm the two fields
+ * share one encoding.
+ */
+ value = sparx5_vcap_is2_keyset_to_ipv4_ps(keyset);
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP4_UC_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_IP4_UC_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP4_MC_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_IP4_MC_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ break;
+ case ETH_P_IPV6:
+ /* IPv6 unicast and multicast have separate translations */
+ value = sparx5_vcap_is2_keyset_to_ipv6_uc_ps(keyset);
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_IP6_UC_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ value = sparx5_vcap_is2_keyset_to_ipv6_mc_ps(keyset);
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_IP6_MC_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_IP6_MC_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ break;
+ default:
+ /* Any other protocol: @keyset is ignored and the non-ethernet
+ * selector is always set to MAC_ETYPE.
+ */
+ value = VCAP_IS2_PS_NONETH_MAC_ETYPE;
+ spx5_rmw(ANA_ACL_VCAP_S2_KEY_SEL_NON_ETH_KEY_SEL_SET(value),
+ ANA_ACL_VCAP_S2_KEY_SEL_NON_ETH_KEY_SEL,
+ sparx5,
+ ANA_ACL_VCAP_S2_KEY_SEL(portno, lookup));
+ break;
+ }
+}
+
+/* Translate a keyset into the ES2 ARP port key selector value */
+static u32 sparx5_vcap_es2_keyset_to_arp_ps(enum vcap_keyfield_set keyset)
+{
+	return keyset == VCAP_KFS_ARP ? VCAP_ES2_PS_ARP_ARP :
+					VCAP_ES2_PS_ARP_MAC_ETYPE;
+}
+
+/* Translate a keyset into the ES2 IPv4 port key selector value; keysets
+ * without a dedicated selector map to MAC_ETYPE.
+ */
+static u32 sparx5_vcap_es2_keyset_to_ipv4_ps(enum vcap_keyfield_set keyset)
+{
+	if (keyset == VCAP_KFS_IP_7TUPLE)
+		return VCAP_ES2_PS_IPV4_IP_7TUPLE;
+
+	if (keyset == VCAP_KFS_IP4_TCP_UDP)
+		return VCAP_ES2_PS_IPV4_IP4_TCP_UDP_OTHER;
+
+	if (keyset == VCAP_KFS_IP4_OTHER)
+		return VCAP_ES2_PS_IPV4_IP4_OTHER;
+
+	/* VCAP_KFS_MAC_ETYPE and everything else */
+	return VCAP_ES2_PS_IPV4_MAC_ETYPE;
+}
+
+/* Translate a keyset into the ES2 IPv6 port key selector value; keysets
+ * without a dedicated selector map to MAC_ETYPE.
+ */
+static u32 sparx5_vcap_es2_keyset_to_ipv6_ps(enum vcap_keyfield_set keyset)
+{
+	if (keyset == VCAP_KFS_IP4_TCP_UDP || keyset == VCAP_KFS_IP4_OTHER)
+		return VCAP_ES2_PS_IPV6_IP4_DOWNGRADE;
+
+	if (keyset == VCAP_KFS_IP_7TUPLE)
+		return VCAP_ES2_PS_IPV6_IP_7TUPLE;
+
+	if (keyset == VCAP_KFS_IP6_STD)
+		return VCAP_ES2_PS_IPV6_IP6_STD;
+
+	/* VCAP_KFS_MAC_ETYPE and everything else */
+	return VCAP_ES2_PS_IPV6_MAC_ETYPE;
+}
+
+/* Program the ES2 key selector of the port for @lookup. Only IPv4, IPv6
+ * and ARP have a selector; any other @l3_proto leaves the register alone.
+ */
+static void sparx5_vcap_es2_set_port_keyset(struct net_device *ndev, int lookup,
+					    enum vcap_keyfield_set keyset,
+					    int l3_proto)
+{
+	struct sparx5_port *port = netdev_priv(ndev);
+	struct sparx5 *sparx5 = port->sparx5;
+	int portno = port->portno;
+	u32 sel;
+
+	if (l3_proto == ETH_P_IP) {
+		sel = sparx5_vcap_es2_keyset_to_ipv4_ps(keyset);
+		spx5_rmw(EACL_VCAP_ES2_KEY_SEL_IP4_KEY_SEL_SET(sel),
+			 EACL_VCAP_ES2_KEY_SEL_IP4_KEY_SEL,
+			 sparx5,
+			 EACL_VCAP_ES2_KEY_SEL(portno, lookup));
+	} else if (l3_proto == ETH_P_IPV6) {
+		sel = sparx5_vcap_es2_keyset_to_ipv6_ps(keyset);
+		spx5_rmw(EACL_VCAP_ES2_KEY_SEL_IP6_KEY_SEL_SET(sel),
+			 EACL_VCAP_ES2_KEY_SEL_IP6_KEY_SEL,
+			 sparx5,
+			 EACL_VCAP_ES2_KEY_SEL(portno, lookup));
+	} else if (l3_proto == ETH_P_ARP) {
+		sel = sparx5_vcap_es2_keyset_to_arp_ps(keyset);
+		spx5_rmw(EACL_VCAP_ES2_KEY_SEL_ARP_KEY_SEL_SET(sel),
+			 EACL_VCAP_ES2_KEY_SEL_ARP_KEY_SEL,
+			 sparx5,
+			 EACL_VCAP_ES2_KEY_SEL(portno, lookup));
+	}
+}
+
+/* Change the port keyset for the lookup and protocol.
+ *
+ * The lookup is derived from @cid according to the VCAP type of @admin.
+ * When @orig is not NULL, the keysets currently configured on the port for
+ * @l3_proto are collected into it before the new @keyset is programmed.
+ * ES0 is a no-op; any other unhandled VCAP type is reported through
+ * sparx5_vcap_type_err().
+ */
+void sparx5_vcap_set_port_keyset(struct net_device *ndev,
+ struct vcap_admin *admin,
+ int cid,
+ u16 l3_proto,
+ enum vcap_keyfield_set keyset,
+ struct vcap_keyset_list *orig)
+{
+ struct sparx5_port *port;
+ int lookup;
+
+ switch (admin->vtype) {
+ case VCAP_TYPE_IS0:
+ lookup = sparx5_vcap_is0_cid_to_lookup(cid);
+ if (orig)
+ sparx5_vcap_is0_get_port_keysets(ndev, lookup, orig,
+ l3_proto);
+ sparx5_vcap_is0_set_port_keyset(ndev, lookup, keyset, l3_proto);
+ break;
+ case VCAP_TYPE_IS2:
+ lookup = sparx5_vcap_is2_cid_to_lookup(cid);
+ if (orig)
+ sparx5_vcap_is2_get_port_keysets(ndev, lookup, orig,
+ l3_proto);
+ sparx5_vcap_is2_set_port_keyset(ndev, lookup, keyset, l3_proto);
+ break;
+ case VCAP_TYPE_ES0:
+ /* Nothing is read or written for ES0 */
+ break;
+ case VCAP_TYPE_ES2:
+ lookup = sparx5_vcap_es2_cid_to_lookup(cid);
+ if (orig)
+ sparx5_vcap_es2_get_port_keysets(ndev, lookup, orig,
+ l3_proto);
+ sparx5_vcap_es2_set_port_keyset(ndev, lookup, keyset, l3_proto);
+ break;
+ default:
+ port = netdev_priv(ndev);
+ sparx5_vcap_type_err(port->sparx5, admin, __func__);
+ break;
+ }
+}
+
/* Enable IS0 lookups per port and set the keyset generation */
static void sparx5_vcap_is0_port_key_selection(struct sparx5 *sparx5,
struct vcap_admin *admin)
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h
index 3260ab5e3a82..2684d9199b05 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_vcap_impl.h
@@ -195,6 +195,12 @@ int sparx5_vcap_get_port_keyset(struct net_device *ndev,
u16 l3_proto,
struct vcap_keyset_list *kslist);
+/* Change the port keyset for the lookup and protocol */
+void sparx5_vcap_set_port_keyset(struct net_device *ndev,
+ struct vcap_admin *admin, int cid,
+ u16 l3_proto, enum vcap_keyfield_set keyset,
+ struct vcap_keyset_list *orig);
+
/* Check if the ethertype is supported by the vcap port classification */
bool sparx5_vcap_is_known_etype(struct vcap_admin *admin, u16 etype);
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h
index 0844fcaeee68..a556c4419986 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h
@@ -3,8 +3,8 @@
* Microchip VCAP API
*/
-/* This file is autogenerated by cml-utils 2023-02-10 11:15:56 +0100.
- * Commit ID: c30fb4bf0281cd4a7133bdab6682f9e43c872ada
+/* This file is autogenerated by cml-utils 2023-02-16 11:41:14 +0100.
+ * Commit ID: be85f176b3a151fa748dcaf97c8824a5c2e065f3
*/
#ifndef __VCAP_AG_API__
@@ -14,6 +14,7 @@ enum vcap_type {
VCAP_TYPE_ES0,
VCAP_TYPE_ES2,
VCAP_TYPE_IS0,
+ VCAP_TYPE_IS1,
VCAP_TYPE_IS2,
VCAP_TYPE_MAX
};
@@ -21,7 +22,12 @@ enum vcap_type {
/* Keyfieldset names with origin information */
enum vcap_keyfield_set {
VCAP_KFS_NO_VALUE, /* initial value */
+ VCAP_KFS_5TUPLE_IP4, /* lan966x is1 X2 */
+ VCAP_KFS_5TUPLE_IP6, /* lan966x is1 X4 */
+ VCAP_KFS_7TUPLE, /* lan966x is1 X4 */
VCAP_KFS_ARP, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */
+ VCAP_KFS_DBL_VID, /* lan966x is1 X1 */
+ VCAP_KFS_DMAC_VID, /* lan966x is1 X1 */
VCAP_KFS_ETAG, /* sparx5 is0 X2 */
VCAP_KFS_IP4_OTHER, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */
VCAP_KFS_IP4_TCP_UDP, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */
@@ -36,10 +42,13 @@ enum vcap_keyfield_set {
VCAP_KFS_MAC_ETYPE, /* sparx5 is2 X6, sparx5 es2 X6, lan966x is2 X2 */
VCAP_KFS_MAC_LLC, /* lan966x is2 X2 */
VCAP_KFS_MAC_SNAP, /* lan966x is2 X2 */
+ VCAP_KFS_NORMAL, /* lan966x is1 X2 */
VCAP_KFS_NORMAL_5TUPLE_IP4, /* sparx5 is0 X6 */
VCAP_KFS_NORMAL_7TUPLE, /* sparx5 is0 X12 */
+ VCAP_KFS_NORMAL_IP6, /* lan966x is1 X4 */
VCAP_KFS_OAM, /* lan966x is2 X2 */
VCAP_KFS_PURE_5TUPLE_IP4, /* sparx5 is0 X3 */
+ VCAP_KFS_RT, /* lan966x is1 X1 */
VCAP_KFS_SMAC_SIP4, /* lan966x is2 X1 */
VCAP_KFS_SMAC_SIP6, /* lan966x is2 X2 */
};
@@ -61,17 +70,20 @@ enum vcap_keyfield_set {
* Used by 802.1BR Bridge Port Extension in an E-Tag
* VCAP_KF_8021BR_IGR_ECID_EXT: W8, sparx5: is0
* Used by 802.1BR Bridge Port Extension in an E-Tag
- * VCAP_KF_8021Q_DEI0: W1, sparx5: is0
+ * VCAP_KF_8021CB_R_TAGGED_IS: W1, lan966x: is1
+ * Set if frame contains an RTAG: IEEE 802.1CB (FRER Redundancy tag, Ethertype
+ * 0xf1c1)
+ * VCAP_KF_8021Q_DEI0: W1, sparx5: is0, lan966x: is1
* First DEI in multiple vlan tags (outer tag or default port tag)
- * VCAP_KF_8021Q_DEI1: W1, sparx5: is0
+ * VCAP_KF_8021Q_DEI1: W1, sparx5: is0, lan966x: is1
* Second DEI in multiple vlan tags (inner tag)
* VCAP_KF_8021Q_DEI2: W1, sparx5: is0
* Third DEI in multiple vlan tags (not always available)
* VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2
* Classified DEI
- * VCAP_KF_8021Q_PCP0: W3, sparx5: is0
+ * VCAP_KF_8021Q_PCP0: W3, sparx5: is0, lan966x: is1
* First PCP in multiple vlan tags (outer tag or default port tag)
- * VCAP_KF_8021Q_PCP1: W3, sparx5: is0
+ * VCAP_KF_8021Q_PCP1: W3, sparx5: is0, lan966x: is1
* Second PCP in multiple vlan tags (inner tag)
* VCAP_KF_8021Q_PCP2: W3, sparx5: is0
* Third PCP in multiple vlan tags (not always available)
@@ -79,22 +91,24 @@ enum vcap_keyfield_set {
* Classified PCP
* VCAP_KF_8021Q_TPID: W3, sparx5: es0
* TPID for outer tag: 0: Customer TPID 1: Service TPID (88A8 or programmable)
- * VCAP_KF_8021Q_TPID0: W3, sparx5: is0
+ * VCAP_KF_8021Q_TPID0: sparx5 is0 W3, lan966x is1 W1
* First TPIC in multiple vlan tags (outer tag or default port tag)
- * VCAP_KF_8021Q_TPID1: W3, sparx5: is0
+ * VCAP_KF_8021Q_TPID1: sparx5 is0 W3, lan966x is1 W1
* Second TPID in multiple vlan tags (inner tag)
* VCAP_KF_8021Q_TPID2: W3, sparx5: is0
* Third TPID in multiple vlan tags (not always available)
- * VCAP_KF_8021Q_VID0: W12, sparx5: is0
+ * VCAP_KF_8021Q_VID0: W12, sparx5: is0, lan966x: is1
* First VID in multiple vlan tags (outer tag or default port tag)
- * VCAP_KF_8021Q_VID1: W12, sparx5: is0
+ * VCAP_KF_8021Q_VID1: W12, sparx5: is0, lan966x: is1
* Second VID in multiple vlan tags (inner tag)
* VCAP_KF_8021Q_VID2: W12, sparx5: is0
* Third VID in multiple vlan tags (not always available)
* VCAP_KF_8021Q_VID_CLS: sparx5 is2 W13, sparx5 es0 W13, sparx5 es2 W13,
* lan966x is2 W12
* Classified VID
- * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2, lan966x: is2
+ * VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS: W1, lan966x: is1
+ * Set if frame has two or more Q-tags. Independent of port VLAN awareness
+ * VCAP_KF_8021Q_VLAN_TAGGED_IS: W1, sparx5: is2/es2, lan966x: is1/is2
* Sparx5: Set if frame was received with a VLAN tag, LAN966x: Set if frame has
* one or more Q-tags. Independent of port VLAN awareness
* VCAP_KF_8021Q_VLAN_TAGS: W3, sparx5: is0
@@ -120,9 +134,9 @@ enum vcap_keyfield_set {
* Class of service
* VCAP_KF_ES0_ISDX_KEY_ENA: W1, sparx5: es2
* The value taken from the IFH .FWD.ES0_ISDX_KEY_ENA
- * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_ETYPE: W16, sparx5: is0/is2/es2, lan966x: is1/is2
* Ethernet type
- * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2
+ * VCAP_KF_ETYPE_LEN_IS: W1, sparx5: is0/is2/es2, lan966x: is1
* Set if frame has EtherType >= 0x600
* VCAP_KF_HOST_MATCH: W1, lan966x: is2
* The action from the SMAC_SIP4 or SMAC_SIP6 lookups. Used for IP source
@@ -134,11 +148,12 @@ enum vcap_keyfield_set {
* CPU queue)
* VCAP_KF_IF_EGR_PORT_NO: W7, sparx5: es0
* Egress port number
- * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is2 W4
+ * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is1 W3, lan966x
+ * is2 W4
* Sparx5: Logical ingress port number retrieved from
* ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber
* VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65,
- * lan966x is2 W9
+ * lan966x is1 W9, lan966x is2 W9
* Ingress port mask, one bit per port/erleg
* VCAP_KF_IF_IGR_PORT_MASK_L3: W1, sparx5: is2
* If set, IF_IGR_PORT_MASK, IF_IGR_PORT_MASK_RNG, and IF_IGR_PORT_MASK_SEL are
@@ -151,24 +166,26 @@ enum vcap_keyfield_set {
* Mapping: 0: DEFAULT 1: LOOPBACK 2: MASQUERADE 3: CPU_VD
* VCAP_KF_IF_IGR_PORT_SEL: W1, sparx5: es2
* Selector for IF_IGR_PORT: physical port number or ERLEG
- * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_IP4_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Set if frame has EtherType = 0x800 and IP version = 4
- * VCAP_KF_IP_MC_IS: W1, sparx5: is0
+ * VCAP_KF_IP_MC_IS: W1, sparx5: is0, lan966x: is1
* Set if frame is IPv4 frame and frame's destination MAC address is an IPv4
* multicast address (0x01005E0 /25). Set if frame is IPv6 frame and frame's
* destination MAC address is an IPv6 multicast address (0x3333/16).
- * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0
+ * VCAP_KF_IP_PAYLOAD_5TUPLE: W32, sparx5: is0, lan966x: is1
* Payload bytes after IP header
- * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0
+ * VCAP_KF_IP_PAYLOAD_S1_IP6: W112, lan966x: is1
+ * Payload after IPv6 header
+ * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0, lan966x: is1
* Set if frame is IPv4, IPv6, or SNAP frame
* VCAP_KF_ISDX_CLS: W12, sparx5: is2/es0/es2
* Classified ISDX
* VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2
* Set if classified ISDX > 0
- * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Set if frame's destination MAC address is the broadcast address
* (FF-FF-FF-FF-FF-FF).
- * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2
* Destination MAC address
* VCAP_KF_L2_FRM_TYPE: W4, lan966x: is2
* Frame subtype for specific EtherTypes (MRP, DLR)
@@ -176,7 +193,9 @@ enum vcap_keyfield_set {
* Set if the frame is allowed to be forwarded to front ports
* VCAP_KF_L2_LLC: W40, lan966x: is2
* LLC header and data after up to two VLAN tags and the type/length field
- * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L2_MAC: W48, lan966x: is1
+ * MAC address (FIRST=1: SMAC, FIRST=0: DMAC)
+ * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Set if frame's destination MAC address is a multicast address (bit 40 = 1).
* VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2
* Payload bytes 0-1 after the frame's EtherType
@@ -188,7 +207,7 @@ enum vcap_keyfield_set {
* specifically for PTP frames.
* VCAP_KF_L2_PAYLOAD_ETYPE: W64, sparx5: is2/es2
* Byte 0-7 of L2 payload after Type/Len field and overloading for OAM
- * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L2_SMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2
* Source MAC address
* VCAP_KF_L2_SNAP: W40, lan966x: is2
* SNAP header after LLC header (AA-AA-03)
@@ -196,32 +215,38 @@ enum vcap_keyfield_set {
* Set if Src IP matches Dst IP address
* VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2
* The frames drop precedence level
- * VCAP_KF_L3_DSCP: W6, sparx5: is0
+ * VCAP_KF_L3_DSCP: W6, sparx5: is0, lan966x: is1
* Frame's DSCP value
* VCAP_KF_L3_DST_IS: W1, sparx5: is2
* Set if lookup is done for egress router leg
- * VCAP_KF_L3_FRAGMENT: W1, lan966x: is2
+ * VCAP_KF_L3_FRAGMENT: W1, lan966x: is1/is2
* Set if IPv4 frame is fragmented
* VCAP_KF_L3_FRAGMENT_TYPE: W2, sparx5: is0/is2/es2
* L3 Fragmentation type (none, initial, suspicious, valid follow up)
* VCAP_KF_L3_FRAG_INVLD_L4_LEN: W1, sparx5: is0/is2
* Set if frame's L4 length is less than ANA_CL:COMMON:CLM_FRAGMENT_CFG.L4_MIN_L
* EN
- * VCAP_KF_L3_FRAG_OFS_GT0: W1, lan966x: is2
+ * VCAP_KF_L3_FRAG_OFS_GT0: W1, lan966x: is1/is2
* Set if IPv4 frame is fragmented and it is not the first fragment
- * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_IP4_DIP: W32, sparx5: is0/is2/es2, lan966x: is1/is2
* Destination IPv4 Address
- * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_IP4_SIP: W32, sparx5: is0/is2/es2, lan966x: is1/is2
* Source IPv4 Address
- * VCAP_KF_L3_IP6_DIP: W128, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_IP6_DIP: sparx5 is0 W128, sparx5 is2 W128, sparx5 es2 W128,
+ * lan966x is1 W64, lan966x is1 W128, lan966x is2 W128
* Sparx5: Full IPv6 DIP, LAN966x: Either Full IPv6 DIP or a subset depending on
* frame type
- * VCAP_KF_L3_IP6_SIP: W128, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_IP6_DIP_MSB: W16, lan966x: is1
+ * MS 16bits of IPv6 DIP
+ * VCAP_KF_L3_IP6_SIP: sparx5 is0 W128, sparx5 is2 W128, sparx5 es2 W128,
+ * lan966x is1 W128, lan966x is1 W64, lan966x is2 W128
* Sparx5: Full IPv6 SIP, LAN966x: Either Full IPv6 SIP or a subset depending on
* frame type
- * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_IP6_SIP_MSB: W16, lan966x: is1
+ * MS 16bits of IPv6 SIP
+ * VCAP_KF_L3_IP_PROTO: W8, sparx5: is0/is2/es2, lan966x: is1/is2
* IPv4 frames: IP protocol. IPv6 frames: Next header, same as for IPV4
- * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L3_OPTIONS_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Set if IPv4 frame contains options (IP len > 5)
* VCAP_KF_L3_PAYLOAD: sparx5 is2 W96, sparx5 is2 W40, sparx5 es2 W96, sparx5
* es2 W40, lan966x is2 W56
@@ -254,7 +279,8 @@ enum vcap_keyfield_set {
* VCAP_KF_L4_PSH: W1, sparx5: is2/es2, lan966x: is2
* Sparx5: TCP flag PSH, LAN966x: TCP: TCP flag PSH. PTP over UDP: flagField bit
* 1 (twoStepFlag)
- * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16, lan966x is2 W8
+ * VCAP_KF_L4_RNG: sparx5 is0 W8, sparx5 is2 W16, sparx5 es2 W16, lan966x is1
+ * W8, lan966x is2 W8
* Range checker bitmask (one for each range checker). Input into range checkers
* is taken from classified results (VID, DSCP) and frame (SPORT, DPORT, ETYPE,
* outer VID, inner VID)
@@ -264,7 +290,7 @@ enum vcap_keyfield_set {
* VCAP_KF_L4_SEQUENCE_EQ0_IS: W1, sparx5: is2/es2, lan966x: is2
* Set if TCP sequence number is 0, LAN966x: Overlayed with PTP over UDP:
* messageType bit 0
- * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_L4_SPORT: W16, sparx5: is0/is2/es2, lan966x: is1/is2
* TCP/UDP source port
* VCAP_KF_L4_SPORT_EQ_DPORT_IS: W1, sparx5: is2/es2, lan966x: is2
* Set if UDP or TCP source port equals UDP or TCP destination port
@@ -274,13 +300,16 @@ enum vcap_keyfield_set {
* VCAP_KF_L4_URG: W1, sparx5: is2/es2, lan966x: is2
* Sparx5: TCP flag URG, LAN966x: TCP: TCP flag URG. PTP over UDP: flagField bit
* 7 (reserved)
- * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_LOOKUP_FIRST_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Selects between entries relevant for first and second lookup. Set for first
* lookup, cleared for second lookup.
* VCAP_KF_LOOKUP_GEN_IDX: W12, sparx5: is0
* Generic index - for chaining CLM instances
* VCAP_KF_LOOKUP_GEN_IDX_SEL: W2, sparx5: is0
* Select the mode of the Generic Index
+ * VCAP_KF_LOOKUP_INDEX: W2, lan966x: is1
+ * 0: First lookup, 1: Second lookup, 2: Third lookup. Similar to
+ * VCAP_KF_LOOKUP_FIRST_IS but with extra info
* VCAP_KF_LOOKUP_PAG: W8, sparx5: is2, lan966x: is2
* Classified Policy Association Group: chains rules from IS1/CLM to IS2
* VCAP_KF_MIRROR_PROBE: W2, sparx5: es2
@@ -303,14 +332,22 @@ enum vcap_keyfield_set {
* Set if frame's EtherType = 0x8902
* VCAP_KF_PROT_ACTIVE: W1, sparx5: es0/es2
* Protection is active
- * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2, lan966x: is2
+ * VCAP_KF_RT_FRMID: W32, lan966x: is1
+ * Profinet or OPC-UA FrameId
+ * VCAP_KF_RT_TYPE: W2, lan966x: is1
+ * Encoding of frame's EtherType: 0: Other, 1: Profinet, 2: OPC-UA, 3: Custom
+ * (ANA::RT_CUSTOM)
+ * VCAP_KF_RT_VLAN_IDX: W3, lan966x: is1
+ * Real-time VLAN index from ANA::RT_VLAN_PCP
+ * VCAP_KF_TCP_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2
* Set if frame is IPv4 TCP frame (IP protocol = 6) or IPv6 TCP frames (Next
* header = 6)
- * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2
+ * VCAP_KF_TCP_UDP_IS: W1, sparx5: is0/is2/es2, lan966x: is1
* Set if frame is IPv4/IPv6 TCP or UDP frame (IP protocol/next header equals 6
* or 17)
* VCAP_KF_TYPE: sparx5 is0 W2, sparx5 is0 W1, sparx5 is2 W4, sparx5 is2 W2,
- * sparx5 es0 W1, sparx5 es2 W3, lan966x is2 W4, lan966x is2 W2
+ * sparx5 es0 W1, sparx5 es2 W3, lan966x is1 W1, lan966x is1 W2, lan966x is2 W4,
+ * lan966x is2 W2
* Keyset type id - set by the API
*/
@@ -323,6 +360,7 @@ enum vcap_key_field {
VCAP_KF_8021BR_GRP,
VCAP_KF_8021BR_IGR_ECID_BASE,
VCAP_KF_8021BR_IGR_ECID_EXT,
+ VCAP_KF_8021CB_R_TAGGED_IS,
VCAP_KF_8021Q_DEI0,
VCAP_KF_8021Q_DEI1,
VCAP_KF_8021Q_DEI2,
@@ -339,6 +377,7 @@ enum vcap_key_field {
VCAP_KF_8021Q_VID1,
VCAP_KF_8021Q_VID2,
VCAP_KF_8021Q_VID_CLS,
+ VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS,
VCAP_KF_8021Q_VLAN_TAGGED_IS,
VCAP_KF_8021Q_VLAN_TAGS,
VCAP_KF_ACL_GRP_ID,
@@ -366,6 +405,7 @@ enum vcap_key_field {
VCAP_KF_IP4_IS,
VCAP_KF_IP_MC_IS,
VCAP_KF_IP_PAYLOAD_5TUPLE,
+ VCAP_KF_IP_PAYLOAD_S1_IP6,
VCAP_KF_IP_SNAP_IS,
VCAP_KF_ISDX_CLS,
VCAP_KF_ISDX_GT0_IS,
@@ -374,6 +414,7 @@ enum vcap_key_field {
VCAP_KF_L2_FRM_TYPE,
VCAP_KF_L2_FWD_IS,
VCAP_KF_L2_LLC,
+ VCAP_KF_L2_MAC,
VCAP_KF_L2_MC_IS,
VCAP_KF_L2_PAYLOAD0,
VCAP_KF_L2_PAYLOAD1,
@@ -392,7 +433,9 @@ enum vcap_key_field {
VCAP_KF_L3_IP4_DIP,
VCAP_KF_L3_IP4_SIP,
VCAP_KF_L3_IP6_DIP,
+ VCAP_KF_L3_IP6_DIP_MSB,
VCAP_KF_L3_IP6_SIP,
+ VCAP_KF_L3_IP6_SIP_MSB,
VCAP_KF_L3_IP_PROTO,
VCAP_KF_L3_OPTIONS_IS,
VCAP_KF_L3_PAYLOAD,
@@ -416,6 +459,7 @@ enum vcap_key_field {
VCAP_KF_LOOKUP_FIRST_IS,
VCAP_KF_LOOKUP_GEN_IDX,
VCAP_KF_LOOKUP_GEN_IDX_SEL,
+ VCAP_KF_LOOKUP_INDEX,
VCAP_KF_LOOKUP_PAG,
VCAP_KF_MIRROR_PROBE,
VCAP_KF_OAM_CCM_CNTS_EQ0,
@@ -427,6 +471,9 @@ enum vcap_key_field {
VCAP_KF_OAM_VER,
VCAP_KF_OAM_Y1731_IS,
VCAP_KF_PROT_ACTIVE,
+ VCAP_KF_RT_FRMID,
+ VCAP_KF_RT_TYPE,
+ VCAP_KF_RT_VLAN_IDX,
VCAP_KF_TCP_IS,
VCAP_KF_TCP_UDP_IS,
VCAP_KF_TYPE,
@@ -440,6 +487,7 @@ enum vcap_actionfield_set {
VCAP_AFS_CLASS_REDUCED, /* sparx5 is0 X1 */
VCAP_AFS_ES0, /* sparx5 es0 X1 */
VCAP_AFS_FULL, /* sparx5 is0 X3 */
+ VCAP_AFS_S1, /* lan966x is1 X1 */
VCAP_AFS_SMAC_SIP, /* lan966x is2 X1 */
};
@@ -470,23 +518,31 @@ enum vcap_actionfield_set {
* CPU extraction queue. Used when FWD_SEL >0 and PIPELINE_ACT = XTR.
* VCAP_AF_CPU_QUEUE_NUM: W3, sparx5: is2/es2, lan966x: is2
* CPU queue number. Used when CPU_COPY_ENA is set.
+ * VCAP_AF_CUSTOM_ACE_TYPE_ENA: W4, lan966x: is1
+ * Enables use of custom keys in IS2. Bits 3:2 control second lookup in IS2
+ * while bits 1:0 control first lookup. Encoding per lookup: 0: Disabled. 1:
+ * Extract 40 bytes after position corresponding to the location of the IPv4
+ * header and use as key. 2: Extract 40 bytes after SMAC and use as key
* VCAP_AF_DEI_A_VAL: W1, sparx5: es0
* DEI used in ES0 tag A. See TAG_A_DEI_SEL.
* VCAP_AF_DEI_B_VAL: W1, sparx5: es0
* DEI used in ES0 tag B. See TAG_B_DEI_SEL.
* VCAP_AF_DEI_C_VAL: W1, sparx5: es0
* DEI used in ES0 tag C. See TAG_C_DEI_SEL.
- * VCAP_AF_DEI_ENA: W1, sparx5: is0
+ * VCAP_AF_DEI_ENA: W1, sparx5: is0, lan966x: is1
* If set, use DEI_VAL as classified DEI value. Otherwise, DEI from basic
* classification is used
- * VCAP_AF_DEI_VAL: W1, sparx5: is0
+ * VCAP_AF_DEI_VAL: W1, sparx5: is0, lan966x: is1
* See DEI_ENA
- * VCAP_AF_DP_ENA: W1, sparx5: is0
+ * VCAP_AF_DLR_SEL: W2, lan966x: is1
+ * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.DLR_ENA. 1: Enable
+ * DLR frame processing 2: Disable DLR processing
+ * VCAP_AF_DP_ENA: W1, sparx5: is0, lan966x: is1
* If set, use DP_VAL as classified drop precedence level. Otherwise, drop
* precedence level from basic classification is used.
- * VCAP_AF_DP_VAL: W2, sparx5: is0
+ * VCAP_AF_DP_VAL: sparx5 is0 W2, lan966x is1 W1
* See DP_ENA.
- * VCAP_AF_DSCP_ENA: W1, sparx5: is0
+ * VCAP_AF_DSCP_ENA: W1, sparx5: is0, lan966x: is1
* If set, use DSCP_VAL as classified DSCP value. Otherwise, DSCP value from
* basic classification is used.
* VCAP_AF_DSCP_SEL: W3, sparx5: es0
@@ -495,7 +551,7 @@ enum vcap_actionfield_set {
* table 0, otherwise use DSCP_VAL. 5: Mapped using mapping table 1, otherwise
* use mapping table 0. 6: Mapped using mapping table 2, otherwise use DSCP_VAL.
* 7: Mapped using mapping table 3, otherwise use mapping table 2
- * VCAP_AF_DSCP_VAL: W6, sparx5: is0/es0
+ * VCAP_AF_DSCP_VAL: W6, sparx5: is0/es0, lan966x: is1
* See DSCP_ENA.
* VCAP_AF_ES2_REW_CMD: W3, sparx5: es2
* Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP
@@ -529,9 +585,16 @@ enum vcap_actionfield_set {
* VCAP_AF_ISDX_ADD_REPLACE_SEL: W1, sparx5: is0
* Controls the classified ISDX. 0: New ISDX = old ISDX + ISDX_VAL. 1: New ISDX
* = ISDX_VAL.
+ * VCAP_AF_ISDX_ADD_VAL: W8, lan966x: is1
+ * If ISDX_REPLACE_ENA is set, ISDX_ADD_VAL is used directly as the new ISDX.
+ * Encoding: ISDX_REPLACE_ENA=0, ISDX_ADD_VAL=0: Disabled. ISDX_REPLACE_ENA=0,
+ * ISDX_ADD_VAL>0: Add value to classified ISDX. ISDX_REPLACE_ENA=1: Replace
+ * with ISDX_ADD_VAL value.
* VCAP_AF_ISDX_ENA: W1, lan966x: is2
* Setting this bit to 1 causes the classified ISDX to be set to the value of
* POLICE_IDX[8:0].
+ * VCAP_AF_ISDX_REPLACE_ENA: W1, lan966x: is1
+ * If set, classified ISDX is set to ISDX_ADD_VAL.
* VCAP_AF_ISDX_VAL: W12, sparx5: is0
* See isdx_add_replace_sel
* VCAP_AF_LOOP_ENA: W1, sparx5: es0
@@ -572,14 +635,22 @@ enum vcap_actionfield_set {
* VCAP_AF_MIRROR_PROBE_ID: W2, sparx5: es2
* Signals a mirror probe to be placed in the IFH. Only possible when FWD_MODE
* is copy. 0: No mirroring. 1-3: Use mirror probe 0-2.
+ * VCAP_AF_MRP_SEL: W2, lan966x: is1
+ * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.MRP_ENA. 1: Enable
+ * MRP frame processing 2: Disable MRP processing
* VCAP_AF_NXT_IDX: W12, sparx5: is0
* Index used as part of key (field G_IDX) in the next lookup.
* VCAP_AF_NXT_IDX_CTRL: W3, sparx5: is0
* Controls the generation of the G_IDX used in the VCAP CLM next lookup
- * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0
+ * VCAP_AF_OAM_SEL: W3, lan966x: is1
+ * 0: No changes to port-based selection in ANA:PORT:OAM_CFG.OAM_CFG 1: Enable
+ * OAM frame processing for untagged frames 2: Enable OAM frame processing for
+ * single frames 3: Enable OAM frame processing for double frames 4: Disable OAM
+ * frame processing
+ * VCAP_AF_PAG_OVERRIDE_MASK: W8, sparx5: is0, lan966x: is1
* Bits set in this mask will override PAG_VAL from port profile. New PAG = (PAG
* (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK)
- * VCAP_AF_PAG_VAL: W8, sparx5: is0
+ * VCAP_AF_PAG_VAL: W8, sparx5: is0, lan966x: is1
* See PAG_OVERRIDE_MASK.
* VCAP_AF_PCP_A_VAL: W3, sparx5: es0
* PCP used in ES0 tag A. See TAG_A_PCP_SEL.
@@ -587,10 +658,10 @@ enum vcap_actionfield_set {
* PCP used in ES0 tag B. See TAG_B_PCP_SEL.
* VCAP_AF_PCP_C_VAL: W3, sparx5: es0
* PCP used in ES0 tag C. See TAG_C_PCP_SEL.
- * VCAP_AF_PCP_ENA: W1, sparx5: is0
+ * VCAP_AF_PCP_ENA: W1, sparx5: is0, lan966x: is1
* If set, use PCP_VAL as classified PCP value. Otherwise, PCP from basic
* classification is used.
- * VCAP_AF_PCP_VAL: W3, sparx5: is0
+ * VCAP_AF_PCP_VAL: W3, sparx5: is0, lan966x: is1
* See PCP_ENA.
* VCAP_AF_PIPELINE_ACT: W1, sparx5: es0
* Pipeline action when FWD_SEL > 0. 0: XTR. CPU_QU selects CPU extraction queue
@@ -600,11 +671,11 @@ enum vcap_actionfield_set {
* PIPELINE_PT == NONE. Overrules previous settings of pipeline point.
* VCAP_AF_PIPELINE_PT: sparx5 is2 W5, sparx5 es0 W2
* Pipeline point used if PIPELINE_FORCE_ENA is set
- * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2, lan966x: is2
- * Setting this bit to 1 causes frames that hit this action to be policed by the
- * ACL policer specified in POLICE_IDX. Only applies to the first lookup.
- * VCAP_AF_POLICE_IDX: sparx5 is2 W6, sparx5 es2 W6, lan966x is2 W9
- * Selects VCAP policer used when policing frames (POLICE_ENA)
+ * VCAP_AF_POLICE_ENA: W1, sparx5: is2/es2, lan966x: is1/is2
+ * If set, POLICE_IDX is used to lookup ANA::POL.
+ * VCAP_AF_POLICE_IDX: sparx5 is2 W6, sparx5 es2 W6, lan966x is1 W9, lan966x is2
+ * W9
+ * Policer index.
* VCAP_AF_POLICE_REMARK: W1, sparx5: es2
* If set, frames exceeding policer rates are marked as yellow but not
* discarded.
@@ -628,16 +699,24 @@ enum vcap_actionfield_set {
* port. 1: ES0 tag A: Push ES0 tag A. No port tag. 2: Force port tag: Always
* push port tag. No ES0 tag A. 3: Force untag: Never push port tag or ES0 tag
* A.
- * VCAP_AF_QOS_ENA: W1, sparx5: is0
+ * VCAP_AF_QOS_ENA: W1, sparx5: is0, lan966x: is1
* If set, use QOS_VAL as classified QoS class. Otherwise, QoS class from basic
* classification is used.
- * VCAP_AF_QOS_VAL: W3, sparx5: is0
+ * VCAP_AF_QOS_VAL: W3, sparx5: is0, lan966x: is1
* See QOS_ENA.
* VCAP_AF_REW_OP: W16, lan966x: is2
* Rewriter operation command.
* VCAP_AF_RT_DIS: W1, sparx5: is2
* If set, routing is disallowed. Only applies when IS_INNER_ACL is 0. See also
* IGR_ACL_ENA, EGR_ACL_ENA, and RLEG_STAT_IDX.
+ * VCAP_AF_SFID_ENA: W1, lan966x: is1
+ * If set, SFID_VAL is used to lookup ANA::SFID.
+ * VCAP_AF_SFID_VAL: W8, lan966x: is1
+ * Stream filter identifier.
+ * VCAP_AF_SGID_ENA: W1, lan966x: is1
+ * If set, SGID_VAL is used to lookup ANA::SGID.
+ * VCAP_AF_SGID_VAL: W8, lan966x: is1
+ * Stream gate identifier.
* VCAP_AF_SWAP_MACS_ENA: W1, sparx5: es0
* This setting is only active when FWD_SEL = 1 or FWD_SEL = 2 and PIPELINE_ACT
* = LBK_ASM. 0: No action. 1: Swap MACs and clear bit 40 in new SMAC.
@@ -686,7 +765,7 @@ enum vcap_actionfield_set {
* VCAP_AF_TAG_C_VID_SEL: W2, sparx5: es0
* Selects VID for ES0 tag C. The resulting VID is termed C-TAG.VID. 0:
* Classified VID. 1: VID_C_VAL. 2: IFH.ENCAP.GVID. 3: Reserved.
- * VCAP_AF_TYPE: W1, sparx5: is0
+ * VCAP_AF_TYPE: W1, sparx5: is0, lan966x: is1
* Actionset type id - Set by the API
* VCAP_AF_UNTAG_VID_ENA: W1, sparx5: es0
* Controls insertion of tag C. Untag or insert mode can be selected. See
@@ -697,8 +776,19 @@ enum vcap_actionfield_set {
* VID used in ES0 tag B. See TAG_B_VID_SEL.
* VCAP_AF_VID_C_VAL: W12, sparx5: es0
* VID used in ES0 tag C. See TAG_C_VID_SEL.
- * VCAP_AF_VID_VAL: W13, sparx5: is0
+ * VCAP_AF_VID_REPLACE_ENA: W1, lan966x: is1
+ * Controls the classified VID: VID_REPLACE_ENA=0: Add VID_ADD_VAL to basic
+ * classified VID and use result as new classified VID. VID_REPLACE_ENA = 1:
+ * Replace basic classified VID with VID_VAL value and use as new classified
+ * VID.
+ * VCAP_AF_VID_VAL: sparx5 is0 W13, lan966x is1 W12
* New VID Value
+ * VCAP_AF_VLAN_POP_CNT: W2, lan966x: is1
+ * See VLAN_POP_CNT_ENA
+ * VCAP_AF_VLAN_POP_CNT_ENA: W1, lan966x: is1
+ * If set, use VLAN_POP_CNT as the number of VLAN tags to pop from the incoming
+ * frame. This number is used by the Rewriter. Otherwise, VLAN_POP_CNT from
+ * ANA:PORT:VLAN_CFG.VLAN_POP_CNT is used
*/
/* Actionfield names */
@@ -712,11 +802,13 @@ enum vcap_action_field {
VCAP_AF_CPU_COPY_ENA,
VCAP_AF_CPU_QU,
VCAP_AF_CPU_QUEUE_NUM,
+ VCAP_AF_CUSTOM_ACE_TYPE_ENA,
VCAP_AF_DEI_A_VAL,
VCAP_AF_DEI_B_VAL,
VCAP_AF_DEI_C_VAL,
VCAP_AF_DEI_ENA,
VCAP_AF_DEI_VAL,
+ VCAP_AF_DLR_SEL,
VCAP_AF_DP_ENA,
VCAP_AF_DP_VAL,
VCAP_AF_DSCP_ENA,
@@ -732,7 +824,9 @@ enum vcap_action_field {
VCAP_AF_IGNORE_PIPELINE_CTRL,
VCAP_AF_INTR_ENA,
VCAP_AF_ISDX_ADD_REPLACE_SEL,
+ VCAP_AF_ISDX_ADD_VAL,
VCAP_AF_ISDX_ENA,
+ VCAP_AF_ISDX_REPLACE_ENA,
VCAP_AF_ISDX_VAL,
VCAP_AF_LOOP_ENA,
VCAP_AF_LRN_DIS,
@@ -745,8 +839,10 @@ enum vcap_action_field {
VCAP_AF_MIRROR_ENA,
VCAP_AF_MIRROR_PROBE,
VCAP_AF_MIRROR_PROBE_ID,
+ VCAP_AF_MRP_SEL,
VCAP_AF_NXT_IDX,
VCAP_AF_NXT_IDX_CTRL,
+ VCAP_AF_OAM_SEL,
VCAP_AF_PAG_OVERRIDE_MASK,
VCAP_AF_PAG_VAL,
VCAP_AF_PCP_A_VAL,
@@ -770,6 +866,10 @@ enum vcap_action_field {
VCAP_AF_QOS_VAL,
VCAP_AF_REW_OP,
VCAP_AF_RT_DIS,
+ VCAP_AF_SFID_ENA,
+ VCAP_AF_SFID_VAL,
+ VCAP_AF_SGID_ENA,
+ VCAP_AF_SGID_VAL,
VCAP_AF_SWAP_MACS_ENA,
VCAP_AF_TAG_A_DEI_SEL,
VCAP_AF_TAG_A_PCP_SEL,
@@ -788,7 +888,10 @@ enum vcap_action_field {
VCAP_AF_VID_A_VAL,
VCAP_AF_VID_B_VAL,
VCAP_AF_VID_C_VAL,
+ VCAP_AF_VID_REPLACE_ENA,
VCAP_AF_VID_VAL,
+ VCAP_AF_VLAN_POP_CNT,
+ VCAP_AF_VLAN_POP_CNT_ENA,
};
#endif /* __VCAP_AG_API__ */
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c
index 4847d0d99ec9..5675b0962bc3 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c
@@ -976,6 +976,25 @@ int vcap_lookup_rule_by_cookie(struct vcap_control *vctrl, u64 cookie)
}
EXPORT_SYMBOL_GPL(vcap_lookup_rule_by_cookie);
+/* Get number of rules in a vcap instance lookup chain id range
+ *
+ * @admin: VCAP instance whose rule list is scanned
+ * @cid: any chain id inside the lookup of interest
+ *
+ * The range checked is [rounddown(cid, VCAP_CID_LOOKUP_SIZE),
+ * roundup(cid + 1, VCAP_CID_LOOKUP_SIZE)).
+ *
+ * Takes admin->lock, so the caller must not already hold it.
+ *
+ * Returns the number of rules whose chain id lies in that range.
+ */
+int vcap_admin_rule_count(struct vcap_admin *admin, int cid)
+{
+	int max_cid = roundup(cid + 1, VCAP_CID_LOOKUP_SIZE);
+	int min_cid = rounddown(cid, VCAP_CID_LOOKUP_SIZE);
+	struct vcap_rule_internal *elem;
+	int count = 0;
+
+	/* Hold the lock across the whole walk: taking and dropping it per
+	 * element leaves the traversal of admin->rules itself unprotected
+	 * against concurrent list changes.
+	 */
+	mutex_lock(&admin->lock);
+	list_for_each_entry(elem, &admin->rules, list) {
+		if (elem->data.vcap_chain_id >= min_cid &&
+		    elem->data.vcap_chain_id < max_cid)
+			++count;
+	}
+	mutex_unlock(&admin->lock);
+	return count;
+}
+EXPORT_SYMBOL_GPL(vcap_admin_rule_count);
+
/* Make a copy of the rule, shallow or full */
static struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri,
bool full)
@@ -3403,6 +3422,25 @@ int vcap_rule_mod_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
}
EXPORT_SYMBOL_GPL(vcap_rule_mod_key_u32);
+/* Remove a key field (value and mask) from the rule
+ *
+ * Returns 0 when the key field was found, unlinked and freed, or -EINVAL
+ * (after logging an error) when the rule does not contain @key.
+ */
+int vcap_rule_rem_key(struct vcap_rule *rule, enum vcap_key_field key)
+{
+	struct vcap_client_keyfield *kf = vcap_find_keyfield(rule, key);
+	struct vcap_rule_internal *ri = to_intrule(rule);
+
+	if (kf) {
+		/* Unlink the field from its list before releasing it */
+		list_del(&kf->ctrl.list);
+		kfree(kf);
+		return 0;
+	}
+
+	pr_err("%s:%d: key %s is not in the rule\n",
+	       __func__, __LINE__, vcap_keyfield_name(ri->vctrl, key));
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(vcap_rule_rem_key);
+
static int vcap_rule_mod_action(struct vcap_rule *rule,
enum vcap_action_field action,
enum vcap_field_type ftype,
@@ -3475,6 +3513,29 @@ int vcap_filter_rule_keys(struct vcap_rule *rule,
}
EXPORT_SYMBOL_GPL(vcap_filter_rule_keys);
+/* Pick the keyset in the list with the lowest sw_per_item value
+ *
+ * Entries for which vcap_keyfieldset() returns NULL are ignored. On equal
+ * sw_per_item the earliest entry in the list is kept. Only keysets with
+ * sw_per_item below 100 can be selected; VCAP_KFS_NO_VALUE is returned when
+ * no entry qualifies.
+ */
+enum vcap_keyfield_set
+vcap_select_min_rule_keyset(struct vcap_control *vctrl,
+			    enum vcap_type vtype,
+			    struct vcap_keyset_list *kslist)
+{
+	enum vcap_keyfield_set best = VCAP_KFS_NO_VALUE;
+	const struct vcap_set *info;
+	int smallest = 100;
+	int i;
+
+	for (i = 0; i < kslist->cnt; ++i) {
+		info = vcap_keyfieldset(vctrl, vtype, kslist->keysets[i]);
+		/* Only a known keyset that is strictly smaller takes over */
+		if (info && info->sw_per_item < smallest) {
+			smallest = info->sw_per_item;
+			best = kslist->keysets[i];
+		}
+	}
+	return best;
+}
+EXPORT_SYMBOL_GPL(vcap_select_min_rule_keyset);
+
/* Make a full copy of an existing rule with a new rule id */
struct vcap_rule *vcap_copy_rule(struct vcap_rule *erule)
{
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
index 417af9754bcc..d9d1f7c9d762 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h
@@ -201,6 +201,9 @@ int vcap_rule_add_action_bit(struct vcap_rule *rule,
int vcap_rule_add_action_u32(struct vcap_rule *rule,
enum vcap_action_field action, u32 value);
+/* Get number of rules in a vcap instance lookup chain id range */
+int vcap_admin_rule_count(struct vcap_admin *admin, int cid);
+
/* VCAP rule counter operations */
int vcap_get_rule_count_by_cookie(struct vcap_control *vctrl,
struct vcap_counter *ctr, u64 cookie);
@@ -269,6 +272,14 @@ int vcap_rule_mod_action_u32(struct vcap_rule *rule,
int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key,
u32 *value, u32 *mask);
+/* Remove a key field with value and mask in the rule */
+int vcap_rule_rem_key(struct vcap_rule *rule, enum vcap_key_field key);
+
+/* Select the keyset from the list that results in the smallest rule size */
+enum vcap_keyfield_set
+vcap_select_min_rule_keyset(struct vcap_control *vctrl, enum vcap_type vtype,
+ struct vcap_keyset_list *kslist);
+
struct vcap_client_actionfield *
vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act);
#endif /* __VCAP_API_CLIENT__ */
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c
index 0de3f677135a..b23c11b0647c 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs_kunit.c
@@ -387,7 +387,7 @@ static const char * const test_admin_info_expect[] = {
"default_cnt: 73\n",
"require_cnt_dis: 0\n",
"version: 1\n",
- "vtype: 3\n",
+ "vtype: 4\n",
"vinst: 0\n",
"ingress: 1\n",
"first_cid: 10000\n",
@@ -435,7 +435,7 @@ static const char * const test_admin_expect[] = {
"default_cnt: 73\n",
"require_cnt_dis: 0\n",
"version: 1\n",
- "vtype: 3\n",
+ "vtype: 4\n",
"vinst: 0\n",
"ingress: 1\n",
"first_cid: 8000000\n",
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 6120f2b6684f..492474b4d8aa 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -156,6 +156,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
struct mana_txq *txq;
struct mana_cq *cq;
int err, len;
+ u16 ihs;
if (unlikely(!apc->port_is_up))
goto tx_drop;
@@ -166,6 +167,7 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
txq = &apc->tx_qp[txq_idx].txq;
gdma_sq = txq->gdma_sq;
cq = &apc->tx_qp[txq_idx].tx_cq;
+ tx_stats = &txq->stats;
pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
@@ -179,10 +181,17 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;
- if (pkt_fmt == MANA_SHORT_PKT_FMT)
+ if (pkt_fmt == MANA_SHORT_PKT_FMT) {
pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
- else
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->short_pkt_fmt++;
+ u64_stats_update_end(&tx_stats->syncp);
+ } else {
pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->long_pkt_fmt++;
+ u64_stats_update_end(&tx_stats->syncp);
+ }
pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
pkg.wqe_req.flags = 0;
@@ -232,9 +241,35 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
&ipv6_hdr(skb)->daddr, 0,
IPPROTO_TCP, 0);
}
+
+ if (skb->encapsulation) {
+ ihs = skb_inner_tcp_all_headers(skb);
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->tso_inner_packets++;
+ tx_stats->tso_inner_bytes += skb->len - ihs;
+ u64_stats_update_end(&tx_stats->syncp);
+ } else {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
+ } else {
+ ihs = skb_tcp_all_headers(skb);
+ if (ipv6_has_hopopt_jumbo(skb))
+ ihs -= sizeof(struct hop_jumbo_hdr);
+ }
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->tso_packets++;
+ tx_stats->tso_bytes += skb->len - ihs;
+ u64_stats_update_end(&tx_stats->syncp);
+ }
+
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
csum_type = mana_checksum_info(skb);
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->csum_partial++;
+ u64_stats_update_end(&tx_stats->syncp);
+
if (csum_type == IPPROTO_TCP) {
pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
@@ -254,8 +289,12 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
}
}
- if (mana_map_skb(skb, apc, &pkg))
+ if (mana_map_skb(skb, apc, &pkg)) {
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->mana_map_err++;
+ u64_stats_update_end(&tx_stats->syncp);
goto free_sgl_ptr;
+ }
skb_queue_tail(&txq->pending_skbs, skb);
@@ -1038,6 +1077,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
if (comp_read < 1)
return;
+ apc->eth_stats.tx_cqes = comp_read;
+
for (i = 0; i < comp_read; i++) {
struct mana_tx_comp_oob *cqe_oob;
@@ -1064,6 +1105,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
case CQE_TX_VLAN_TAGGING_VIOLATION:
WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
cqe_oob->cqe_hdr.cqe_type);
+ apc->eth_stats.tx_cqe_err++;
break;
default:
@@ -1072,6 +1114,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
*/
WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
cqe_oob->cqe_hdr.cqe_type);
+ apc->eth_stats.tx_cqe_unknown_type++;
return;
}
@@ -1118,6 +1161,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
WARN_ON_ONCE(1);
cq->work_done = pkt_transmitted;
+
+ apc->eth_stats.tx_cqes -= pkt_transmitted;
}
static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1252,12 +1297,15 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
struct net_device *ndev = rxq->ndev;
struct mana_recv_buf_oob *rxbuf_oob;
+ struct mana_port_context *apc;
struct device *dev = gc->dev;
void *new_buf, *old_buf;
struct page *new_page;
u32 curr, pktlen;
dma_addr_t da;
+ apc = netdev_priv(ndev);
+
switch (oob->cqe_hdr.cqe_type) {
case CQE_RX_OKAY:
break;
@@ -1270,6 +1318,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
case CQE_RX_COALESCED_4:
netdev_err(ndev, "RX coalescing is unsupported\n");
+ apc->eth_stats.rx_coalesced_err++;
return;
case CQE_RX_OBJECT_FENCE:
@@ -1279,6 +1328,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
default:
netdev_err(ndev, "Unknown RX CQE type = %d\n",
oob->cqe_hdr.cqe_type);
+ apc->eth_stats.rx_cqe_unknown_type++;
return;
}
@@ -1341,11 +1391,15 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
struct mana_rxq *rxq = cq->rxq;
+ struct mana_port_context *apc;
int comp_read, i;
+ apc = netdev_priv(rxq->ndev);
+
comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
+ apc->eth_stats.rx_cqes = comp_read;
rxq->xdp_flush = false;
for (i = 0; i < comp_read; i++) {
@@ -1357,6 +1411,8 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
return;
mana_process_rx_cqe(rxq, cq, &comp[i]);
+
+ apc->eth_stats.rx_cqes--;
}
if (rxq->xdp_flush)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 5b776a33a817..a64c81410dc1 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -13,6 +13,15 @@ static const struct {
} mana_eth_stats[] = {
{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
+ {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
+ {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
+ {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
+ tx_cqe_unknown_type)},
+ {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
+ {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
+ rx_coalesced_err)},
+ {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
+ rx_cqe_unknown_type)},
};
static int mana_get_sset_count(struct net_device *ndev, int stringset)
@@ -23,7 +32,8 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
if (stringset != ETH_SS_STATS)
return -EINVAL;
- return ARRAY_SIZE(mana_eth_stats) + num_queues * 8;
+ return ARRAY_SIZE(mana_eth_stats) + num_queues *
+ (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
}
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -61,6 +71,22 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "tx_%d_xdp_xmit", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_tso_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_tso_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_tso_inner_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_tso_inner_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_long_pkt_fmt", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_short_pkt_fmt", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_csum_partial", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_mana_map_err", i);
+ p += ETH_GSTRING_LEN;
}
}
@@ -78,6 +104,14 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
u64 xdp_xmit;
u64 xdp_drop;
u64 xdp_tx;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 long_pkt_fmt;
+ u64 short_pkt_fmt;
+ u64 csum_partial;
+ u64 mana_map_err;
int q, i = 0;
if (!apc->port_is_up)
@@ -113,11 +147,27 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
packets = tx_stats->packets;
bytes = tx_stats->bytes;
xdp_xmit = tx_stats->xdp_xmit;
+ tso_packets = tx_stats->tso_packets;
+ tso_bytes = tx_stats->tso_bytes;
+ tso_inner_packets = tx_stats->tso_inner_packets;
+ tso_inner_bytes = tx_stats->tso_inner_bytes;
+ long_pkt_fmt = tx_stats->long_pkt_fmt;
+ short_pkt_fmt = tx_stats->short_pkt_fmt;
+ csum_partial = tx_stats->csum_partial;
+ mana_map_err = tx_stats->mana_map_err;
} while (u64_stats_fetch_retry(&tx_stats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
data[i++] = xdp_xmit;
+ data[i++] = tso_packets;
+ data[i++] = tso_bytes;
+ data[i++] = tso_inner_packets;
+ data[i++] = tso_inner_bytes;
+ data[i++] = long_pkt_fmt;
+ data[i++] = short_pkt_fmt;
+ data[i++] = csum_partial;
+ data[i++] = mana_map_err;
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
index d23830b5bcb8..73032173ac4e 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c
@@ -55,9 +55,21 @@ static void *get_hashentry(struct rhashtable *ht, void *key,
bool is_pre_ct_flow(struct flow_cls_offload *flow)
{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
+ struct flow_dissector *dissector = rule->match.dissector;
struct flow_action_entry *act;
+ struct flow_match_ct ct;
int i;
+ if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
+ flow_rule_match_ct(rule, &ct);
+ if (ct.key->ct_state)
+ return false;
+ }
+
+ if (flow->common.chain_index)
+ return false;
+
flow_action_for_each(i, act, &flow->rule->action) {
if (act->id == FLOW_ACTION_CT) {
/* The pre_ct rule only have the ct or ct nat action, cannot
@@ -82,24 +94,23 @@ bool is_post_ct_flow(struct flow_cls_offload *flow)
struct flow_match_ct ct;
int i;
- /* post ct entry cannot contains any ct action except ct_clear. */
- flow_action_for_each(i, act, &flow->rule->action) {
- if (act->id == FLOW_ACTION_CT) {
- /* ignore ct clear action. */
- if (act->ct.action == TCA_CT_ACT_CLEAR) {
- exist_ct_clear = true;
- continue;
- }
-
- return false;
- }
- }
-
if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) {
flow_rule_match_ct(rule, &ct);
if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)
return true;
} else {
+ /* post ct entry cannot contain any ct action except ct_clear. */
+ flow_action_for_each(i, act, &flow->rule->action) {
+ if (act->id == FLOW_ACTION_CT) {
+ /* ignore ct clear action. */
+ if (act->ct.action == TCA_CT_ACT_CLEAR) {
+ exist_ct_clear = true;
+ continue;
+ }
+
+ return false;
+ }
+ }
/* when do nat with ct, the post ct entry ignore the ct status,
* will match the nat field(sip/dip) instead. In this situation,
* the flow chain index is not zero and contains ct clear action.
@@ -511,6 +522,21 @@ static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in,
return 0;
}
+/* Extra check for multiple ct-zones merge: run nfp_ct_merge_check() on the nft
+ * entry of the last previous merge (index num_prev_m_entries - 1) and nft_entry.
+ */
+static int nfp_ct_merge_extra_check(struct nfp_fl_ct_flow_entry *nft_entry,
+ struct nfp_fl_ct_tc_merge *tc_m_entry)
+{
+ struct nfp_fl_nft_tc_merge *prev_nft_m_entry;
+ struct nfp_fl_ct_flow_entry *pre_ct_entry;
+
+ pre_ct_entry = tc_m_entry->pre_ct_parent;
+ prev_nft_m_entry = pre_ct_entry->prev_m_entries[pre_ct_entry->num_prev_m_entries - 1];
+
+ return nfp_ct_merge_check(prev_nft_m_entry->nft_parent, nft_entry);
+}
+
static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry,
struct nfp_fl_ct_flow_entry *post_ct_entry,
struct nfp_fl_ct_flow_entry *nft_entry)
@@ -682,34 +708,34 @@ static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u3
static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
struct nfp_flower_priv *priv,
struct net_device *netdev,
- struct nfp_fl_payload *flow_pay)
+ struct nfp_fl_payload *flow_pay,
+ int num_rules)
{
enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
struct flow_action_entry *a_in;
- int i, j, num_actions, id;
+ int i, j, id, num_actions = 0;
struct flow_rule *a_rule;
int err = 0, offset = 0;
- num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries +
- rules[CT_TYPE_NFT]->action.num_entries +
- rules[CT_TYPE_POST_CT]->action.num_entries;
+ for (i = 0; i < num_rules; i++)
+ num_actions += rules[i]->action.num_entries;
/* Add one action to make sure there is enough room to add an checksum action
* when do nat.
*/
- a_rule = flow_rule_alloc(num_actions + 1);
+ a_rule = flow_rule_alloc(num_actions + (num_rules / 2));
if (!a_rule)
return -ENOMEM;
- /* Actions need a BASIC dissector. */
- a_rule->match = rules[CT_TYPE_PRE_CT]->match;
/* post_ct entry have one action at least. */
- if (rules[CT_TYPE_POST_CT]->action.num_entries != 0) {
- tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats;
- }
+ if (rules[num_rules - 1]->action.num_entries != 0)
+ tmp_stats = rules[num_rules - 1]->action.entries[0].hw_stats;
+
+ /* Actions need a BASIC dissector. */
+ a_rule->match = rules[0]->match;
/* Copy actions */
- for (j = 0; j < _CT_TYPE_MAX; j++) {
+ for (j = 0; j < num_rules; j++) {
u32 csum_updated = 0;
u8 ip_proto = 0;
@@ -747,8 +773,9 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
/* nft entry is generated by tc ct, which mangle action do not care
* the stats, inherit the post entry stats to meet the
* flow_action_hw_stats_check.
+ * nft entry flow rules are at odd array index.
*/
- if (j == CT_TYPE_NFT) {
+ if (j & 0x01) {
if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
a_in->hw_stats = tmp_stats;
nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated);
@@ -784,32 +811,40 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
{
enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
struct nfp_fl_ct_zone_entry *zt = m_entry->zt;
+ struct flow_rule *rules[NFP_MAX_ENTRY_RULES];
+ struct nfp_fl_ct_flow_entry *pre_ct_entry;
struct nfp_fl_key_ls key_layer, tmp_layer;
struct nfp_flower_priv *priv = zt->priv;
u16 key_map[_FLOW_PAY_LAYERS_MAX];
struct nfp_fl_payload *flow_pay;
-
- struct flow_rule *rules[_CT_TYPE_MAX];
u8 *key, *msk, *kdata, *mdata;
struct nfp_port *port = NULL;
+ int num_rules, err, i, j = 0;
struct net_device *netdev;
bool qinq_sup;
u32 port_id;
u16 offset;
- int i, err;
netdev = m_entry->netdev;
qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);
- rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule;
- rules[CT_TYPE_NFT] = m_entry->nft_parent->rule;
- rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule;
+ pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
+ num_rules = pre_ct_entry->num_prev_m_entries * 2 + _CT_TYPE_MAX;
+
+ for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++) {
+ rules[j++] = pre_ct_entry->prev_m_entries[i]->tc_m_parent->pre_ct_parent->rule;
+ rules[j++] = pre_ct_entry->prev_m_entries[i]->nft_parent->rule;
+ }
+
+ rules[j++] = m_entry->tc_m_parent->pre_ct_parent->rule;
+ rules[j++] = m_entry->nft_parent->rule;
+ rules[j++] = m_entry->tc_m_parent->post_ct_parent->rule;
memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls));
memset(&key_map, 0, sizeof(key_map));
/* Calculate the resultant key layer and size for offload */
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
err = nfp_flower_calculate_key_layers(priv->app,
m_entry->netdev,
&tmp_layer, rules[i],
@@ -875,7 +910,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
* that the layer is not present.
*/
if (!qinq_sup) {
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
offset = key_map[FLOW_PAY_META_TCI];
key = kdata + offset;
msk = mdata + offset;
@@ -889,7 +924,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_MAC_MPLS];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key,
(struct nfp_flower_mac_mpls *)msk,
rules[i]);
@@ -905,7 +940,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_IPV4];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key,
(struct nfp_flower_ipv4 *)msk,
rules[i]);
@@ -916,7 +951,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_IPV6];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key,
(struct nfp_flower_ipv6 *)msk,
rules[i]);
@@ -927,7 +962,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_L4];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key,
(struct nfp_flower_tp_ports *)msk,
rules[i]);
@@ -938,7 +973,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_QINQ];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
(struct nfp_flower_vlan *)msk,
rules[i]);
@@ -954,7 +989,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
struct nfp_ipv6_addr_entry *entry;
struct in6_addr *dst;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv6_gre_tun((void *)key,
(void *)msk, rules[i]);
}
@@ -971,7 +1006,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
} else {
__be32 dst;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv4_gre_tun((void *)key,
(void *)msk, rules[i]);
}
@@ -995,7 +1030,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
struct nfp_ipv6_addr_entry *entry;
struct in6_addr *dst;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv6_udp_tun((void *)key,
(void *)msk, rules[i]);
}
@@ -1012,7 +1047,7 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
} else {
__be32 dst;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
+ for (i = 0; i < num_rules; i++) {
nfp_flower_compile_ipv4_udp_tun((void *)key,
(void *)msk, rules[i]);
}
@@ -1029,13 +1064,13 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
offset = key_map[FLOW_PAY_GENEVE_OPT];
key = kdata + offset;
msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++)
+ for (i = 0; i < num_rules; i++)
nfp_flower_compile_geneve_opt(key, msk, rules[i]);
}
}
/* Merge actions into flow_pay */
- err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay);
+ err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay, num_rules);
if (err)
goto ct_offload_err;
@@ -1168,6 +1203,12 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
if (err)
return err;
+ if (pre_ct_entry->num_prev_m_entries > 0) {
+ err = nfp_ct_merge_extra_check(nft_entry, tc_m_entry);
+ if (err)
+ return err;
+ }
+
/* Combine tc_merge and nft cookies for this cookie. */
new_cookie[0] = tc_m_entry->cookie[0];
new_cookie[1] = tc_m_entry->cookie[1];
@@ -1198,11 +1239,6 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
list_add(&nft_m_entry->tc_merge_list, &tc_m_entry->children);
list_add(&nft_m_entry->nft_flow_list, &nft_entry->children);
- /* Generate offload structure and send to nfp */
- err = nfp_fl_ct_add_offload(nft_m_entry);
- if (err)
- goto err_nft_ct_offload;
-
err = rhashtable_insert_fast(&zt->nft_merge_tb, &nft_m_entry->hash_node,
nfp_nft_ct_merge_params);
if (err)
@@ -1210,12 +1246,20 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
zt->nft_merge_count++;
+ if (post_ct_entry->goto_chain_index > 0)
+ return nfp_fl_create_new_pre_ct(nft_m_entry);
+
+ /* Generate offload structure and send to nfp */
+ err = nfp_fl_ct_add_offload(nft_m_entry);
+ if (err)
+ goto err_nft_ct_offload;
+
return err;
-err_nft_ct_merge_insert:
+err_nft_ct_offload:
nfp_fl_ct_del_offload(zt->priv->app, nft_m_entry->tc_flower_cookie,
nft_m_entry->netdev);
-err_nft_ct_offload:
+err_nft_ct_merge_insert:
list_del(&nft_m_entry->tc_merge_list);
list_del(&nft_m_entry->nft_flow_list);
kfree(nft_m_entry);
@@ -1243,7 +1287,7 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt,
/* Checks that the chain_index of the filter matches the
* chain_index of the GOTO action.
*/
- if (post_ct_entry->chain_index != pre_ct_entry->chain_index)
+ if (post_ct_entry->chain_index != pre_ct_entry->goto_chain_index)
return -EINVAL;
err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry);
@@ -1461,7 +1505,7 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
entry->zt = zt;
entry->netdev = netdev;
- entry->cookie = flow->cookie;
+ entry->cookie = flow->cookie > 0 ? flow->cookie : (unsigned long)entry;
entry->chain_index = flow->common.chain_index;
entry->tun_offset = NFP_FL_CT_NO_TUN;
@@ -1501,6 +1545,9 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt,
INIT_LIST_HEAD(&entry->children);
+ if (flow->cookie == 0)
+ return entry;
+
/* Now add a ct map entry to flower-priv */
map = get_hashentry(&zt->priv->ct_map_table, &flow->cookie,
nfp_ct_map_params, sizeof(*map));
@@ -1559,6 +1606,14 @@ static void cleanup_nft_merge_entry(struct nfp_fl_nft_tc_merge *m_entry)
list_del(&m_entry->tc_merge_list);
list_del(&m_entry->nft_flow_list);
+ if (m_entry->next_pre_ct_entry) {
+ struct nfp_fl_ct_map_entry pre_ct_map_ent;
+
+ pre_ct_map_ent.ct_entry = m_entry->next_pre_ct_entry;
+ pre_ct_map_ent.cookie = 0;
+ nfp_fl_ct_del_flow(&pre_ct_map_ent);
+ }
+
kfree(m_entry);
}
@@ -1656,6 +1711,22 @@ void nfp_fl_ct_clean_flow_entry(struct nfp_fl_ct_flow_entry *entry)
kfree(entry);
}
+static struct flow_action_entry *get_flow_act_ct(struct flow_rule *rule)
+{
+ struct flow_action_entry *act;
+ int i;
+
+ /* More than one ct action may be present in a flow rule;
+ * return the first one that is not a ct clear action.
+ */
+ flow_action_for_each(i, act, &rule->action) {
+ if (act->id == FLOW_ACTION_CT && act->ct.action != TCA_CT_ACT_CLEAR)
+ return act;
+ }
+
+ return NULL; /* no ct action other than ct clear was found */
+}
+
static struct flow_action_entry *get_flow_act(struct flow_rule *rule,
enum flow_action_id act_id)
{
@@ -1713,14 +1784,15 @@ nfp_ct_merge_nft_with_tc(struct nfp_fl_ct_flow_entry *nft_entry,
int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
struct net_device *netdev,
struct flow_cls_offload *flow,
- struct netlink_ext_ack *extack)
+ struct netlink_ext_ack *extack,
+ struct nfp_fl_nft_tc_merge *m_entry)
{
struct flow_action_entry *ct_act, *ct_goto;
struct nfp_fl_ct_flow_entry *ct_entry;
struct nfp_fl_ct_zone_entry *zt;
int err;
- ct_act = get_flow_act(flow->rule, FLOW_ACTION_CT);
+ ct_act = get_flow_act_ct(flow->rule);
if (!ct_act) {
NL_SET_ERR_MSG_MOD(extack,
"unsupported offload: Conntrack action empty in conntrack offload");
@@ -1756,7 +1828,22 @@ int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
if (IS_ERR(ct_entry))
return PTR_ERR(ct_entry);
ct_entry->type = CT_TYPE_PRE_CT;
- ct_entry->chain_index = ct_goto->chain_index;
+ ct_entry->chain_index = flow->common.chain_index;
+ ct_entry->goto_chain_index = ct_goto->chain_index;
+
+ if (m_entry) {
+ struct nfp_fl_ct_flow_entry *pre_ct_entry;
+ int i;
+
+ pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
+ for (i = 0; i < pre_ct_entry->num_prev_m_entries; i++)
+ ct_entry->prev_m_entries[i] = pre_ct_entry->prev_m_entries[i];
+ ct_entry->prev_m_entries[i++] = m_entry;
+ ct_entry->num_prev_m_entries = i;
+
+ m_entry->next_pre_ct_entry = ct_entry;
+ }
+
list_add(&ct_entry->list_node, &zt->pre_ct_list);
zt->pre_ct_count++;
@@ -1779,6 +1866,7 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
struct nfp_fl_ct_zone_entry *zt;
bool wildcarded = false;
struct flow_match_ct ct;
+ struct flow_action_entry *ct_goto;
flow_rule_match_ct(rule, &ct);
if (!ct.mask->ct_zone) {
@@ -1803,6 +1891,8 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
ct_entry->type = CT_TYPE_POST_CT;
ct_entry->chain_index = flow->common.chain_index;
+ ct_goto = get_flow_act(flow->rule, FLOW_ACTION_GOTO);
+ ct_entry->goto_chain_index = ct_goto ? ct_goto->chain_index : 0;
list_add(&ct_entry->list_node, &zt->post_ct_list);
zt->post_ct_count++;
@@ -1831,6 +1921,28 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
return 0;
}
+int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry)
+{
+ struct nfp_fl_ct_flow_entry *pre_ct_entry, *post_ct_entry;
+ struct flow_cls_offload new_pre_ct_flow;
+ int err;
+
+ pre_ct_entry = m_entry->tc_m_parent->pre_ct_parent;
+ if (pre_ct_entry->num_prev_m_entries >= NFP_MAX_RECIRC_CT_ZONES - 1)
+ return -1; /* prev_m_entries[] has no room for another merge entry */
+
+ post_ct_entry = m_entry->tc_m_parent->post_ct_parent;
+ memset(&new_pre_ct_flow, 0, sizeof(struct flow_cls_offload)); /* fields not set below stay zero */
+ new_pre_ct_flow.rule = post_ct_entry->rule; /* the post_ct rule is reused as the new pre_ct rule */
+ new_pre_ct_flow.common.chain_index = post_ct_entry->chain_index;
+
+ err = nfp_fl_ct_handle_pre_ct(pre_ct_entry->zt->priv,
+ pre_ct_entry->netdev,
+ &new_pre_ct_flow, NULL,
+ m_entry);
+ return err;
+}
+
static void
nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
enum ct_entry_type type, u64 *m_pkts,
@@ -1876,6 +1988,32 @@ nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
0, priv->stats[ctx_id].used,
FLOW_ACTION_HW_STATS_DELAYED);
}
+
+ /* Update previous pre_ct/post_ct/nft flow stats */
+ if (nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries > 0) {
+ struct nfp_fl_nft_tc_merge *tmp_nft_merge;
+ int i;
+
+ for (i = 0; i < nft_merge->tc_m_parent->pre_ct_parent->num_prev_m_entries; i++) {
+ tmp_nft_merge = nft_merge->tc_m_parent->pre_ct_parent->prev_m_entries[i];
+ flow_stats_update(&tmp_nft_merge->tc_m_parent->pre_ct_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ flow_stats_update(&tmp_nft_merge->tc_m_parent->post_ct_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ flow_stats_update(&tmp_nft_merge->nft_parent->stats,
+ priv->stats[ctx_id].bytes,
+ priv->stats[ctx_id].pkts,
+ 0, priv->stats[ctx_id].used,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ }
+ }
+
/* Reset stats from the nfp */
priv->stats[ctx_id].pkts = 0;
priv->stats[ctx_id].bytes = 0;
@@ -2080,10 +2218,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent)
switch (ct_entry->type) {
case CT_TYPE_PRE_CT:
zt->pre_ct_count--;
- rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
- nfp_ct_map_params);
+ if (ct_map_ent->cookie > 0)
+ rhashtable_remove_fast(m_table, &ct_map_ent->hash_node,
+ nfp_ct_map_params);
nfp_fl_ct_clean_flow_entry(ct_entry);
- kfree(ct_map_ent);
+ if (ct_map_ent->cookie > 0)
+ kfree(ct_map_ent);
if (!zt->pre_ct_count) {
zt->nft = NULL;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
index 762c0b36e269..c4ec78358033 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h
@@ -86,6 +86,9 @@ enum ct_entry_type {
_CT_TYPE_MAX,
};
+#define NFP_MAX_RECIRC_CT_ZONES 4
+#define NFP_MAX_ENTRY_RULES (NFP_MAX_RECIRC_CT_ZONES * 2 + 1)
+
enum nfp_nfp_layer_name {
FLOW_PAY_META_TCI = 0,
FLOW_PAY_INPORT,
@@ -112,27 +115,33 @@ enum nfp_nfp_layer_name {
* @cookie: Flow cookie, same as original TC flow, used as key
* @list_node: Used by the list
* @chain_index: Chain index of the original flow
+ * @goto_chain_index: goto chain index of the flow
* @netdev: netdev structure.
- * @type: Type of pre-entry from enum ct_entry_type
* @zt: Reference to the zone table this belongs to
* @children: List of tc_merge flows this flow forms part of
* @rule: Reference to the original TC flow rule
* @stats: Used to cache stats for updating
+ * @prev_m_entries: Array of all previous nft_tc_merge entries
+ * @num_prev_m_entries: The number of all previous nft_tc_merge entries
* @tun_offset: Used to indicate tunnel action offset in action list
* @flags: Used to indicate flow flag like NAT which used by merge.
+ * @type: Type of ct-entry from enum ct_entry_type
*/
struct nfp_fl_ct_flow_entry {
unsigned long cookie;
struct list_head list_node;
u32 chain_index;
- enum ct_entry_type type;
+ u32 goto_chain_index;
struct net_device *netdev;
struct nfp_fl_ct_zone_entry *zt;
struct list_head children;
struct flow_rule *rule;
struct flow_stats stats;
+ struct nfp_fl_nft_tc_merge *prev_m_entries[NFP_MAX_RECIRC_CT_ZONES - 1];
+ u8 num_prev_m_entries;
u8 tun_offset; // Set to NFP_FL_CT_NO_TUN if no tun
u8 flags;
+ u8 type;
};
/**
@@ -169,6 +178,7 @@ struct nfp_fl_ct_tc_merge {
* @nft_parent: The nft_entry parent
* @tc_flower_cookie: The cookie of the flow offloaded to the nfp
* @flow_pay: Reference to the offloaded flow struct
+ * @next_pre_ct_entry: Reference to the next ct zone pre ct entry
*/
struct nfp_fl_nft_tc_merge {
struct net_device *netdev;
@@ -181,6 +191,7 @@ struct nfp_fl_nft_tc_merge {
struct nfp_fl_ct_flow_entry *nft_parent;
unsigned long tc_flower_cookie;
struct nfp_fl_payload *flow_pay;
+ struct nfp_fl_ct_flow_entry *next_pre_ct_entry;
};
/**
@@ -204,6 +215,7 @@ bool is_post_ct_flow(struct flow_cls_offload *flow);
* @netdev: netdev structure.
* @flow: TC flower classifier offload structure.
* @extack: Extack pointer for errors
+ * @m_entry: previous nfp_fl_nft_tc_merge entry
*
* Adds a new entry to the relevant zone table and tries to
* merge with other +trk+est entries and offload if possible.
@@ -213,7 +225,8 @@ bool is_post_ct_flow(struct flow_cls_offload *flow);
int nfp_fl_ct_handle_pre_ct(struct nfp_flower_priv *priv,
struct net_device *netdev,
struct flow_cls_offload *flow,
- struct netlink_ext_ack *extack);
+ struct netlink_ext_ack *extack,
+ struct nfp_fl_nft_tc_merge *m_entry);
/**
* nfp_fl_ct_handle_post_ct() - Handles +trk+est conntrack rules
* @priv: Pointer to app priv
@@ -232,6 +245,19 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
struct netlink_ext_ack *extack);
/**
+ * nfp_fl_create_new_pre_ct() - create next ct_zone -trk conntrack rules
+ * @m_entry: previous nfp_fl_nft_tc_merge entry
+ *
+ * Create a new pre_ct entry from previous nfp_fl_nft_tc_merge entry
+ * to the next relevant zone table. Try to merge with other +trk+est
+ * entries and offload if possible. The created new pre_ct entry is
+ * linked to the previous nfp_fl_nft_tc_merge entry.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+int nfp_fl_create_new_pre_ct(struct nfp_fl_nft_tc_merge *m_entry);
+
+/**
* nfp_fl_ct_clean_flow_entry() - Free a nfp_fl_ct_flow_entry
* @entry: Flow entry to cleanup
*/
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 8593cafa6368..18328eb7f5c3 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1344,7 +1344,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
port = nfp_port_from_netdev(netdev);
if (is_pre_ct_flow(flow))
- return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack);
+ return nfp_fl_ct_handle_pre_ct(priv, netdev, flow, extack, NULL);
if (is_post_ct_flow(flow))
return nfp_fl_ct_handle_post_ct(priv, netdev, flow, extack);
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 56e02cba0b8a..0fd156286d4d 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1422,7 +1422,7 @@ static struct platform_driver nixge_driver = {
.remove = nixge_remove,
.driver = {
.name = "nixge",
- .of_match_table = of_match_ptr(nixge_dt_ids),
+ .of_match_table = nixge_dt_ids,
},
};
module_platform_driver(nixge_driver);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index f13fa7396aef..3d36d23df0c6 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -854,7 +854,7 @@ typedef struct {
The following is packed:
- N cardrsp_rds_rings
- N cardrs_sds_rings */
- char data[0];
+ char data[];
} nx_cardrsp_rx_ctx_t;
#define SIZEOF_HOSTRQ_RX(HOSTRQ_RX, rds_rings, sds_rings) \
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index de8d54b23f73..59d0dd862fd1 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -18,7 +18,6 @@
#include <linux/ipv6.h>
#include <linux/inetdevice.h>
#include <linux/sysfs.h>
-#include <linux/aer.h>
MODULE_DESCRIPTION("QLogic/NetXen (1/10) GbE Intelligent Ethernet Driver");
MODULE_LICENSE("GPL");
@@ -1464,9 +1463,6 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if ((err = pci_request_regions(pdev, netxen_nic_driver_name)))
goto err_out_disable_pdev;
- if (NX_IS_REVISION_P3(pdev->revision))
- pci_enable_pcie_error_reporting(pdev);
-
pci_set_master(pdev);
netdev = alloc_etherdev(sizeof(struct netxen_adapter));
@@ -1603,8 +1599,6 @@ err_out_free_netdev:
free_netdev(netdev);
err_out_free_res:
- if (NX_IS_REVISION_P3(pdev->revision))
- pci_disable_pcie_error_reporting(pdev);
pci_release_regions(pdev);
err_out_disable_pdev:
@@ -1659,10 +1653,8 @@ static void netxen_nic_remove(struct pci_dev *pdev)
netxen_release_firmware(adapter);
- if (NX_IS_REVISION_P3(pdev->revision)) {
+ if (NX_IS_REVISION_P3(pdev->revision))
netxen_cleanup_minidump(adapter);
- pci_disable_pcie_error_reporting(pdev);
- }
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c91898be7c03..f5af83342856 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -23,7 +23,6 @@
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_ll2_if.h>
#include <net/devlink.h>
-#include <linux/aer.h>
#include <linux/phylink.h>
#include "qed.h"
@@ -259,8 +258,6 @@ static void qed_free_pci(struct qed_dev *cdev)
{
struct pci_dev *pdev = cdev->pdev;
- pci_disable_pcie_error_reporting(pdev);
-
if (cdev->doorbells && cdev->db_size)
iounmap(cdev->doorbells);
if (cdev->regview)
@@ -366,12 +363,6 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
return -ENOMEM;
}
- /* AER (Advanced Error reporting) configuration */
- rc = pci_enable_pcie_error_reporting(pdev);
- if (rc)
- DP_VERBOSE(cdev, NETIF_MSG_DRV,
- "Failed to configure PCIe AER [%d]\n", rc);
-
return 0;
err2:
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f90dcfe9ee68..f9931ecb7baa 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -6,8 +6,6 @@
#ifndef _QEDE_H_
#define _QEDE_H_
-#include <linux/compiler.h>
-#include <linux/version.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 8034d812d5a0..374a86b875a3 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -4,7 +4,6 @@
* Copyright (c) 2019-2020 Marvell International Ltd.
*/
-#include <linux/version.h>
#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 261f982ca40d..4c6c685820e3 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -35,7 +35,6 @@
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
-#include <linux/aer.h>
#include "qede.h"
#include "qede_ptp.h"
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 2fd5c6fdb500..bcef8ab715bf 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -8,7 +8,6 @@
#include <linux/ipv6.h>
#include <linux/ethtool.h>
#include <linux/interrupt.h>
-#include <linux/aer.h>
#include "qlcnic.h"
#include "qlcnic_sriov.h"
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 44dac3c0908e..90df4a0909fa 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -12,7 +12,6 @@
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/inetdevice.h>
-#include <linux/aer.h>
#include <linux/log2.h>
#include <linux/pci.h>
#include <net/vxlan.h>
@@ -2445,7 +2444,6 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_disable_pdev;
pci_set_master(pdev);
- pci_enable_pcie_error_reporting(pdev);
ahw = kzalloc(sizeof(struct qlcnic_hardware_context), GFP_KERNEL);
if (!ahw) {
@@ -2675,7 +2673,6 @@ err_out_free_hw_res:
kfree(ahw);
err_out_free_res:
- pci_disable_pcie_error_reporting(pdev);
pci_release_regions(pdev);
err_out_disable_pdev:
@@ -2757,7 +2754,6 @@ static void qlcnic_remove(struct pci_dev *pdev)
qlcnic_release_firmware(adapter);
- pci_disable_pcie_error_reporting(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 5c2edb715d3e..74125188beb8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -12,7 +12,6 @@
#include <linux/ipv6.h>
#include <linux/inetdevice.h>
#include <linux/sysfs.h>
-#include <linux/aer.h>
#include <linux/log2.h>
#ifdef CONFIG_QLCNIC_HWMON
#include <linux/hwmon.h>
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 45147a1016be..6563e4c6a136 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -613,8 +613,13 @@ struct rtl8169_private {
struct work_struct work;
} wk;
+ spinlock_t config25_lock;
+ spinlock_t mac_ocp_lock;
+
+ spinlock_t cfg9346_usage_lock;
+ int cfg9346_usage_count;
+
unsigned supports_gmii:1;
- unsigned aspm_manageable:1;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
@@ -661,12 +666,22 @@ static inline struct device *tp_to_dev(struct rtl8169_private *tp)
static void rtl_lock_config_regs(struct rtl8169_private *tp)
{
- RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ if (!--tp->cfg9346_usage_count)
+ RTL_W8(tp, Cfg9346, Cfg9346_Lock);
+ spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_unlock_config_regs(struct rtl8169_private *tp)
{
- RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+ if (!tp->cfg9346_usage_count++)
+ RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
+ spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
}
static void rtl_pci_commit(struct rtl8169_private *tp)
@@ -675,6 +690,28 @@ static void rtl_pci_commit(struct rtl8169_private *tp)
RTL_R8(tp, ChipCmd);
}
+static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set)
+{
+ unsigned long flags;
+ u8 val;
+
+ spin_lock_irqsave(&tp->config25_lock, flags);
+ val = RTL_R8(tp, Config2);
+ RTL_W8(tp, Config2, (val & ~clear) | set);
+ spin_unlock_irqrestore(&tp->config25_lock, flags);
+}
+
+static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
+{
+ unsigned long flags;
+ u8 val;
+
+ spin_lock_irqsave(&tp->config25_lock, flags);
+ val = RTL_R8(tp, Config5);
+ RTL_W8(tp, Config5, (val & ~clear) | set);
+ spin_unlock_irqrestore(&tp->config25_lock, flags);
+}
+
static bool rtl_is_8125(struct rtl8169_private *tp)
{
return tp->mac_version >= RTL_GIGA_MAC_VER_61;
@@ -847,7 +884,7 @@ static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
(RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT;
}
-static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
+static void __r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
{
if (rtl_ocp_reg_failure(reg))
return;
@@ -855,7 +892,16 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
}
-static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
+static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ __r8168_mac_ocp_write(tp, reg, data);
+ spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+}
+
+static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
{
if (rtl_ocp_reg_failure(reg))
return 0;
@@ -865,12 +911,28 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
return RTL_R32(tp, OCPDR);
}
+static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
+{
+ unsigned long flags;
+ u16 val;
+
+ spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ val = __r8168_mac_ocp_read(tp, reg);
+ spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+
+ return val;
+}
+
static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
u16 set)
{
- u16 data = r8168_mac_ocp_read(tp, reg);
+ unsigned long flags;
+ u16 data;
- r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
+ spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+ data = __r8168_mac_ocp_read(tp, reg);
+ __r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
+ spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
}
/* Work around a hw issue with RTL8168g PHY, the quirk disables
@@ -1336,6 +1398,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
{ WAKE_MAGIC, Config3, MagicPacket }
};
unsigned int i, tmp = ARRAY_SIZE(cfg);
+ unsigned long flags;
u8 options;
rtl_unlock_config_regs(tp);
@@ -1354,12 +1417,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
}
+ spin_lock_irqsave(&tp->config25_lock, flags);
for (i = 0; i < tmp; i++) {
options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
if (wolopts & cfg[i].opt)
options |= cfg[i].mask;
RTL_W8(tp, cfg[i].reg, options);
}
+ spin_unlock_irqrestore(&tp->config25_lock, flags);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -1371,10 +1436,10 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
case RTL_GIGA_MAC_VER_34:
case RTL_GIGA_MAC_VER_37:
case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63:
- options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
if (wolopts)
- options |= PME_SIGNAL;
- RTL_W8(tp, Config2, options);
+ rtl_mod_config2(tp, 0, PME_SIGNAL);
+ else
+ rtl_mod_config2(tp, PME_SIGNAL, 0);
break;
default:
break;
@@ -2675,10 +2740,12 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp)
static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
{
- /* Don't enable ASPM in the chip if OS can't control ASPM */
- if (enable && tp->aspm_manageable) {
- RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
- RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
+ if (tp->mac_version < RTL_GIGA_MAC_VER_32)
+ return;
+
+ if (enable) {
+ rtl_mod_config5(tp, 0, ASPM_en);
+ rtl_mod_config2(tp, 0, ClkReqEn);
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48:
@@ -2701,11 +2768,9 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
break;
}
- RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
- RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
+ rtl_mod_config2(tp, ClkReqEn, 0);
+ rtl_mod_config5(tp, ASPM_en, 0);
}
-
- udelay(10);
}
static void rtl_set_fifo_size(struct rtl8169_private *tp, u16 rx_stat,
@@ -2863,7 +2928,7 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
RTL_W32(tp, MISC, RTL_R32(tp, MISC) | TXPLA_RST);
RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~TXPLA_RST);
- RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
+ rtl_mod_config5(tp, Spi_en, 0);
}
static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
@@ -2896,7 +2961,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
- RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
+ rtl_mod_config5(tp, Spi_en, 0);
rtl_hw_aspm_clkreq_enable(tp, true);
}
@@ -2919,7 +2984,7 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN);
RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN);
- RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~Spi_en);
+ rtl_mod_config5(tp, Spi_en, 0);
rtl8168_config_eee_mac(tp);
}
@@ -4510,6 +4575,10 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
}
if (napi_schedule_prep(&tp->napi)) {
+ rtl_unlock_config_regs(tp);
+ rtl_hw_aspm_clkreq_enable(tp, false);
+ rtl_lock_config_regs(tp);
+
rtl_irq_disable(tp);
__napi_schedule(&tp->napi);
}
@@ -4569,9 +4638,14 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
work_done = rtl_rx(dev, tp, budget);
- if (work_done < budget && napi_complete_done(napi, work_done))
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
rtl_irq_enable(tp);
+ rtl_unlock_config_regs(tp);
+ rtl_hw_aspm_clkreq_enable(tp, true);
+ rtl_lock_config_regs(tp);
+ }
+
return work_done;
}
@@ -5145,16 +5219,6 @@ done:
rtl_rar_set(tp, mac_addr);
}
-/* register is set if system vendor successfully tested ASPM 1.2 */
-static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
-{
- if (tp->mac_version >= RTL_GIGA_MAC_VER_61 &&
- r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
- return true;
-
- return false;
-}
-
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct rtl8169_private *tp;
@@ -5176,6 +5240,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->eee_adv = -1;
tp->ocp_base = OCP_STD_PHY_BASE;
+ spin_lock_init(&tp->cfg9346_usage_lock);
+ spin_lock_init(&tp->config25_lock);
+ spin_lock_init(&tp->mac_ocp_lock);
+
dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
struct pcpu_sw_netstats);
if (!dev->tstats)
@@ -5222,19 +5290,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->mac_version = chipset;
- /* Disable ASPM L1 as that cause random device stop working
- * problems as well as full system hangs for some PCIe devices users.
- * Chips from RTL8168h partially have issues with L1.2, but seem
- * to work fine with L1 and L1.1.
- */
- if (rtl_aspm_is_safe(tp))
- rc = 0;
- else if (tp->mac_version >= RTL_GIGA_MAC_VER_46)
- rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
- else
- rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
- tp->aspm_manageable = !rc;
-
tp->dash_type = rtl_check_dash(tp);
tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 894e2690c643..4d6b3b7d6abb 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -28,7 +28,6 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/sys_soc.h>
#include <linux/reset.h>
#include <linux/math64.h>
@@ -1390,11 +1389,6 @@ static void ravb_adjust_link(struct net_device *ndev)
phy_print_status(phydev);
}
-static const struct soc_device_attribute r8a7795es10[] = {
- { .soc_id = "r8a7795", .revision = "ES1.0", },
- { /* sentinel */ }
-};
-
/* PHY init function */
static int ravb_phy_init(struct net_device *ndev)
{
@@ -1434,15 +1428,6 @@ static int ravb_phy_init(struct net_device *ndev)
goto err_deregister_fixed_link;
}
- /* This driver only support 10/100Mbit speeds on R-Car H3 ES1.0
- * at this time.
- */
- if (soc_device_match(r8a7795es10)) {
- phy_set_max_speed(phydev, SPEED_100);
-
- netdev_info(ndev, "limited PHY to 100Mbit/s\n");
- }
-
if (!info->half_duplex) {
/* 10BASE, Pause and Asym Pause is not supported */
phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index 926532466691..4e5526303f07 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -229,7 +229,7 @@ static struct platform_driver sxgbe_platform_driver = {
.driver = {
.name = SXGBE_RESOURCE_NAME,
.pm = &sxgbe_platform_pm_ops,
- .of_match_table = of_match_ptr(sxgbe_dt_ids),
+ .of_match_table = sxgbe_dt_ids,
},
};
diff --git a/drivers/net/ethernet/sfc/ef100.c b/drivers/net/ethernet/sfc/ef100.c
index 71aab3d0480f..6334992b0af4 100644
--- a/drivers/net/ethernet/sfc/ef100.c
+++ b/drivers/net/ethernet/sfc/ef100.c
@@ -11,7 +11,6 @@
#include "net_driver.h"
#include <linux/module.h>
-#include <linux/aer.h>
#include "efx_common.h"
#include "efx_channels.h"
#include "io.h"
@@ -440,8 +439,6 @@ static void ef100_pci_remove(struct pci_dev *pci_dev)
pci_dbg(pci_dev, "shutdown successful\n");
- pci_disable_pcie_error_reporting(pci_dev);
-
pci_set_drvdata(pci_dev, NULL);
efx_fini_struct(efx);
kfree(probe_data);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 02c2adeb0a12..92c390ec4735 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -18,7 +18,6 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
-#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
@@ -892,8 +891,6 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
free_netdev(efx->net_dev);
probe_data = container_of(efx, struct efx_probe_data, efx);
kfree(probe_data);
-
- pci_disable_pcie_error_reporting(pci_dev);
};
/* NIC VPD information
@@ -1126,8 +1123,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
netif_warn(efx, probe, efx->net_dev,
"failed to create MTDs (%d)\n", rc);
- (void)pci_enable_pcie_error_reporting(pci_dev);
-
if (efx->type->udp_tnl_push_ports)
efx->type->udp_tnl_push_ports(efx);
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index e151b0957751..e001f27085c6 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -17,7 +17,6 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
-#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include "efx.h"
@@ -2765,8 +2764,6 @@ static void ef4_pci_remove(struct pci_dev *pci_dev)
ef4_fini_struct(efx);
free_netdev(efx->net_dev);
-
- pci_disable_pcie_error_reporting(pci_dev);
};
/* NIC VPD information
@@ -2927,12 +2924,6 @@ static int ef4_pci_probe(struct pci_dev *pci_dev,
netif_warn(efx, probe, efx->net_dev,
"failed to create MTDs (%d)\n", rc);
- rc = pci_enable_pcie_error_reporting(pci_dev);
- if (rc && rc != -EINVAL)
- netif_notice(efx, probe, efx->net_dev,
- "PCIE error reporting unavailable (%d).\n",
- rc);
-
return 0;
fail4:
diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c
index 2d32abe5f478..c53d354c1fb2 100644
--- a/drivers/net/ethernet/sfc/mae.c
+++ b/drivers/net/ethernet/sfc/mae.c
@@ -682,6 +682,10 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
size_t outlen;
int rc;
+ MCDI_POPULATE_DWORD_2(inbuf, MAE_ACTION_SET_ALLOC_IN_FLAGS,
+ MAE_ACTION_SET_ALLOC_IN_VLAN_PUSH, act->vlan_push,
+ MAE_ACTION_SET_ALLOC_IN_VLAN_POP, act->vlan_pop);
+
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_SRC_MAC_ID,
MC_CMD_MAE_MAC_ADDR_ALLOC_OUT_MAC_ID_NULL);
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DST_MAC_ID,
@@ -694,6 +698,18 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act)
MC_CMD_MAE_COUNTER_ALLOC_OUT_COUNTER_ID_NULL);
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_COUNTER_LIST_ID,
MC_CMD_MAE_COUNTER_LIST_ALLOC_OUT_COUNTER_LIST_ID_NULL);
+ if (act->vlan_push) {
+ MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN0_TCI_BE,
+ act->vlan_tci[0]);
+ MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN0_PROTO_BE,
+ act->vlan_proto[0]);
+ }
+ if (act->vlan_push >= 2) {
+ MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_TCI_BE,
+ act->vlan_tci[1]);
+ MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE,
+ act->vlan_proto[1]);
+ }
MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID,
MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL);
if (act->deliver)
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index b139b76febff..454e9d51a4c2 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -233,6 +233,11 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
((void)BUILD_BUG_ON_ZERO(_field ## _LEN != 2), \
le16_to_cpu(*(__force const __le16 *)MCDI_STRUCT_PTR(_buf, _field)))
/* Write a 16-bit field defined in the protocol as being big-endian. */
+#define MCDI_SET_WORD_BE(_buf, _field, _value) do { \
+ BUILD_BUG_ON(MC_CMD_ ## _field ## _LEN != 2); \
+ BUILD_BUG_ON(MC_CMD_ ## _field ## _OFST & 1); \
+ *(__force __be16 *)MCDI_PTR(_buf, _field) = (_value); \
+ } while (0)
#define MCDI_STRUCT_SET_WORD_BE(_buf, _field, _value) do { \
BUILD_BUG_ON(_field ## _LEN != 2); \
BUILD_BUG_ON(_field ## _OFST & 1); \
diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c
index ef52ec71d197..8c557f6a183c 100644
--- a/drivers/net/ethernet/sfc/siena/efx.c
+++ b/drivers/net/ethernet/sfc/siena/efx.c
@@ -18,7 +18,6 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
-#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
@@ -874,8 +873,6 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_siena_fini_struct(efx);
free_netdev(efx->net_dev);
-
- pci_disable_pcie_error_reporting(pci_dev);
};
/* NIC VPD information
@@ -1094,8 +1091,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
netif_warn(efx, probe, efx->net_dev,
"failed to create MTDs (%d)\n", rc);
- (void)pci_enable_pcie_error_reporting(pci_dev);
-
if (efx->type->udp_tnl_push_ports)
efx->type->udp_tnl_push_ports(efx);
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index deeaab9ee761..2b07bb2fd735 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -286,6 +286,8 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
enum efx_tc_action_order {
+ EFX_TC_AO_VLAN_POP,
+ EFX_TC_AO_VLAN_PUSH,
EFX_TC_AO_COUNT,
EFX_TC_AO_DELIVER
};
@@ -294,6 +296,20 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
enum efx_tc_action_order new)
{
switch (new) {
+ case EFX_TC_AO_VLAN_POP:
+ if (act->vlan_pop >= 2)
+ return false;
+ /* If we've already pushed a VLAN, we can't then pop it;
+ * the hardware would instead try to pop an existing VLAN
+ * before pushing the new one.
+ */
+ if (act->vlan_push)
+ return false;
+ fallthrough;
+ case EFX_TC_AO_VLAN_PUSH:
+ if (act->vlan_push >= 2)
+ return false;
+ fallthrough;
case EFX_TC_AO_COUNT:
if (act->count)
return false;
@@ -393,6 +409,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
flow_action_for_each(i, fa, &fr->action) {
struct efx_tc_action_set save;
+ u16 tci;
if (!act) {
/* more actions after a non-pipe action */
@@ -494,6 +511,31 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
}
*act = save;
break;
+ case FLOW_ACTION_VLAN_POP:
+ if (act->vlan_push) {
+ act->vlan_push--;
+ } else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
+ act->vlan_pop++;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "More than two VLAN pops, or action order violated");
+ rc = -EINVAL;
+ goto release;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
+ rc = -EINVAL;
+ NL_SET_ERR_MSG_MOD(extack,
+ "More than two VLAN pushes, or action order violated");
+ goto release;
+ }
+ tci = fa->vlan.vid & VLAN_VID_MASK;
+ tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
+ act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
+ act->vlan_proto[act->vlan_push] = fa->vlan.proto;
+ act->vlan_push++;
+ break;
default:
NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
fa->id);
diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h
index 418ce8c13a06..542853f60c2a 100644
--- a/drivers/net/ethernet/sfc/tc.h
+++ b/drivers/net/ethernet/sfc/tc.h
@@ -19,7 +19,11 @@
#define IS_ALL_ONES(v) (!(typeof (v))~(v))
struct efx_tc_action_set {
+ u16 vlan_push:2;
+ u16 vlan_pop:2;
u16 deliver:1;
+ __be16 vlan_tci[2]; /* TCIs for vlan_push */
+ __be16 vlan_proto[2]; /* Ethertypes for vlan_push */
struct efx_tc_counter_index *count;
u32 dest_mport;
u32 fw_id; /* index of this entry in firmware actions table */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index 5e731a72cce8..ef8f3a940938 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -91,7 +91,7 @@ static struct platform_driver dwmac_generic_driver = {
.driver = {
.name = STMMAC_RESOURCE_NAME,
.pm = &stmmac_pltfr_pm_ops,
- .of_match_table = of_match_ptr(dwmac_generic_match),
+ .of_match_table = dwmac_generic_match,
},
};
module_platform_driver(dwmac_generic_driver);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 732774645c1a..32763566c214 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -606,7 +606,7 @@ static struct platform_driver qcom_ethqos_driver = {
.driver = {
.name = "qcom-ethqos",
.pm = &stmmac_pltfr_pm_ops,
- .of_match_table = of_match_ptr(qcom_ethqos_match),
+ .of_match_table = qcom_ethqos_match,
},
};
module_platform_driver(qcom_ethqos_driver);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 4e3861c47708..4cfbc1c2b1c4 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1466,15 +1466,13 @@ static void am65_cpsw_disable_serdes_phy(struct am65_cpsw_common *common)
static int am65_cpsw_init_serdes_phy(struct device *dev, struct device_node *port_np,
struct am65_cpsw_port *port)
{
- const char *name = "serdes-phy";
+ const char *name = "serdes";
struct phy *phy;
int ret;
- phy = devm_of_phy_get(dev, port_np, name);
- if (PTR_ERR(phy) == -ENODEV)
- return 0;
- if (IS_ERR(phy))
- return PTR_ERR(phy);
+ phy = devm_of_phy_optional_get(dev, port_np, name);
+ if (IS_ERR_OR_NULL(phy))
+ return PTR_ERR_OR_ZERO(phy);
/* Serdes PHY exists. Store it. */
port->slave.serdes_phy = phy;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 7db57f934a91..ca409b4054d0 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -4,6 +4,7 @@
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
@@ -1261,7 +1262,7 @@ static void wx_set_rx_buffer_len(struct wx *wx)
struct net_device *netdev = wx->netdev;
u32 mhadd, max_frame;
- max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+ max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
/* adjust max frame to be at least the size of a standard frame */
if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
@@ -1271,6 +1272,24 @@ static void wx_set_rx_buffer_len(struct wx *wx)
wr32(wx, WX_PSR_MAX_SZ, max_frame);
}
+/**
+ * wx_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int wx_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct wx *wx = netdev_priv(netdev);
+
+ netdev->mtu = new_mtu;
+ wx_set_rx_buffer_len(wx);
+
+ return 0;
+}
+EXPORT_SYMBOL(wx_change_mtu);
+
/* Disable the specified rx queue */
void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring)
{
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 44dfd6ea442a..c173c56f0ab5 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -23,6 +23,7 @@ void wx_flush_sw_mac_table(struct wx *wx);
int wx_set_mac(struct net_device *netdev, void *p);
void wx_disable_rx(struct wx *wx);
void wx_set_rx_mode(struct net_device *netdev);
+int wx_change_mtu(struct net_device *netdev, int new_mtu);
void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring);
void wx_configure(struct wx *wx);
int wx_disable_pcie_master(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 77d8d7f1707e..2b9efd13c500 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -300,6 +300,8 @@
#define WX_MAX_RXD 8192
#define WX_MAX_TXD 8192
+#define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
+
/* Supported Rx Buffer Sizes */
#define WX_RXBUFFER_256 256 /* Used for skb receive header */
#define WX_RXBUFFER_2K 2048
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index 5b564d348c09..1a004aa2adcb 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -6,10 +6,10 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/string.h>
-#include <linux/aer.h>
#include <linux/etherdevice.h>
#include <net/ip.h>
#include <linux/phy.h>
+#include <linux/if_vlan.h>
#include "../libwx/wx_type.h"
#include "../libwx/wx_hw.h"
@@ -470,6 +470,7 @@ static void ngbe_shutdown(struct pci_dev *pdev)
static const struct net_device_ops ngbe_netdev_ops = {
.ndo_open = ngbe_open,
.ndo_stop = ngbe_close,
+ .ndo_change_mtu = wx_change_mtu,
.ndo_start_xmit = wx_xmit_frame,
.ndo_set_rx_mode = wx_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
@@ -520,7 +521,6 @@ static int ngbe_probe(struct pci_dev *pdev,
goto err_pci_disable_dev;
}
- pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
netdev = devm_alloc_etherdev_mqs(&pdev->dev,
@@ -562,7 +562,8 @@ static int ngbe_probe(struct pci_dev *pdev,
netdev->priv_flags |= IFF_SUPP_NOFCS;
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = NGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+ netdev->max_mtu = WX_MAX_JUMBO_FRAME_SIZE -
+ (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
wx->bd_number = func_nums;
/* setup the private structure */
@@ -669,7 +670,6 @@ err_clear_interrupt_scheme:
err_free_mac_table:
kfree(wx->mac_table);
err_pci_release_regions:
- pci_disable_pcie_error_reporting(pdev);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_disable_dev:
@@ -698,7 +698,6 @@ static void ngbe_remove(struct pci_dev *pdev)
kfree(wx->mac_table);
wx_clear_interrupt_scheme(wx);
- pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
}
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index a2351349785e..373d5af628cd 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -137,7 +137,6 @@ enum NGBE_MSCA_CMD_value {
#define NGBE_RX_PB_SIZE 42
#define NGBE_MC_TBL_SIZE 128
#define NGBE_TDB_PB_SZ (20 * 1024) /* 160KB Packet Buffer */
-#define NGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
/* TX/RX descriptor defines */
#define NGBE_DEFAULT_TXD 512 /* default ring size */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 6c0a98230557..843a88bc416f 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -6,9 +6,9 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/string.h>
-#include <linux/aer.h>
#include <linux/etherdevice.h>
#include <net/ip.h>
+#include <linux/if_vlan.h>
#include "../libwx/wx_type.h"
#include "../libwx/wx_lib.h"
@@ -487,6 +487,7 @@ static void txgbe_shutdown(struct pci_dev *pdev)
static const struct net_device_ops txgbe_netdev_ops = {
.ndo_open = txgbe_open,
.ndo_stop = txgbe_close,
+ .ndo_change_mtu = wx_change_mtu,
.ndo_start_xmit = wx_xmit_frame,
.ndo_set_rx_mode = wx_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
@@ -538,7 +539,6 @@ static int txgbe_probe(struct pci_dev *pdev,
goto err_pci_disable_dev;
}
- pci_enable_pcie_error_reporting(pdev);
pci_set_master(pdev);
netdev = devm_alloc_etherdev_mqs(&pdev->dev,
@@ -605,7 +605,8 @@ static int txgbe_probe(struct pci_dev *pdev,
netdev->priv_flags |= IFF_SUPP_NOFCS;
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = TXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+ netdev->max_mtu = WX_MAX_JUMBO_FRAME_SIZE -
+ (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
/* make sure the EEPROM is good */
err = txgbe_validate_eeprom_checksum(wx, NULL);
@@ -698,7 +699,6 @@ err_release_hw:
err_free_mac_table:
kfree(wx->mac_table);
err_pci_release_regions:
- pci_disable_pcie_error_reporting(pdev);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_disable_dev:
@@ -729,8 +729,6 @@ static void txgbe_remove(struct pci_dev *pdev)
kfree(wx->mac_table);
wx_clear_interrupt_scheme(wx);
- pci_disable_pcie_error_reporting(pdev);
-
pci_disable_device(pdev);
}
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 563ea51deca6..63a1c733718d 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -79,7 +79,6 @@
#define TXGBE_SP_MC_TBL_SIZE 128
#define TXGBE_SP_RX_PB_SIZE 512
#define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */
-#define TXGBE_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
/* TX/RX descriptor defines */
#define TXGBE_DEFAULT_TXD 512
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 89ff7f8e8c7e..5c3c7169c0a2 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1426,7 +1426,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
dev->type = ARPHRD_NONE;
dev->hard_header_len = 0;
dev->addr_len = 0;
- dev->flags = IFF_NOARP;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
}
err = register_netdevice(dev);
diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c
index 5cf218c674a5..f9972b8140f9 100644
--- a/drivers/net/ieee802154/adf7242.c
+++ b/drivers/net/ieee802154/adf7242.c
@@ -1336,9 +1336,8 @@ MODULE_DEVICE_TABLE(spi, adf7242_device_id);
static struct spi_driver adf7242_driver = {
.id_table = adf7242_device_id,
.driver = {
- .of_match_table = of_match_ptr(adf7242_of_match),
+ .of_match_table = adf7242_of_match,
.name = "adf7242",
- .owner = THIS_MODULE,
},
.probe = adf7242_probe,
.remove = adf7242_remove,
diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index 62b984f84d9f..164c7f605af5 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -1662,7 +1662,7 @@ MODULE_DEVICE_TABLE(spi, at86rf230_device_id);
static struct spi_driver at86rf230_driver = {
.id_table = at86rf230_device_id,
.driver = {
- .of_match_table = of_match_ptr(at86rf230_of_match),
+ .of_match_table = at86rf230_of_match,
.name = "at86rf230",
},
.probe = at86rf230_probe,
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 0b0c6c0764fe..1eada1db8dcf 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -3179,8 +3179,7 @@ MODULE_DEVICE_TABLE(of, ca8210_of_ids);
static struct spi_driver ca8210_spi_driver = {
.driver = {
.name = DRIVER_NAME,
- .owner = THIS_MODULE,
- .of_match_table = of_match_ptr(ca8210_of_ids),
+ .of_match_table = ca8210_of_ids,
},
.probe = ca8210_probe,
.remove = ca8210_remove
diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c
index f53d185e0568..87abe3b46316 100644
--- a/drivers/net/ieee802154/mcr20a.c
+++ b/drivers/net/ieee802154/mcr20a.c
@@ -1352,7 +1352,7 @@ MODULE_DEVICE_TABLE(spi, mcr20a_device_id);
static struct spi_driver mcr20a_driver = {
.id_table = mcr20a_device_id,
.driver = {
- .of_match_table = of_match_ptr(mcr20a_of_match),
+ .of_match_table = mcr20a_of_match,
.name = "mcr20a",
},
.probe = mcr20a_probe,
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index 510822d6d0d9..bf10d0688eea 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -131,7 +131,7 @@ bool of_mdiobus_child_is_phy(struct device_node *child)
return true;
}
- if (!of_find_property(child, "compatible", NULL))
+ if (!of_property_present(child, "compatible"))
return true;
return false;
@@ -203,7 +203,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
/* auto scan for PHYs with empty reg property */
for_each_available_child_of_node(np, child) {
/* Skip PHYs with reg property set */
- if (of_find_property(child, "reg", NULL))
+ if (of_property_present(child, "reg"))
continue;
for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index 3903f3baba2b..622c3de3f3a8 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -112,11 +112,11 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs,
}
dev_dbg(&lynx->mdio->dev,
- "mode=%s/%s/%s link=%u an_enabled=%u an_complete=%u\n",
+ "mode=%s/%s/%s link=%u an_complete=%u\n",
phy_modes(state->interface),
phy_speed_to_str(state->speed),
phy_duplex_to_str(state->duplex),
- state->link, state->an_enabled, state->an_complete);
+ state->link, state->an_complete);
}
static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode,
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index bc428a816719..04a685353041 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -321,7 +321,7 @@ static int xpcs_read_fault_c73(struct dw_xpcs *xpcs,
return 0;
}
-static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an)
+static int xpcs_read_link_c73(struct dw_xpcs *xpcs)
{
bool link = true;
int ret;
@@ -333,15 +333,6 @@ static int xpcs_read_link_c73(struct dw_xpcs *xpcs, bool an)
if (!(ret & MDIO_STAT1_LSTATUS))
link = false;
- if (an) {
- ret = xpcs_read(xpcs, MDIO_MMD_AN, MDIO_STAT1);
- if (ret < 0)
- return ret;
-
- if (!(ret & MDIO_STAT1_LSTATUS))
- link = false;
- }
-
return link;
}
@@ -935,7 +926,7 @@ static int xpcs_get_state_c73(struct dw_xpcs *xpcs,
int ret;
/* Link needs to be read first ... */
- state->link = xpcs_read_link_c73(xpcs, state->an_enabled) > 0 ? 1 : 0;
+ state->link = xpcs_read_link_c73(xpcs) > 0 ? 1 : 0;
/* ... and then we check the faults. */
ret = xpcs_read_fault_c73(xpcs, state);
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 89cd821f1f46..5821f04c69dc 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -693,6 +693,30 @@ static int dp83867_of_init(struct phy_device *phydev)
}
#endif /* CONFIG_OF_MDIO */
+static int dp83867_suspend(struct phy_device *phydev)
+{
+ /* Disable PHY Interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_DISABLED;
+ dp83867_config_intr(phydev);
+ }
+
+ return genphy_suspend(phydev);
+}
+
+static int dp83867_resume(struct phy_device *phydev)
+{
+ /* Enable PHY Interrupts */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->interrupts = PHY_INTERRUPT_ENABLED;
+ dp83867_config_intr(phydev);
+ }
+
+ genphy_resume(phydev);
+
+ return 0;
+}
+
static int dp83867_probe(struct phy_device *phydev)
{
struct dp83867_private *dp83867;
@@ -968,8 +992,8 @@ static struct phy_driver dp83867_driver[] = {
.config_intr = dp83867_config_intr,
.handle_interrupt = dp83867_handle_interrupt,
- .suspend = genphy_suspend,
- .resume = genphy_resume,
+ .suspend = dp83867_suspend,
+ .resume = dp83867_resume,
.link_change_notify = dp83867_link_change_notify,
.set_loopback = dp83867_loopback,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2c84fccef4f6..e26c6723caa4 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -318,6 +318,7 @@ struct kszphy_ptp_priv {
struct ptp_clock_info ptp_clock_info;
/* Lock for ptp_clock */
struct mutex ptp_lock;
+ struct ptp_pin_desc *pin_config;
};
struct kszphy_priv {
@@ -435,11 +436,9 @@ static int kszphy_config_intr(struct phy_device *phydev)
if (err)
return err;
- temp = KSZPHY_INTCS_ALL;
- err = phy_write(phydev, MII_KSZPHY_INTCS, temp);
+ err = phy_write(phydev, MII_KSZPHY_INTCS, KSZPHY_INTCS_ALL);
} else {
- temp = 0;
- err = phy_write(phydev, MII_KSZPHY_INTCS, temp);
+ err = phy_write(phydev, MII_KSZPHY_INTCS, 0);
if (err)
return err;
@@ -3658,6 +3657,77 @@ static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0;
}
+#define LAN8841_EVENT_A 0
+#define LAN8841_EVENT_B 1
+#define LAN8841_PTP_LTC_TARGET_SEC_HI(event) ((event) == LAN8841_EVENT_A ? 278 : 288)
+#define LAN8841_PTP_LTC_TARGET_SEC_LO(event) ((event) == LAN8841_EVENT_A ? 279 : 289)
+#define LAN8841_PTP_LTC_TARGET_NS_HI(event) ((event) == LAN8841_EVENT_A ? 280 : 290)
+#define LAN8841_PTP_LTC_TARGET_NS_LO(event) ((event) == LAN8841_EVENT_A ? 281 : 291)
+
+static int lan8841_ptp_set_target(struct kszphy_ptp_priv *ptp_priv, u8 event,
+ s64 sec, u32 nsec)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ int ret;
+
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_SEC_HI(event),
+ upper_16_bits(sec));
+ if (ret)
+ return ret;
+
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_SEC_LO(event),
+ lower_16_bits(sec));
+ if (ret)
+ return ret;
+ /* The 0x3fff mask applies to the value written, not the register number */
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_NS_HI(event),
+ upper_16_bits(nsec) & 0x3fff);
+ if (ret)
+ return ret;
+
+ return phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_NS_LO(event),
+ lower_16_bits(nsec));
+}
+
+#define LAN8841_BUFFER_TIME 2
+
+static int lan8841_ptp_update_target(struct kszphy_ptp_priv *ptp_priv,
+ const struct timespec64 *ts)
+{
+ return lan8841_ptp_set_target(ptp_priv, LAN8841_EVENT_A,
+ ts->tv_sec + LAN8841_BUFFER_TIME, ts->tv_nsec);
+}
+
+#define LAN8841_PTP_LTC_TARGET_RELOAD_SEC_HI(event) ((event) == LAN8841_EVENT_A ? 282 : 292)
+#define LAN8841_PTP_LTC_TARGET_RELOAD_SEC_LO(event) ((event) == LAN8841_EVENT_A ? 283 : 293)
+#define LAN8841_PTP_LTC_TARGET_RELOAD_NS_HI(event) ((event) == LAN8841_EVENT_A ? 284 : 294)
+#define LAN8841_PTP_LTC_TARGET_RELOAD_NS_LO(event) ((event) == LAN8841_EVENT_A ? 285 : 295)
+
+static int lan8841_ptp_set_reload(struct kszphy_ptp_priv *ptp_priv, u8 event,
+ s64 sec, u32 nsec)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ int ret;
+
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_SEC_HI(event),
+ upper_16_bits(sec));
+ if (ret)
+ return ret;
+
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_SEC_LO(event),
+ lower_16_bits(sec));
+ if (ret)
+ return ret;
+ /* The 0x3fff mask applies to the value written, not the register number */
+ ret = phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_NS_HI(event),
+ upper_16_bits(nsec) & 0x3fff);
+ if (ret)
+ return ret;
+
+ return phy_write_mmd(phydev, 2, LAN8841_PTP_LTC_TARGET_RELOAD_NS_LO(event),
+ lower_16_bits(nsec));
+}
+
#define LAN8841_PTP_LTC_SET_SEC_HI 262
#define LAN8841_PTP_LTC_SET_SEC_MID 263
#define LAN8841_PTP_LTC_SET_SEC_LO 264
@@ -3671,6 +3741,7 @@ static int lan8841_ptp_settime64(struct ptp_clock_info *ptp,
struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
ptp_clock_info);
struct phy_device *phydev = ptp_priv->phydev;
+ int ret;
/* Set the value to be stored */
mutex_lock(&ptp_priv->ptp_lock);
@@ -3683,9 +3754,10 @@ static int lan8841_ptp_settime64(struct ptp_clock_info *ptp,
/* Set the command to load the LTC */
phy_write_mmd(phydev, 2, LAN8841_PTP_CMD_CTL,
LAN8841_PTP_CMD_CTL_PTP_LTC_LOAD);
+ ret = lan8841_ptp_update_target(ptp_priv, ts);
mutex_unlock(&ptp_priv->ptp_lock);
- return 0;
+ return ret;
}
#define LAN8841_PTP_LTC_RD_SEC_HI 358
@@ -3740,6 +3812,7 @@ static int lan8841_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
bool add = true;
u32 nsec;
s32 sec;
+ int ret;
/* The HW allows up to 15 sec to adjust the time, but here we limit to
* 10 sec the adjustment. The reason is, in case the adjustment is 14
@@ -3803,7 +3876,13 @@ static int lan8841_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
}
mutex_unlock(&ptp_priv->ptp_lock);
- return 0;
+ /* Update the target clock */
+ ptp->gettime64(ptp, &ts);
+ mutex_lock(&ptp_priv->ptp_lock);
+ ret = lan8841_ptp_update_target(ptp_priv, &ts);
+ mutex_unlock(&ptp_priv->ptp_lock);
+
+ return ret;
}
#define LAN8841_PTP_LTC_RATE_ADJ_HI 269
@@ -3839,6 +3918,292 @@ static int lan8841_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
return 0;
}
+static int lan8841_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ switch (func) {
+ case PTP_PF_NONE:
+ case PTP_PF_PEROUT:
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+#define LAN8841_PTP_GPIO_NUM 10
+#define LAN8841_GPIO_EN 128
+#define LAN8841_GPIO_DIR 129
+#define LAN8841_GPIO_BUF 130
+
+static int lan8841_ptp_perout_off(struct kszphy_ptp_priv *ptp_priv, int pin)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ int ret;
+
+ ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_EN, BIT(pin));
+ if (ret)
+ return ret;
+
+ ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DIR, BIT(pin));
+ if (ret)
+ return ret;
+
+ return phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_BUF, BIT(pin));
+}
+
+static int lan8841_ptp_perout_on(struct kszphy_ptp_priv *ptp_priv, int pin)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ int ret;
+
+ ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_EN, BIT(pin));
+ if (ret)
+ return ret;
+
+ ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DIR, BIT(pin));
+ if (ret)
+ return ret;
+
+ return phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_BUF, BIT(pin));
+}
+
+#define LAN8841_GPIO_DATA_SEL1 131
+#define LAN8841_GPIO_DATA_SEL2 132
+#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK GENMASK(2, 0)
+#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_A 1
+#define LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_B 2
+#define LAN8841_PTP_GENERAL_CONFIG 257
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A BIT(1)
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B BIT(3)
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK GENMASK(7, 4)
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK GENMASK(11, 8)
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A 4
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B 7
+
+static int lan8841_ptp_remove_event(struct kszphy_ptp_priv *ptp_priv, int pin,
+ u8 event)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ u16 tmp;
+ int ret;
+
+ /* Now remove pin from the event. GPIO_DATA_SEL1 contains the GPIO
+ * pins 0-4 while GPIO_DATA_SEL2 contains GPIO pins 5-9, therefore
+ * the register to modify depends on the pin
+ */
+ if (pin < 5) {
+ tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK << (3 * pin);
+ ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL1, tmp);
+ } else {
+ tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_MASK << (3 * (pin - 5));
+ ret = phy_clear_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL2, tmp);
+ }
+ if (ret)
+ return ret;
+
+ /* Disable the event in PTP_GENERAL_CONFIG, where enable_event sets it */
+ if (event == LAN8841_EVENT_A)
+ tmp = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A |
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK;
+ else
+ tmp = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B |
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK;
+ return phy_clear_bits_mmd(phydev, 2, LAN8841_PTP_GENERAL_CONFIG, tmp);
+}
+
+static int lan8841_ptp_enable_event(struct kszphy_ptp_priv *ptp_priv, int pin,
+ u8 event, int pulse_width)
+{
+ struct phy_device *phydev = ptp_priv->phydev;
+ u16 tmp;
+ int ret;
+
+ /* Enable the event */
+ if (event == LAN8841_EVENT_A)
+ ret = phy_modify_mmd(phydev, 2, LAN8841_PTP_GENERAL_CONFIG,
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A |
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A_MASK,
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_A |
+ pulse_width << LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_A);
+ else
+ ret = phy_modify_mmd(phydev, 2, LAN8841_PTP_GENERAL_CONFIG,
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B |
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B_MASK,
+ LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_POL_B |
+ pulse_width << LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_B);
+ if (ret)
+ return ret;
+
+ /* Now connect the pin to the event. GPIO_DATA_SEL1 contains the GPIO
+ * pins 0-4 while GPIO_DATA_SEL2 contains GPIO pins 5-9, therefore
+ * depending on the pin, it requires to read a different register
+ */
+ if (event == LAN8841_EVENT_A)
+ tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_A;
+ else
+ tmp = LAN8841_GPIO_DATA_SEL_GPIO_DATA_SEL_EVENT_B;
+
+ if (pin < 5)
+ ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL1,
+ tmp << (3 * pin));
+ else
+ ret = phy_set_bits_mmd(phydev, 2, LAN8841_GPIO_DATA_SEL2,
+ tmp << (3 * (pin - 5)));
+
+ return ret;
+}
+
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_200MS 13
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100MS 12
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50MS 11
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10MS 10
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5MS 9
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1MS 8
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500US 7
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100US 6
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50US 5
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10US 4
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5US 3
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1US 2
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500NS 1
+#define LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS 0
+
+static int lan8841_ptp_perout(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ struct kszphy_ptp_priv *ptp_priv = container_of(ptp, struct kszphy_ptp_priv,
+ ptp_clock_info);
+ struct phy_device *phydev = ptp_priv->phydev;
+ struct timespec64 ts_on, ts_period;
+ s64 on_nsec, period_nsec;
+ int pulse_width;
+ int pin;
+ int ret;
+
+ if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE)
+ return -EOPNOTSUPP;
+
+ pin = ptp_find_pin(ptp_priv->ptp_clock, PTP_PF_PEROUT, rq->perout.index);
+ if (pin == -1 || pin >= LAN8841_PTP_GPIO_NUM)
+ return -EINVAL;
+ /* Turning off: release the GPIO, then detach the pin from event A */
+ if (!on) {
+ ret = lan8841_ptp_perout_off(ptp_priv, pin);
+ if (ret)
+ return ret;
+
+ return lan8841_ptp_remove_event(ptp_priv, pin, LAN8841_EVENT_A);
+ }
+
+ ts_on.tv_sec = rq->perout.on.sec;
+ ts_on.tv_nsec = rq->perout.on.nsec;
+ on_nsec = timespec64_to_ns(&ts_on);
+
+ ts_period.tv_sec = rq->perout.period.sec;
+ ts_period.tv_nsec = rq->perout.period.nsec;
+ period_nsec = timespec64_to_ns(&ts_period);
+
+ if (period_nsec < 200) {
+ pr_warn_ratelimited("%s: perout period too small, minimum is 200 nsec\n",
+ phydev_name(phydev));
+ return -EOPNOTSUPP;
+ }
+
+ if (on_nsec >= period_nsec) {
+ pr_warn_ratelimited("%s: pulse width must be smaller than period\n",
+ phydev_name(phydev));
+ return -EINVAL;
+ }
+
+ switch (on_nsec) {
+ case 200000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_200MS;
+ break;
+ case 100000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100MS;
+ break;
+ case 50000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50MS;
+ break;
+ case 10000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10MS;
+ break;
+ case 5000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5MS;
+ break;
+ case 1000000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1MS;
+ break;
+ case 500000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500US;
+ break;
+ case 100000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100US;
+ break;
+ case 50000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_50US;
+ break;
+ case 10000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_10US;
+ break;
+ case 5000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_5US;
+ break;
+ case 1000:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_1US;
+ break;
+ case 500:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_500NS;
+ break;
+ case 100:
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS;
+ break;
+ default:
+ pr_warn_ratelimited("%s: Use default duty cycle of 100ns\n",
+ phydev_name(phydev));
+ pulse_width = LAN8841_PTP_GENERAL_CONFIG_LTC_EVENT_100NS;
+ break;
+ }
+
+ mutex_lock(&ptp_priv->ptp_lock);
+ ret = lan8841_ptp_set_target(ptp_priv, LAN8841_EVENT_A, rq->perout.start.sec,
+ rq->perout.start.nsec);
+ mutex_unlock(&ptp_priv->ptp_lock);
+ if (ret)
+ return ret;
+
+ ret = lan8841_ptp_set_reload(ptp_priv, LAN8841_EVENT_A, rq->perout.period.sec,
+ rq->perout.period.nsec);
+ if (ret)
+ return ret;
+
+ ret = lan8841_ptp_enable_event(ptp_priv, pin, LAN8841_EVENT_A,
+ pulse_width);
+ if (ret)
+ return ret;
+
+ ret = lan8841_ptp_perout_on(ptp_priv, pin);
+ if (ret)
+ lan8841_ptp_remove_event(ptp_priv, pin, LAN8841_EVENT_A);
+
+ return ret;
+}
+
+static int lan8841_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ switch (rq->type) {
+ case PTP_CLK_REQ_PEROUT:
+ return lan8841_ptp_perout(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static struct ptp_clock_info lan8841_ptp_clock_info = {
.owner = THIS_MODULE,
.name = "lan8841 ptp",
@@ -3847,6 +4212,10 @@ static struct ptp_clock_info lan8841_ptp_clock_info = {
.settime64 = lan8841_ptp_settime64,
.adjtime = lan8841_ptp_adjtime,
.adjfine = lan8841_ptp_adjfine,
+ .verify = lan8841_ptp_verify,
+ .enable = lan8841_ptp_enable,
+ .n_per_out = LAN8841_PTP_GPIO_NUM,
+ .n_pins = LAN8841_PTP_GPIO_NUM,
};
#define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER 3
@@ -3874,7 +4243,23 @@ static int lan8841_probe(struct phy_device *phydev)
priv = phydev->priv;
ptp_priv = &priv->ptp_priv;
+ ptp_priv->pin_config = devm_kcalloc(&phydev->mdio.dev,
+ LAN8841_PTP_GPIO_NUM,
+ sizeof(*ptp_priv->pin_config),
+ GFP_KERNEL);
+ if (!ptp_priv->pin_config)
+ return -ENOMEM;
+
+ for (int i = 0; i < LAN8841_PTP_GPIO_NUM; ++i) {
+ struct ptp_pin_desc *p = &ptp_priv->pin_config[i];
+
+ snprintf(p->name, sizeof(p->name), "pin%d", i);
+ p->index = i;
+ p->func = PTP_PF_NONE;
+ }
+
ptp_priv->ptp_clock_info = lan8841_ptp_clock_info;
+ ptp_priv->ptp_clock_info.pin_config = ptp_priv->pin_config;
ptp_priv->ptp_clock = ptp_clock_register(&ptp_priv->ptp_clock_info,
&phydev->mdio.dev);
if (IS_ERR(ptp_priv->ptp_clock)) {
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
index e5972b4ef6e8..8e6bb97b5f85 100644
--- a/drivers/net/phy/mxl-gpy.c
+++ b/drivers/net/phy/mxl-gpy.c
@@ -107,6 +107,13 @@ struct gpy_priv {
u8 fw_major;
u8 fw_minor;
+
+ /* It takes 3 seconds to fully switch out of loopback mode before
+ * it can safely re-enter loopback mode. Record the time when
+ * loopback is disabled. Check and wait if necessary before loopback
+ * is enabled.
+ */
+ u64 lb_dis_to;
};
static const struct {
@@ -769,18 +776,34 @@ static void gpy_get_wol(struct phy_device *phydev,
static int gpy_loopback(struct phy_device *phydev, bool enable)
{
+ struct gpy_priv *priv = phydev->priv;
+ u16 set = 0;
int ret;
- ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
- enable ? BMCR_LOOPBACK : 0);
- if (!ret) {
- /* It takes some time for PHY device to switch
- * into/out-of loopback mode.
+ if (enable) {
+ u64 now = get_jiffies_64();
+
+ /* wait until 3 seconds from last disable */
+ if (time_before64(now, priv->lb_dis_to))
+ msleep(jiffies64_to_msecs(priv->lb_dis_to - now));
+
+ set = BMCR_LOOPBACK;
+ }
+ /* phy_modify() returns 0 on success; only bail out on an error */
+ ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, set);
+ if (ret)
+ return ret;
+
+ if (enable) {
+ /* It takes some time for PHY device to switch into
+ * loopback mode.
*/
msleep(100);
+ } else {
+ priv->lb_dis_to = get_jiffies_64() + HZ * 3;
}
- return ret;
+ return 0;
}
static int gpy115_loopback(struct phy_device *phydev, bool enable)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1785f1cead97..c0760cbf534b 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3076,9 +3076,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_phy_node);
* phy_probe - probe and init a PHY device
* @dev: device to probe and init
*
- * Description: Take care of setting up the phy_device structure,
- * set the state to READY (the driver's init function should
- * set it to STARTING if needed).
+ * Take care of setting up the phy_device structure, set the state to READY.
*/
static int phy_probe(struct device *dev)
{
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index c02cad6478a8..39e3095796d0 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -255,6 +255,8 @@ struct sfp {
unsigned int module_power_mW;
unsigned int module_t_start_up;
unsigned int module_t_wait;
+
+ bool have_a2;
bool tx_fault_ignore;
const struct sfp_quirk *quirk;
@@ -1453,20 +1455,10 @@ static void sfp_hwmon_probe(struct work_struct *work)
static int sfp_hwmon_insert(struct sfp *sfp)
{
- if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE)
- return 0;
-
- if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM))
- return 0;
-
- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
- /* This driver in general does not support address
- * change.
- */
- return 0;
-
- mod_delayed_work(system_wq, &sfp->hwmon_probe, 1);
- sfp->hwmon_tries = R_PROBE_RETRY_SLOW;
+ if (sfp->have_a2 && sfp->id.ext.diagmon & SFP_DIAGMON_DDM) {
+ mod_delayed_work(system_wq, &sfp->hwmon_probe, 1);
+ sfp->hwmon_tries = R_PROBE_RETRY_SLOW;
+ }
return 0;
}
@@ -1916,6 +1908,18 @@ static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id)
return 0;
}
+static int sfp_module_parse_sff8472(struct sfp *sfp)
+{
+ /* If the module requires address swap mode, warn about it */
+ if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+ dev_warn(sfp->dev,
+ "module address swap to access page 0xA2 is not supported.\n");
+ else
+ sfp->have_a2 = true;
+
+ return 0;
+}
+
static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
{
/* SFP module inserted - read I2C data */
@@ -2053,10 +2057,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
return -EINVAL;
}
- /* If the module requires address swap mode, warn about it */
- if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
- dev_warn(sfp->dev,
- "module address swap to access page 0xA2 is not supported.\n");
+ if (sfp->id.ext.sff8472_compliance != SFP_SFF8472_COMPLIANCE_NONE) {
+ ret = sfp_module_parse_sff8472(sfp);
+ if (ret < 0)
+ return ret;
+ }
/* Parse the module power requirement */
ret = sfp_module_parse_power(sfp);
@@ -2103,6 +2108,7 @@ static void sfp_sm_mod_remove(struct sfp *sfp)
memset(&sfp->id, 0, sizeof(sfp->id));
sfp->module_power_mW = 0;
+ sfp->have_a2 = false;
dev_info(sfp->dev, "module removed\n");
}
@@ -2278,7 +2284,11 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
sfp->sm_dev_state != SFP_DEV_UP)
break;
- if (!(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE))
+ /* Only use the soft state bits if we have access to the A2h
+ * memory, which implies that we have some level of SFF-8472
+ * compliance.
+ */
+ if (sfp->have_a2)
sfp_soft_start_poll(sfp);
sfp_module_tx_enable(sfp);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index df2c5435c5c4..721871184205 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -99,20 +99,13 @@ static irqreturn_t smsc_phy_handle_interrupt(struct phy_device *phydev)
static int smsc_phy_config_init(struct phy_device *phydev)
{
struct smsc_phy_priv *priv = phydev->priv;
- int rc;
if (!priv->energy_enable || phydev->irq != PHY_POLL)
return 0;
- rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
-
- if (rc < 0)
- return rc;
-
- /* Enable energy detect mode for this SMSC Transceivers */
- rc = phy_write(phydev, MII_LAN83C185_CTRL_STATUS,
- rc | MII_LAN83C185_EDPWRDOWN);
- return rc;
+ /* Enable energy detect power down mode */
+ return phy_set_bits(phydev, MII_LAN83C185_CTRL_STATUS,
+ MII_LAN83C185_EDPWRDOWN);
}
static int smsc_phy_reset(struct phy_device *phydev)
@@ -170,18 +163,15 @@ static int lan87xx_config_aneg(struct phy_device *phydev)
static int lan95xx_config_aneg_ext(struct phy_device *phydev)
{
- int rc;
+ if (phydev->phy_id == 0x0007c0f0) { /* LAN9500A or LAN9505A */
+ /* Extend Manual AutoMDIX timer */
+ int rc = phy_set_bits(phydev, PHY_EDPD_CONFIG,
+ PHY_EDPD_CONFIG_EXT_CROSSOVER_);
- if (phydev->phy_id != 0x0007c0f0) /* not (LAN9500A or LAN9505A) */
- return lan87xx_config_aneg(phydev);
-
- /* Extend Manual AutoMDIX timer */
- rc = phy_read(phydev, PHY_EDPD_CONFIG);
- if (rc < 0)
- return rc;
+ if (rc < 0)
+ return rc;
+ }
- rc |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
- phy_write(phydev, PHY_EDPD_CONFIG, rc);
return lan87xx_config_aneg(phydev);
}
@@ -282,7 +272,6 @@ static void smsc_get_stats(struct phy_device *phydev,
static int smsc_phy_probe(struct phy_device *phydev)
{
struct device *dev = &phydev->mdio.dev;
- struct device_node *of_node = dev->of_node;
struct smsc_phy_priv *priv;
struct clk *refclk;
@@ -292,7 +281,7 @@ static int smsc_phy_probe(struct phy_device *phydev)
priv->energy_enable = true;
- if (of_property_read_bool(of_node, "smsc,disable-energy-detect"))
+ if (device_property_present(dev, "smsc,disable-energy-detect"))
priv->energy_enable = false;
phydev->priv = priv;
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
index d4202d40d47a..7196e927c2cd 100644
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -491,7 +491,7 @@ static void ks8995_remove(struct spi_device *spi)
static struct spi_driver ks8995_driver = {
.driver = {
.name = "spi-ks8995",
- .of_match_table = of_match_ptr(ks8895_spi_of_match),
+ .of_match_table = ks8895_spi_of_match,
},
.probe = ks8995_probe,
.remove = ks8995_remove,
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 8941aa199ea3..ce993cc75bf3 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -555,6 +555,9 @@ static int tap_open(struct inode *inode, struct file *file)
goto err_put;
}
+ /* tap groks IOCB_NOWAIT just fine, mark it as such */
+ file->f_mode |= FMODE_NOWAIT;
+
dev_put(tap->dev);
rtnl_unlock();
@@ -771,8 +774,12 @@ static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct tap_queue *q = file->private_data;
+ int noblock = 0;
+
+ if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
+ noblock = 1;
- return tap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK);
+ return tap_get_user(q, NULL, from, noblock);
}
/* Put packet to the user space buffer */
@@ -888,8 +895,12 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct file *file = iocb->ki_filp;
struct tap_queue *q = file->private_data;
ssize_t len = iov_iter_count(to), ret;
+ int noblock = 0;
+
+ if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
+ noblock = 1;
- ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK, NULL);
+ ret = tap_do_read(q, to, noblock, NULL);
ret = min_t(ssize_t, ret, len);
if (ret > 0)
iocb->ki_pos = ret;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ad653b32b2f0..4c7f74904c25 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3463,6 +3463,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
+ /* tun groks IOCB_NOWAIT just fine, mark it as such */
+ file->f_mode |= FMODE_NOWAIT;
return 0;
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2396c28c0122..e2560b6f7980 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM,
VIRTIO_NET_F_GUEST_USO4,
- VIRTIO_NET_F_GUEST_USO6
+ VIRTIO_NET_F_GUEST_USO6,
+ VIRTIO_NET_F_GUEST_HDRLEN
};
#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
@@ -4232,7 +4233,8 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
- VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
+ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
+ VIRTIO_NET_F_GUEST_HDRLEN
static unsigned int features[] = {
VIRTNET_FEATURES,
diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile
index d4c255499b72..91b8fec8b6cf 100644
--- a/drivers/net/vxlan/Makefile
+++ b/drivers/net/vxlan/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_VXLAN) += vxlan.o
-vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o
+vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o vxlan_mdb.o
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index b1b179effe2a..e2e5f5dac7e6 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -71,53 +71,6 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
ip_tunnel_collect_metadata();
}
-#if IS_ENABLED(CONFIG_IPV6)
-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
-{
- if (nla_len(nla) >= sizeof(struct in6_addr)) {
- ip->sin6.sin6_addr = nla_get_in6_addr(nla);
- ip->sa.sa_family = AF_INET6;
- return 0;
- } else if (nla_len(nla) >= sizeof(__be32)) {
- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
- ip->sa.sa_family = AF_INET;
- return 0;
- } else {
- return -EAFNOSUPPORT;
- }
-}
-
-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- const union vxlan_addr *ip)
-{
- if (ip->sa.sa_family == AF_INET6)
- return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
- else
- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
-}
-
-#else /* !CONFIG_IPV6 */
-
-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
-{
- if (nla_len(nla) >= sizeof(struct in6_addr)) {
- return -EAFNOSUPPORT;
- } else if (nla_len(nla) >= sizeof(__be32)) {
- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
- ip->sa.sa_family = AF_INET;
- return 0;
- } else {
- return -EAFNOSUPPORT;
- }
-}
-
-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- const union vxlan_addr *ip)
-{
- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
-}
-#endif
-
/* Find VXLAN socket based on network namespace, address family, UDP port,
* enabled unshareable flags and socket device binding (see l3mdev with
* non-default VRF).
@@ -1863,7 +1816,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
struct vxlan_fdb *f;
struct sk_buff *reply;
- if (!(n->nud_state & NUD_CONNECTED)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
@@ -2027,7 +1980,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
struct vxlan_fdb *f;
struct sk_buff *reply;
- if (!(n->nud_state & NUD_CONNECTED)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
@@ -2442,9 +2395,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
return 0;
}
-static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
- __be32 default_vni, struct vxlan_rdst *rdst,
- bool did_rsc)
+void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+ __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
@@ -2791,6 +2743,21 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
+ if (vxlan->cfg.flags & VXLAN_F_MDB) {
+ struct vxlan_mdb_entry *mdb_entry;
+
+ rcu_read_lock();
+ mdb_entry = vxlan_mdb_entry_skb_get(vxlan, skb, vni);
+ if (mdb_entry) {
+ netdev_tx_t ret;
+
+ ret = vxlan_mdb_xmit(vxlan, mdb_entry, skb);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
+ }
+
eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
@@ -2926,8 +2893,14 @@ static int vxlan_init(struct net_device *dev)
if (err)
goto err_free_percpu;
+ err = vxlan_mdb_init(vxlan);
+ if (err)
+ goto err_gro_cells_destroy;
+
return 0;
+err_gro_cells_destroy:
+ gro_cells_destroy(&vxlan->gro_cells);
err_free_percpu:
free_percpu(dev->tstats);
err_vnigroup_uninit:
@@ -2952,6 +2925,8 @@ static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ vxlan_mdb_fini(vxlan);
+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
vxlan_vnigroup_uninit(vxlan);
@@ -3108,6 +3083,9 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
.ndo_fdb_get = vxlan_fdb_get,
+ .ndo_mdb_add = vxlan_mdb_add,
+ .ndo_mdb_del = vxlan_mdb_del,
+ .ndo_mdb_dump = vxlan_mdb_dump,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c
new file mode 100644
index 000000000000..5e041622261a
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_mdb.c
@@ -0,0 +1,1462 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/if_bridge.h>
+#include <linux/in.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+#include <net/vxlan.h>
+
+#include "vxlan_private.h"
+
+ /* Lookup key of an MDB entry. The structure as a whole (sizeof) is the
+  * rhashtable key, see vxlan_mdb_rht_params.
+  */
+ struct vxlan_mdb_entry_key {
+ union vxlan_addr src; /* source address; "any" for (*, G) entries */
+ union vxlan_addr dst; /* group address */
+ __be32 vni; /* source VNI, big endian */
+ };
+
+ /* An MDB entry: one key and the list of remotes attached to it. */
+ struct vxlan_mdb_entry {
+ struct rhash_head rhnode; /* hash table linkage, see vxlan_mdb_rht_params */
+ struct list_head remotes; /* struct vxlan_mdb_remote, linked via ->list */
+ struct vxlan_mdb_entry_key key;
+ struct hlist_node mdb_node; /* linkage in vxlan->mdb_list, walked on dump */
+ struct rcu_head rcu;
+ };
+
+#define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0)
+
+ /* A remote destination attached to an MDB entry. */
+ struct vxlan_mdb_remote {
+ struct list_head list; /* linkage in vxlan_mdb_entry::remotes */
+ struct vxlan_rdst __rcu *rd; /* written with rcu_assign_pointer(), read with rtnl_dereference() */
+ u8 flags; /* VXLAN_MDB_REMOTE_F_* */
+ u8 filter_mode; /* MCAST_INCLUDE or MCAST_EXCLUDE */
+ u8 rt_protocol; /* RTPROT_*, reported as MDBA_MDB_EATTR_RTPROT */
+ struct hlist_head src_list; /* struct vxlan_mdb_src_entry, linked via ->node */
+ struct rcu_head rcu;
+ };
+
+#define VXLAN_SGRP_F_DELETE BIT(0)
+
+ /* One source address in the source list of a remote. */
+ struct vxlan_mdb_src_entry {
+ struct hlist_node node; /* linkage in vxlan_mdb_remote::src_list */
+ union vxlan_addr addr;
+ u8 flags; /* VXLAN_SGRP_F_* */
+ };
+
+ /* Dump resume state, overlaid on netlink_callback::ctx by vxlan_mdb_dump(). */
+ struct vxlan_mdb_dump_ctx {
+ long reserved; /* NOTE(review): not used in this file; presumably owned by the caller - confirm */
+ long entry_idx; /* index of the MDB entry to resume from */
+ long remote_idx; /* index of the remote to resume from within that entry */
+ };
+
+ /* A source address parsed from a request's source list. */
+ struct vxlan_mdb_config_src_entry {
+ union vxlan_addr addr;
+ struct list_head node; /* linkage in vxlan_mdb_config::src_list */
+ };
+
+ /* Parsed form of an MDB add / delete request, see vxlan_mdb_config_init(). */
+ struct vxlan_mdb_config {
+ struct vxlan_dev *vxlan;
+ struct vxlan_mdb_entry_key group; /* key of the MDB entry being configured */
+ struct list_head src_list; /* struct vxlan_mdb_config_src_entry, linked via ->node */
+ union vxlan_addr remote_ip; /* from MDBE_ATTR_DST (mandatory) */
+ u32 remote_ifindex; /* from MDBE_ATTR_IFINDEX, 0 if not specified */
+ __be32 remote_vni; /* from MDBE_ATTR_VNI, defaults to the device's default VNI */
+ __be16 remote_port; /* from MDBE_ATTR_DST_PORT, defaults to the device's port */
+ u16 nlflags; /* NLM_F_* flags of the request */
+ u8 flags; /* VXLAN_MDB_REMOTE_F_* */
+ u8 filter_mode; /* MCAST_INCLUDE or MCAST_EXCLUDE */
+ u8 rt_protocol; /* from MDBE_ATTR_RTPROT, defaults to RTPROT_STATIC */
+ };
+
+ /* Hash table of struct vxlan_mdb_entry keyed by the complete
+  * struct vxlan_mdb_entry_key, so every byte of a key takes part in lookups.
+  */
+ static const struct rhashtable_params vxlan_mdb_rht_params = {
+ .head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
+ .key_offset = offsetof(struct vxlan_mdb_entry, key),
+ .key_len = sizeof(struct vxlan_mdb_entry_key),
+ .automatic_shrinking = true,
+ };
+
+static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack);
+static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack);
+
+/* Translate (@mdb_entry, @remote) into the bridge uAPI representation @e.
+ * @e is fully zeroed first; the entry is always reported as permanent and is
+ * flagged as blocked when the remote carries VXLAN_MDB_REMOTE_F_BLOCKED.
+ */
+static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan,
+				    const struct vxlan_mdb_entry *mdb_entry,
+				    const struct vxlan_mdb_remote *remote,
+				    struct br_mdb_entry *e)
+{
+	const union vxlan_addr *group = &mdb_entry->key.dst;
+
+	memset(e, 0, sizeof(*e));
+
+	/* Group address and its EtherType; other families leave both zeroed. */
+	switch (group->sa.sa_family) {
+	case AF_INET:
+		e->addr.proto = htons(ETH_P_IP);
+		e->addr.u.ip4 = group->sin.sin_addr.s_addr;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		e->addr.proto = htons(ETH_P_IPV6);
+		e->addr.u.ip6 = group->sin6.sin6_addr;
+		break;
+#endif
+	}
+
+	if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED)
+		e->flags |= MDB_FLAGS_BLOCKED;
+	e->state = MDB_PERMANENT;
+	e->ifindex = vxlan->dev->ifindex;
+}
+
+/* Emit the MDBA_MDB_EATTR_SRC_LIST nest for @remote into @skb. Nothing is
+ * emitted when the remote has no sources. Each source is reported with its
+ * address and a timer of zero.
+ *
+ * Returns 0 on success or -EMSGSIZE, in which case the whole list nest is
+ * cancelled.
+ */
+static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb,
+					  const struct vxlan_mdb_remote *remote)
+{
+	struct nlattr *list_nest, *ent_nest;
+	struct vxlan_mdb_src_entry *src;
+
+	if (hlist_empty(&remote->src_list))
+		return 0;
+
+	list_nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
+	if (!list_nest)
+		return -EMSGSIZE;
+
+	hlist_for_each_entry(src, &remote->src_list, node) {
+		ent_nest = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
+		if (!ent_nest)
+			goto err_cancel;
+
+		if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+				       &src->addr))
+			goto err_cancel;
+
+		if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0))
+			goto err_cancel;
+
+		nla_nest_end(skb, ent_nest);
+	}
+
+	nla_nest_end(skb, list_nest);
+	return 0;
+
+err_cancel:
+	/* Cancelling the outer nest also drops any partial inner nest. */
+	nla_nest_cancel(skb, list_nest);
+	return -EMSGSIZE;
+}
+
+ /* Emit one MDBA_MDB_ENTRY_INFO nest describing @remote of @mdb_entry.
+  *
+  * The remote's rdst is read with rtnl_dereference(), so RTNL must be held.
+  * Returns 0 on success. If any attribute does not fit, the nest is cancelled
+  * and -EMSGSIZE is returned.
+  */
+ static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
+ {
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+ struct br_mdb_entry e;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e);
+
+ /* The br_mdb_entry is the headerless payload of the nest; the timer is
+  * always reported as zero.
+  */
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0))
+ goto nest_err;
+
+ /* Source is only reported when it is not the "any" address. */
+ if (!vxlan_addr_any(&mdb_entry->key.src) &&
+ vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src))
+ goto nest_err;
+
+ if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) ||
+ nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) ||
+ vxlan_mdb_entry_info_fill_srcs(skb, remote) ||
+ vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip))
+ goto nest_err;
+
+ /* Port and VNI are only reported when they differ from the device's
+  * defaults.
+  */
+ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port &&
+ nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT,
+ be16_to_cpu(rd->remote_port)))
+ goto nest_err;
+
+ if (rd->remote_vni != vxlan->default_dst.remote_vni &&
+ nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni)))
+ goto nest_err;
+
+ if (rd->remote_ifindex &&
+ nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex))
+ goto nest_err;
+
+ /* Source VNI is only reported for devices with
+  * VXLAN_F_COLLECT_METADATA set and a non-zero key VNI.
+  */
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) &&
+ mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI,
+ be32_to_cpu(mdb_entry->key.vni)))
+ goto nest_err;
+
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+ nest_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+ }
+
+/* Dump the remotes of @mdb_entry inside one MDBA_MDB_ENTRY nest, skipping the
+ * first ctx->remote_idx remotes (already dumped by a previous pass).
+ *
+ * On error, ctx->remote_idx is set to the index of the remote that did not
+ * fit so the next pass resumes there; on success it is reset to zero. The
+ * nest is closed in both cases.
+ */
+static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan,
+				struct sk_buff *skb,
+				struct vxlan_mdb_dump_ctx *ctx,
+				const struct vxlan_mdb_entry *mdb_entry)
+{
+	int first = ctx->remote_idx;
+	struct vxlan_mdb_remote *remote;
+	struct nlattr *entry_nest;
+	int cur = 0;
+	int err = 0;
+
+	entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+	if (!entry_nest)
+		return -EMSGSIZE;
+
+	list_for_each_entry(remote, &mdb_entry->remotes, list) {
+		if (cur >= first) {
+			err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry,
+							remote);
+			if (err)
+				break;
+		}
+		cur++;
+	}
+
+	if (err)
+		ctx->remote_idx = cur;
+	else
+		ctx->remote_idx = 0;
+
+	nla_nest_end(skb, entry_nest);
+	return err;
+}
+
+/* Dump the device's MDB entries inside one MDBA_MDB nest, skipping the first
+ * ctx->entry_idx entries (already dumped by a previous pass).
+ *
+ * On error, ctx->entry_idx is set to the index of the entry that could not be
+ * completed so the next pass resumes there; on success it is reset to zero.
+ * The nest is closed in both cases.
+ */
+static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb,
+			  struct vxlan_mdb_dump_ctx *ctx)
+{
+	int first = ctx->entry_idx;
+	struct vxlan_mdb_entry *mdb_entry;
+	struct nlattr *mdb_nest;
+	int cur = 0;
+	int err = 0;
+
+	mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+	if (!mdb_nest)
+		return -EMSGSIZE;
+
+	hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) {
+		if (cur >= first) {
+			err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry);
+			if (err)
+				break;
+		}
+		cur++;
+	}
+
+	if (err)
+		ctx->entry_idx = cur;
+	else
+		ctx->entry_idx = 0;
+
+	nla_nest_end(skb, mdb_nest);
+	return err;
+}
+
+ /* Dump the MDB of @dev into @skb as one RTM_NEWMDB message flagged
+  * NLM_F_MULTI. Resume state between invocations is kept in cb->ctx, viewed
+  * as a struct vxlan_mdb_dump_ctx. Must be called with RTNL held.
+  *
+  * Returns -EMSGSIZE if the message header does not fit, otherwise the result
+  * of vxlan_mdb_fill().
+  */
+ int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb)
+ {
+ struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ int err;
+
+ ASSERT_RTNL();
+
+ NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx);
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm),
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = dev->ifindex;
+
+ err = vxlan_mdb_fill(vxlan, skb, ctx);
+
+ /* The message is finalized even when the fill was only partial. */
+ nlmsg_end(skb, nlh);
+
+ /* Publish the MDB sequence counter for the dump consistency check. */
+ cb->seq = vxlan->mdb_seq;
+ nl_dump_check_consistent(cb, nlh);
+
+ return err;
+ }
+
+ /* Policy for the attributes of one source list entry. The address only has
+  * its length bounded here (between an IPv4 and an IPv6 address); the exact
+  * length is checked in vxlan_mdb_is_valid_source().
+  */
+ static const struct nla_policy
+ vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = {
+ [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ };
+
+ /* Policy for MDBE_ATTR_SRC_LIST: a list of nested source entries. */
+ static const struct nla_policy
+ vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
+ [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol),
+ };
+
+/* Valid range of a VNI attribute: 0 .. VXLAN_N_VID - 1. Only ever read
+ * through the policies below, hence const.
+ */
+static const struct netlink_range_validation vni_range = {
+	.max = VXLAN_N_VID - 1,
+};
+
+ /* Policy for the attributes nested in MDBA_SET_ENTRY_ATTRS. Addresses only
+  * have their length bounded here (between an IPv4 and an IPv6 address).
+  */
+ static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE,
+ MCAST_INCLUDE),
+ [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol),
+ [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
+ [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
+ [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
+ [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
+ };
+
+/* Check that @attr holds a source address usable with group protocol @proto:
+ * the length must exactly match the address family and the address must not
+ * be multicast. On failure an error message is recorded in @extack.
+ */
+static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto,
+				      struct netlink_ext_ack *extack)
+{
+	int len = nla_len(attr);
+
+	switch (proto) {
+	case htons(ETH_P_IP):
+		if (len != sizeof(struct in_addr)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
+			return false;
+		}
+		if (ipv4_is_multicast(nla_get_in_addr(attr))) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
+			return false;
+		}
+		return true;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6): {
+		struct in6_addr addr6;
+
+		if (len != sizeof(struct in6_addr)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
+			return false;
+		}
+		addr6 = nla_get_in6_addr(attr);
+		if (ipv6_addr_is_multicast(&addr6)) {
+			NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
+			return false;
+		}
+		return true;
+	}
+#endif
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
+		return false;
+	}
+}
+
+/* Populate the source and group addresses of cfg->group from the request.
+ * The group comes from @entry; the source, if present, from @source_attr
+ * (already validated by the caller). Unknown protocols leave the group
+ * address untouched.
+ */
+static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg,
+				       const struct br_mdb_entry *entry,
+				       const struct nlattr *source_attr)
+{
+	struct vxlan_mdb_entry_key *key = &cfg->group;
+
+	if (source_attr)
+		vxlan_nla_get_addr(&key->src, source_attr);
+
+	switch (entry->addr.proto) {
+	case htons(ETH_P_IP):
+		key->dst.sin.sin_addr.s_addr = entry->addr.u.ip4;
+		key->dst.sa.sa_family = AF_INET;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		key->dst.sin6.sin6_addr = entry->addr.u.ip6;
+		key->dst.sa.sa_family = AF_INET6;
+		break;
+#endif
+	}
+}
+
+/* A (*, G) key has a specific group address and the "any" source address. */
+static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group)
+{
+	if (vxlan_addr_any(&group->dst))
+		return false;
+
+	return vxlan_addr_any(&group->src);
+}
+
+/* An (S, G) key has both a specific group and a specific source address. */
+static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group)
+{
+	if (vxlan_addr_any(&group->dst))
+		return false;
+
+	return !vxlan_addr_any(&group->src);
+}
+
+/* Parse one nested source list entry and append it to cfg->src_list.
+ *
+ * Returns 0 on success, a parse error from nla_parse_nested(), -EINVAL when
+ * the address attribute is missing or invalid for @proto, -ENOMEM on
+ * allocation failure, or the error of vxlan_nla_get_addr().
+ */
+static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg,
+					   __be16 proto,
+					   const struct nlattr *src_entry,
+					   struct netlink_ext_ack *extack)
+{
+	struct nlattr *attrs[MDBE_SRCATTR_MAX + 1];
+	struct vxlan_mdb_config_src_entry *new_src;
+	int err;
+
+	err = nla_parse_nested(attrs, MDBE_SRCATTR_MAX, src_entry,
+			       vxlan_mdbe_src_list_entry_pol, extack);
+	if (err)
+		return err;
+
+	if (NL_REQ_ATTR_CHECK(extack, src_entry, attrs, MDBE_SRCATTR_ADDRESS))
+		return -EINVAL;
+
+	if (!vxlan_mdb_is_valid_source(attrs[MDBE_SRCATTR_ADDRESS], proto,
+				       extack))
+		return -EINVAL;
+
+	new_src = kzalloc(sizeof(*new_src), GFP_KERNEL);
+	if (!new_src)
+		return -ENOMEM;
+
+	err = vxlan_nla_get_addr(&new_src->addr, attrs[MDBE_SRCATTR_ADDRESS]);
+	if (err) {
+		/* Not linked yet, so a plain free is enough. */
+		kfree(new_src);
+		return err;
+	}
+
+	list_add_tail(&new_src->node, &cfg->src_list);
+	return 0;
+}
+
+ /* Unlink @src from the configuration's source list and free it. */
+ static void
+ vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src)
+ {
+ list_del(&src->node);
+ kfree(src);
+ }
+
+/* Parse every entry nested in @src_list into cfg->src_list. If any entry
+ * fails, all entries added so far are released and the error is returned.
+ */
+static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg,
+					  __be16 proto,
+					  const struct nlattr *src_list,
+					  struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_config_src_entry *cur, *next;
+	struct nlattr *src_entry;
+	int rem, err;
+
+	nla_for_each_nested(src_entry, src_list, rem) {
+		err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry,
+						      extack);
+		if (!err)
+			continue;
+
+		/* Roll back, newest entry first. */
+		list_for_each_entry_safe_reverse(cur, next, &cfg->src_list,
+						 node)
+			vxlan_mdb_config_src_entry_fini(cur);
+		return err;
+	}
+
+	return 0;
+}
+
+ /* Release every source in cfg->src_list, last entry first. */
+ static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg)
+ {
+ struct vxlan_mdb_config_src_entry *src, *tmp;
+
+ list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node)
+ vxlan_mdb_config_src_entry_fini(src);
+ }
+
+ /* Parse and validate the attributes nested in @set_attrs into @cfg.
+  *
+  * @cfg must already hold the defaults set by vxlan_mdb_config_init().
+  * Returns 0 on success or a negative errno with a message in @extack. Once
+  * the source list has been parsed, later failures release it before
+  * returning.
+  */
+ static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg,
+ const struct br_mdb_entry *entry,
+ const struct nlattr *set_attrs,
+ struct netlink_ext_ack *extack)
+ {
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs,
+ vxlan_mdbe_attrs_pol, extack);
+ if (err)
+ return err;
+
+ /* The remote destination address is the only mandatory attribute. */
+ if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address");
+ return -EINVAL;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
+ !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+
+ vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
+
+ /* rtnetlink code only validates that IPv4 group address is
+ * multicast.
+ */
+ if (!vxlan_addr_is_multicast(&cfg->group.dst) &&
+ !vxlan_addr_any(&cfg->group.dst)) {
+ NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast");
+ return -EINVAL;
+ }
+
+ if (vxlan_addr_any(&cfg->group.dst) &&
+ mdbe_attrs[MDBE_ATTR_SOURCE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry");
+ return -EINVAL;
+ }
+
+ /* (S, G) entries are always INCLUDE; the check below rejects an explicit
+  * filter mode for them.
+  */
+ if (vxlan_mdb_is_sg(&cfg->group))
+ cfg->filter_mode = MCAST_INCLUDE;
+
+ if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
+ if (!vxlan_mdb_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]);
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) {
+ if (!vxlan_mdb_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode");
+ return -EINVAL;
+ }
+ err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto,
+ mdbe_attrs[MDBE_ATTR_SRC_LIST],
+ extack);
+ if (err)
+ return err;
+ }
+
+ /* The source list is necessarily empty here, so a plain return does not
+  * leak anything.
+  */
+ if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) &&
+ cfg->filter_mode == MCAST_INCLUDE) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list");
+ return -EINVAL;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_RTPROT])
+ cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
+
+ /* From here on the source list may be populated, so errors must unwind
+  * through err_src_list_fini.
+  */
+ err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address");
+ goto err_src_list_fini;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_DST_PORT])
+ cfg->remote_port =
+ cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));
+
+ if (mdbe_attrs[MDBE_ATTR_VNI])
+ cfg->remote_vni =
+ cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));
+
+ if (mdbe_attrs[MDBE_ATTR_IFINDEX]) {
+ cfg->remote_ifindex =
+ nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]);
+ if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) {
+ NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found");
+ err = -EINVAL;
+ goto err_src_list_fini;
+ }
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
+ cfg->group.vni =
+ cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
+
+ return 0;
+
+ err_src_list_fini:
+ vxlan_mdb_config_src_list_fini(cfg);
+ return err;
+ }
+
+ /* Build @cfg from an MDB add / delete request on @dev.
+  *
+  * @cfg is zeroed and then given these defaults: key VNI and remote VNI from
+  * the device's default destination, remote port from the device
+  * configuration, filter mode MCAST_EXCLUDE and protocol RTPROT_STATIC. The
+  * fixed-size entry is validated before the nested attributes are parsed.
+  *
+  * tb[MDBA_SET_ENTRY] is dereferenced unconditionally; presumably the caller
+  * guarantees its presence - TODO confirm.
+  *
+  * Returns 0 on success or a negative errno with a message in @extack. On
+  * success the caller must release @cfg with vxlan_mdb_config_fini().
+  */
+ static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg,
+ struct net_device *dev, struct nlattr *tb[],
+ u16 nlmsg_flags,
+ struct netlink_ext_ack *extack)
+ {
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+
+ memset(cfg, 0, sizeof(*cfg));
+ cfg->vxlan = vxlan;
+ cfg->group.vni = vxlan->default_dst.remote_vni;
+ INIT_LIST_HEAD(&cfg->src_list);
+ cfg->nlflags = nlmsg_flags;
+ cfg->filter_mode = MCAST_EXCLUDE;
+ cfg->rt_protocol = RTPROT_STATIC;
+ cfg->remote_vni = vxlan->default_dst.remote_vni;
+ cfg->remote_port = vxlan->cfg.dst_port;
+
+ if (entry->ifindex != dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device");
+ return -EINVAL;
+ }
+
+ /* State is not part of the entry key and can be ignored on deletion
+ * requests.
+ */
+ if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) &&
+ entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent");
+ return -EINVAL;
+ }
+
+ if (entry->flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags");
+ return -EINVAL;
+ }
+
+ if (entry->vid) {
+ NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
+ return -EINVAL;
+ }
+
+ if (entry->addr.proto != htons(ETH_P_IP) &&
+ entry->addr.proto != htons(ETH_P_IPV6)) {
+ NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address");
+ return -EINVAL;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute");
+ return -EINVAL;
+ }
+
+ return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS],
+ extack);
+ }
+
+ /* Release the resources held by @cfg; currently only its source list. */
+ static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg)
+ {
+ vxlan_mdb_config_src_list_fini(cfg);
+ }
+
+ /* Look up the MDB entry matching @group in the device's hash table.
+  * Returns NULL if there is none.
+  */
+ static struct vxlan_mdb_entry *
+ vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group)
+ {
+ return rhashtable_lookup_fast(&vxlan->mdb_tbl, group,
+ vxlan_mdb_rht_params);
+ }
+
+/* Find the remote of @mdb_entry whose destination IP equals @addr. Remotes
+ * are matched on IP address only. Returns NULL when there is no match.
+ * Requires RTNL (rtnl_dereference()).
+ */
+static struct vxlan_mdb_remote *
+vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry,
+			const union vxlan_addr *addr)
+{
+	struct vxlan_mdb_remote *iter;
+
+	list_for_each_entry(iter, &mdb_entry->remotes, list) {
+		if (!vxlan_addr_equal(addr,
+				      &rtnl_dereference(iter->rd)->remote_ip))
+			continue;
+
+		return iter;
+	}
+
+	return NULL;
+}
+
+ /* RCU callback: destroy the dst cache of an rdst and free it. */
+ static void vxlan_mdb_rdst_free(struct rcu_head *head)
+ {
+ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+ dst_cache_destroy(&rd->dst_cache);
+ kfree(rd);
+ }
+
+/* Allocate an rdst populated from @cfg and publish it as remote->rd.
+ *
+ * Any previous remote->rd is overwritten without being released; callers
+ * replacing an rdst must keep the old pointer themselves. Returns 0 on
+ * success, -ENOMEM or the error of dst_cache_init() otherwise, in which case
+ * remote->rd is left untouched.
+ */
+static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg,
+				      struct vxlan_mdb_remote *remote)
+{
+	struct vxlan_rdst *new_rd;
+	int err;
+
+	new_rd = kzalloc(sizeof(*new_rd), GFP_KERNEL);
+	if (!new_rd)
+		return -ENOMEM;
+
+	err = dst_cache_init(&new_rd->dst_cache, GFP_KERNEL);
+	if (err) {
+		kfree(new_rd);
+		return err;
+	}
+
+	new_rd->remote_ifindex = cfg->remote_ifindex;
+	new_rd->remote_vni = cfg->remote_vni;
+	new_rd->remote_port = cfg->remote_port;
+	new_rd->remote_ip = cfg->remote_ip;
+
+	/* Publish only once the rdst is fully initialized. */
+	rcu_assign_pointer(remote->rd, new_rd);
+
+	return 0;
+}
+
+ /* Schedule @rd to be freed by vxlan_mdb_rdst_free() via call_rcu(). */
+ static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd)
+ {
+ call_rcu(&rd->rcu, vxlan_mdb_rdst_free);
+ }
+
+ /* Initialize @remote from @cfg: allocate its rdst, copy flags, filter mode
+  * and protocol, and start with an empty source list. On failure the other
+  * fields of @remote are not touched.
+  */
+ static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote)
+ {
+ int err;
+
+ err = vxlan_mdb_remote_rdst_init(cfg, remote);
+ if (err)
+ return err;
+
+ remote->flags = cfg->flags;
+ remote->filter_mode = cfg->filter_mode;
+ remote->rt_protocol = cfg->rt_protocol;
+ INIT_HLIST_HEAD(&remote->src_list);
+
+ return 0;
+ }
+
+ /* Tear down @remote: release its rdst via RCU. The source list is expected
+  * to have been emptied already; a warning is issued (once) otherwise.
+  * @vxlan is currently unused.
+  */
+ static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_remote *remote)
+ {
+ WARN_ON_ONCE(!hlist_empty(&remote->src_list));
+ vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd));
+ }
+
+/* Find the source entry of @remote with address @addr, or NULL. */
+static struct vxlan_mdb_src_entry *
+vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote,
+				  const union vxlan_addr *addr)
+{
+	struct vxlan_mdb_src_entry *iter;
+
+	hlist_for_each_entry(iter, &remote->src_list, node) {
+		if (!vxlan_addr_equal(&iter->addr, addr))
+			continue;
+
+		return iter;
+	}
+
+	return NULL;
+}
+
+/* Allocate a zeroed source entry for @addr and link it at the head of the
+ * remote's source list. Returns the new entry, or NULL on allocation failure.
+ */
+static struct vxlan_mdb_src_entry *
+vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote,
+			       const union vxlan_addr *addr)
+{
+	struct vxlan_mdb_src_entry *new_ent;
+
+	new_ent = kzalloc(sizeof(*new_ent), GFP_KERNEL);
+	if (new_ent) {
+		new_ent->addr = *addr;
+		hlist_add_head(&new_ent->node, &remote->src_list);
+	}
+
+	return new_ent;
+}
+
+ /* Unlink @ent from its remote's source list and free it. */
+ static void
+ vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent)
+ {
+ hlist_del(&ent->node);
+ kfree(ent);
+ }
+
+/* Install the (S, G) entry that backs source @addr of the (*, G) request
+ * described by @cfg. The (S, G) entry uses the same group, VNI, remote and
+ * netlink flags, is always INCLUDE, and is marked blocked when the (*, G)
+ * filter mode is EXCLUDE.
+ */
+static int
+vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg,
+			     const union vxlan_addr *addr,
+			     struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_config sg_cfg;
+
+	/* Zero every byte first: sg_cfg.group is used as a hash key as a whole
+	 * (see vxlan_mdb_rht_params), and fields not set below must read zero.
+	 */
+	memset(&sg_cfg, 0, sizeof(sg_cfg));
+	INIT_LIST_HEAD(&sg_cfg.src_list);
+	sg_cfg.vxlan = cfg->vxlan;
+	sg_cfg.nlflags = cfg->nlflags;
+	sg_cfg.rt_protocol = cfg->rt_protocol;
+
+	/* Key: (S, G) in the same VNI as the (*, G) entry. */
+	sg_cfg.group.vni = cfg->group.vni;
+	sg_cfg.group.dst = cfg->group.dst;
+	sg_cfg.group.src = *addr;
+
+	/* Same remote as the (*, G) entry. */
+	sg_cfg.remote_port = cfg->remote_port;
+	sg_cfg.remote_vni = cfg->remote_vni;
+	sg_cfg.remote_ifindex = cfg->remote_ifindex;
+	sg_cfg.remote_ip = cfg->remote_ip;
+
+	sg_cfg.filter_mode = MCAST_INCLUDE;
+	sg_cfg.flags = cfg->filter_mode == MCAST_EXCLUDE ?
+		       VXLAN_MDB_REMOTE_F_BLOCKED : 0;
+
+	return __vxlan_mdb_add(&sg_cfg, extack);
+}
+
+/* Remove the (S, G) entry that backs source @addr of @remote in (*, G) entry
+ * @group. Only the key and the remote IP are filled in; the result of the
+ * deletion is ignored. Requires RTNL (rtnl_dereference()).
+ */
+static void
+vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan,
+			     const struct vxlan_mdb_entry_key *group,
+			     const struct vxlan_mdb_remote *remote,
+			     const union vxlan_addr *addr)
+{
+	struct vxlan_mdb_config sg_cfg;
+
+	/* Zero every byte first: sg_cfg.group is used as a hash key as a whole
+	 * (see vxlan_mdb_rht_params).
+	 */
+	memset(&sg_cfg, 0, sizeof(sg_cfg));
+	INIT_LIST_HEAD(&sg_cfg.src_list);
+	sg_cfg.vxlan = vxlan;
+	sg_cfg.group.vni = group->vni;
+	sg_cfg.group.dst = group->dst;
+	sg_cfg.group.src = *addr;
+	sg_cfg.remote_ip = rtnl_dereference(remote->rd)->remote_ip;
+
+	__vxlan_mdb_del(&sg_cfg, NULL);
+}
+
+ /* Add source @src to @remote and install its backing (S, G) entry.
+  *
+  * An already existing source is only accepted when NLM_F_REPLACE is set
+  * (-EEXIST otherwise); in that case the existing entry is reused and its
+  * (S, G) entry is re-added with the new configuration. Returns -ENOMEM if a
+  * new source entry cannot be allocated, or the error of the (S, G) add.
+  */
+ static int
+ vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote,
+ const struct vxlan_mdb_config_src_entry *src,
+ struct netlink_ext_ack *extack)
+ {
+ struct vxlan_mdb_src_entry *ent;
+ int err;
+
+ ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr);
+ if (!ent) {
+ ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr);
+ if (!ent)
+ return -ENOMEM;
+ } else if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source entry already exists");
+ return -EEXIST;
+ }
+
+ err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack);
+ if (err)
+ goto err_src_del;
+
+ /* Clear flags in case source entry was marked for deletion as part of
+ * replace flow.
+ */
+ ent->flags = 0;
+
+ return 0;
+
+ err_src_del:
+ /* NOTE(review): this also frees a pre-existing entry when the replace
+  * of its (S, G) entry fails - confirm this is intended.
+  */
+ vxlan_mdb_remote_src_entry_del(ent);
+ return err;
+ }
+
+ /* Remove source @ent from @remote: first delete its backing (S, G) entry,
+  * then unlink and free the source entry itself.
+  */
+ static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group,
+ const struct vxlan_mdb_remote *remote,
+ struct vxlan_mdb_src_entry *ent)
+ {
+ vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr);
+ vxlan_mdb_remote_src_entry_del(ent);
+ }
+
+/* Add every source in cfg->src_list to @remote. If any addition fails, all
+ * sources currently on the remote's source list are removed and the error is
+ * returned.
+ */
+static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg,
+				     struct vxlan_mdb_remote *remote,
+				     struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_config_src_entry *cfg_src;
+	struct vxlan_mdb_src_entry *cur;
+	struct hlist_node *next;
+	int err;
+
+	list_for_each_entry(cfg_src, &cfg->src_list, node) {
+		err = vxlan_mdb_remote_src_add(cfg, remote, cfg_src, extack);
+		if (!err)
+			continue;
+
+		hlist_for_each_entry_safe(cur, next, &remote->src_list, node)
+			vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group,
+						 remote, cur);
+		return err;
+	}
+
+	return 0;
+}
+
+ /* Remove every source of @remote together with its backing (S, G) entry. */
+ static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group,
+ struct vxlan_mdb_remote *remote)
+ {
+ struct vxlan_mdb_src_entry *ent;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node)
+ vxlan_mdb_remote_src_del(vxlan, group, remote, ent);
+ }
+
+/* Size of the MDBA_MDB_EATTR_SRC_LIST attribute for @remote, or 0 when the
+ * remote has no sources (the attribute is then not emitted at all). Must
+ * stay in sync with vxlan_mdb_entry_info_fill_srcs().
+ */
+static size_t
+vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group,
+			      const struct vxlan_mdb_remote *remote)
+{
+	struct vxlan_mdb_src_entry *ent;
+	size_t nlmsg_size;
+
+	if (hlist_empty(&remote->src_list))
+		return 0;
+
+	/* MDBA_MDB_EATTR_SRC_LIST */
+	nlmsg_size = nla_total_size(0);
+
+	hlist_for_each_entry(ent, &remote->src_list, node) {
+			/* MDBA_MDB_SRCLIST_ENTRY */
+		nlmsg_size += nla_total_size(0) +
+			/* MDBA_MDB_SRCATTR_ADDRESS; sources share the
+			 * group's address family
+			 */
+			nla_total_size(vxlan_addr_size(&group->dst)) +
+			/* MDBA_MDB_SRCATTR_TIMER, emitted with
+			 * nla_put_u32()
+			 */
+			nla_total_size(sizeof(u32));
+	}
+
+	return nlmsg_size;
+}
+
+ /* Compute the size of the notification built by vxlan_mdb_nlmsg_fill() for
+  * @remote of @mdb_entry. Optional attributes are accounted for under the
+  * same conditions used when emitting them in vxlan_mdb_entry_info_fill().
+  * Requires RTNL (rtnl_dereference()).
+  */
+ static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
+ {
+ const struct vxlan_mdb_entry_key *group = &mdb_entry->key;
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+ size_t nlmsg_size;
+
+ nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY_INFO */
+ nla_total_size(sizeof(struct br_mdb_entry)) +
+ /* MDBA_MDB_EATTR_TIMER */
+ nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (vxlan_mdb_is_sg(group))
+ nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst));
+ /* MDBA_MDB_EATTR_RTPROT */
+ nlmsg_size += nla_total_size(sizeof(u8));
+ /* MDBA_MDB_EATTR_SRC_LIST */
+ nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote);
+ /* MDBA_MDB_EATTR_GROUP_MODE */
+ nlmsg_size += nla_total_size(sizeof(u8));
+ /* MDBA_MDB_EATTR_DST */
+ nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip));
+ /* MDBA_MDB_EATTR_DST_PORT */
+ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port)
+ nlmsg_size += nla_total_size(sizeof(u16));
+ /* MDBA_MDB_EATTR_VNI */
+ if (rd->remote_vni != vxlan->default_dst.remote_vni)
+ nlmsg_size += nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_IFINDEX */
+ if (rd->remote_ifindex)
+ nlmsg_size += nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_SRC_VNI */
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni)
+ nlmsg_size += nla_total_size(sizeof(u32));
+
+ return nlmsg_size;
+ }
+
+ /* Build a notification of netlink message type @type in @skb describing
+  * @remote of @mdb_entry, laid out as MDBA_MDB > MDBA_MDB_ENTRY >
+  * MDBA_MDB_ENTRY_INFO.
+  *
+  * Returns 0 on success. On any failure the whole message is cancelled and
+  * -EMSGSIZE is returned.
+  */
+ static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote,
+ int type)
+ {
+ struct nlattr *mdb_nest, *mdb_entry_nest;
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = vxlan->dev->ifindex;
+
+ mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+ if (!mdb_nest)
+ goto cancel;
+ mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+ if (!mdb_entry_nest)
+ goto cancel;
+
+ if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote))
+ goto cancel;
+
+ nla_nest_end(skb, mdb_entry_nest);
+ nla_nest_end(skb, mdb_nest);
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+ cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
+/* Send a notification of netlink message type @type about @remote of
+ * @mdb_entry to the RTNLGRP_MDB group in the device's network namespace. If
+ * the message cannot be allocated (-ENOBUFS) or built, the error is reported
+ * to the group's listeners instead.
+ */
+static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan,
+				    const struct vxlan_mdb_entry *mdb_entry,
+				    const struct vxlan_mdb_remote *remote,
+				    int type)
+{
+	struct net *net = dev_net(vxlan->dev);
+	struct sk_buff *msg;
+	int err;
+
+	msg = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote),
+			GFP_KERNEL);
+	if (!msg) {
+		rtnl_set_sk_err(net, RTNLGRP_MDB, -ENOBUFS);
+		return;
+	}
+
+	err = vxlan_mdb_nlmsg_fill(vxlan, msg, mdb_entry, remote, type);
+	if (err) {
+		kfree_skb(msg);
+		rtnl_set_sk_err(net, RTNLGRP_MDB, err);
+		return;
+	}
+
+	rtnl_notify(msg, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL);
+}
+
+ /* Replace the source list of @remote with the one in @cfg.
+  *
+  * Mark-and-sweep: every existing source is first marked
+  * VXLAN_SGRP_F_DELETE; adding the new list clears the mark on each source
+  * that is part of it (see vxlan_mdb_remote_src_add()); sources still marked
+  * afterwards are removed. If adding fails, the marks are cleared on whatever
+  * sources remain and the error is returned.
+  */
+ static int
+ vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote,
+ struct netlink_ext_ack *extack)
+ {
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ struct vxlan_mdb_src_entry *ent;
+ struct hlist_node *tmp;
+ int err;
+
+ hlist_for_each_entry(ent, &remote->src_list, node)
+ ent->flags |= VXLAN_SGRP_F_DELETE;
+
+ err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
+ if (err)
+ goto err_clear_delete;
+
+ hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) {
+ if (ent->flags & VXLAN_SGRP_F_DELETE)
+ vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote,
+ ent);
+ }
+
+ return 0;
+
+ err_clear_delete:
+ hlist_for_each_entry(ent, &remote->src_list, node)
+ ent->flags &= ~VXLAN_SGRP_F_DELETE;
+ return err;
+ }
+
+/* Replace the attributes of an existing remote with those in @cfg.
+ *
+ * Returns 0 on success and sends an RTM_NEWMDB notification. On failure the
+ * remote keeps its previous destination (remote->rd) and the error from the
+ * failing helper is returned.
+ */
+static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote,
+ struct netlink_ext_ack *extack)
+{
+ /* Remember the current destination so it can be restored on error. */
+ struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd);
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ int err;
+
+ /* NOTE(review): presumably installs a freshly built destination in
+ * remote->rd, which is why it is re-read right after - confirm against
+ * vxlan_mdb_remote_rdst_init().
+ */
+ err = vxlan_mdb_remote_rdst_init(cfg, remote);
+ if (err)
+ return err;
+ new_rd = rtnl_dereference(remote->rd);
+
+ err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack);
+ if (err)
+ goto err_rdst_reset;
+
+ /* flags and filter_mode are read with READ_ONCE() in vxlan_mdb_xmit(). */
+ WRITE_ONCE(remote->flags, cfg->flags);
+ WRITE_ONCE(remote->filter_mode, cfg->filter_mode);
+ remote->rt_protocol = cfg->rt_protocol;
+ vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB);
+
+ /* The replacement succeeded; release the previous destination. */
+ vxlan_mdb_remote_rdst_fini(old_rd);
+
+ return 0;
+
+err_rdst_reset:
+ /* Put the previous destination back and release the new one. */
+ rcu_assign_pointer(remote->rd, old_rd);
+ vxlan_mdb_remote_rdst_fini(new_rd);
+ return err;
+}
+
+/* Add the remote described by @cfg to @mdb_entry, or replace it if it
+ * already exists.
+ *
+ * An existing remote is only replaced when NLM_F_REPLACE is set, otherwise
+ * -EEXIST is returned. A missing remote is only created when NLM_F_CREATE is
+ * set, otherwise -ENOENT is returned. -ENOMEM is returned if the remote
+ * cannot be allocated; errors from the init and source-add helpers are
+ * passed through. A successful creation sends an RTM_NEWMDB notification.
+ */
+static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg,
+				struct vxlan_mdb_entry *mdb_entry,
+				struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_remote *remote;
+	int err;
+
+	remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
+	if (remote) {
+		if (cfg->nlflags & NLM_F_REPLACE)
+			return vxlan_mdb_remote_replace(cfg, mdb_entry, remote,
+							extack);
+
+		NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists");
+		return -EEXIST;
+	}
+
+	if (!(cfg->nlflags & NLM_F_CREATE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist");
+		return -ENOENT;
+	}
+
+	remote = kzalloc(sizeof(*remote), GFP_KERNEL);
+	if (!remote)
+		return -ENOMEM;
+
+	err = vxlan_mdb_remote_init(cfg, remote);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry");
+		kfree(remote);
+		return err;
+	}
+
+	err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
+	if (err) {
+		/* Undo the init step before releasing the memory. */
+		vxlan_mdb_remote_fini(cfg->vxlan, remote);
+		kfree(remote);
+		return err;
+	}
+
+	/* Only a fully set up remote is published on the RCU list. */
+	list_add_rcu(&remote->list, &mdb_entry->remotes);
+	vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB);
+
+	return 0;
+}
+
+/* Remove @remote from @mdb_entry and release it.
+ *
+ * An RTM_DELMDB notification is sent first, then the remote is unlinked from
+ * the RCU list, its sources are deleted, it is finalised, and the memory is
+ * freed after an RCU grace period.
+ */
+static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote)
+{
+ /* Notify while the remote is still intact (presumably so the message
+ * can describe it in full - confirm against vxlan_mdb_nlmsg_fill()).
+ */
+ vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB);
+ list_del_rcu(&remote->list);
+ vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote);
+ vxlan_mdb_remote_fini(vxlan, remote);
+ /* Deferred free: the remotes list is walked under RCU in vxlan_mdb_xmit(). */
+ kfree_rcu(remote, rcu);
+}
+
+/* Look up the MDB entry for @group, creating it if it does not exist.
+ *
+ * A new entry is linked into both vxlan->mdb_list and the vxlan->mdb_tbl
+ * hash table. When it is the only entry on the list, VXLAN_F_MDB is set on
+ * the device.
+ *
+ * Returns the entry, or an ERR_PTR(): -ENOMEM if allocation fails, otherwise
+ * the error from the hash table insertion.
+ */
+static struct vxlan_mdb_entry *
+vxlan_mdb_entry_get(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group)
+{
+ struct vxlan_mdb_entry *mdb_entry;
+ int err;
+
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL);
+ if (!mdb_entry)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&mdb_entry->remotes);
+ memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key));
+ /* Entries always go to the head; vxlan_mdb_entries_flush() relies on it. */
+ hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list);
+
+ err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl,
+ &mdb_entry->rhnode,
+ vxlan_mdb_rht_params);
+ if (err)
+ goto err_free_entry;
+
+ /* First entry on the list: flag the device as having MDB entries. */
+ if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
+ vxlan->cfg.flags |= VXLAN_F_MDB;
+
+ return mdb_entry;
+
+err_free_entry:
+ /* Undo the list insertion done before the failed hash insertion. */
+ hlist_del(&mdb_entry->mdb_node);
+ kfree(mdb_entry);
+ return ERR_PTR(err);
+}
+
+/* Release @mdb_entry if it no longer has any remotes.
+ *
+ * Does nothing while the remotes list is non-empty. Otherwise the entry is
+ * removed from the hash table and the list and freed after an RCU grace
+ * period. If it was the only entry on the list, VXLAN_F_MDB is cleared on
+ * the device.
+ */
+static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_entry *mdb_entry)
+{
+ if (!list_empty(&mdb_entry->remotes))
+ return;
+
+ /* Must be checked before the entry is unlinked from the list below. */
+ if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
+ vxlan->cfg.flags &= ~VXLAN_F_MDB;
+
+ rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode,
+ vxlan_mdb_rht_params);
+ hlist_del(&mdb_entry->mdb_node);
+ kfree_rcu(mdb_entry, rcu);
+}
+
+/* Add (or replace) the remote described by @cfg.
+ *
+ * The MDB entry for cfg->group is looked up or created first. If adding the
+ * remote fails, the entry reference is dropped again, which releases an
+ * entry that has no remotes. On success vxlan->mdb_seq is incremented.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
+			   struct netlink_ext_ack *extack)
+{
+	struct vxlan_dev *vxlan = cfg->vxlan;
+	struct vxlan_mdb_entry *entry;
+	int err;
+
+	entry = vxlan_mdb_entry_get(vxlan, &cfg->group);
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	err = vxlan_mdb_remote_add(cfg, entry, extack);
+	if (err) {
+		vxlan_mdb_entry_put(vxlan, entry);
+		return err;
+	}
+
+	vxlan->mdb_seq++;
+
+	return 0;
+}
+
+/* Delete the remote described by @cfg.
+ *
+ * Returns -ENOENT, with an extack message, if either the MDB entry for
+ * cfg->group or the remote for cfg->remote_ip does not exist. Otherwise the
+ * remote is deleted, the entry reference is dropped, vxlan->mdb_seq is
+ * incremented and 0 is returned.
+ */
+static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
+			   struct netlink_ext_ack *extack)
+{
+	struct vxlan_dev *vxlan = cfg->vxlan;
+	struct vxlan_mdb_remote *remote;
+	struct vxlan_mdb_entry *entry;
+
+	entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group);
+	if (!entry) {
+		NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry");
+		return -ENOENT;
+	}
+
+	remote = vxlan_mdb_remote_lookup(entry, &cfg->remote_ip);
+	if (!remote) {
+		NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry");
+		return -ENOENT;
+	}
+
+	vxlan_mdb_remote_del(vxlan, entry, remote);
+	/* Releases the entry if that was its last remote. */
+	vxlan_mdb_entry_put(vxlan, entry);
+
+	vxlan->mdb_seq++;
+
+	return 0;
+}
+
+/* Entry point for adding an MDB entry to a VXLAN device.
+ *
+ * Must be called with RTNL held. Parses @tb into a configuration, performs
+ * the addition and releases the configuration again.
+ *
+ * Returns 0 on success or a negative errno from parsing or from the add.
+ */
+int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+		  struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_config cfg;
+	int err;
+
+	ASSERT_RTNL();
+
+	err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack);
+	if (!err) {
+		err = __vxlan_mdb_add(&cfg, extack);
+		vxlan_mdb_config_fini(&cfg);
+	}
+
+	return err;
+}
+
+/* Entry point for deleting an MDB entry from a VXLAN device.
+ *
+ * Must be called with RTNL held. Parses @tb into a configuration (with no
+ * netlink message flags), performs the deletion and releases the
+ * configuration again.
+ *
+ * Returns 0 on success or a negative errno from parsing or from the delete.
+ */
+int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
+		  struct netlink_ext_ack *extack)
+{
+	struct vxlan_mdb_config cfg;
+	int err;
+
+	ASSERT_RTNL();
+
+	err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack);
+	if (!err) {
+		err = __vxlan_mdb_del(&cfg, extack);
+		vxlan_mdb_config_fini(&cfg);
+	}
+
+	return err;
+}
+
+/* Find the MDB entry that should be used to forward @skb.
+ *
+ * Only IPv4 and (when enabled) IPv6 packets whose destination MAC is
+ * multicast but not broadcast are considered. Lookups are attempted from
+ * most to least specific: (S, G), then (*, G), then the all-zeros
+ * destination entry, the last one only for destinations that are not
+ * link-local multicast.
+ *
+ * Returns the matching entry, or NULL if the packet is not eligible, its IP
+ * header cannot be pulled, or no entry matches.
+ */
+struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ __be32 src_vni)
+{
+ struct vxlan_mdb_entry *mdb_entry;
+ struct vxlan_mdb_entry_key group;
+
+ if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) ||
+ is_broadcast_ether_addr(eth_hdr(skb)->h_dest))
+ return NULL;
+
+ /* When not in collect metadata mode, 'src_vni' is zero, but MDB
+ * entries are stored with the VNI of the VXLAN device.
+ */
+ if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
+ src_vni = vxlan->default_dst.remote_vni;
+
+ /* Start from an all-zero key and fill in only the fields we know. */
+ memset(&group, 0, sizeof(group));
+ group.vni = src_vni;
+
+ /* Build the (S, G) key from the IP header. */
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return NULL;
+ group.dst.sa.sa_family = AF_INET;
+ group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
+ group.src.sa.sa_family = AF_INET;
+ group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return NULL;
+ group.dst.sa.sa_family = AF_INET6;
+ group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+ group.src.sa.sa_family = AF_INET6;
+ group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ /* First try: exact (S, G) match. */
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ /* Second try: (*, G), i.e. the same key with the source zeroed. */
+ memset(&group.src, 0, sizeof(group.src));
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if
+ * the destination IP address is not link-local multicast since we want
+ * to transmit such traffic together with broadcast and unknown unicast
+ * traffic.
+ */
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr))
+ return NULL;
+ group.dst.sin.sin_addr.s_addr = 0;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_type(&group.dst.sin6.sin6_addr) &
+ IPV6_ADDR_LINKLOCAL)
+ return NULL;
+ memset(&group.dst.sin6.sin6_addr, 0,
+ sizeof(group.dst.sin6.sin6_addr));
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ /* Last try: only the VNI and the address family remain in the key. */
+ return vxlan_mdb_entry_lookup(vxlan, &group);
+}
+
+/* Transmit @skb to every eligible remote of @mdb_entry.
+ *
+ * A remote is skipped when it is blocked, or when the entry is a (*, G)
+ * entry and the remote's filter mode is MCAST_INCLUDE. The first eligible
+ * remote receives the original @skb, every other one a clone. @skb is always
+ * consumed: it is either transmitted or, if no remote is eligible, freed.
+ *
+ * NOTE(review): the list is walked with list_for_each_entry_rcu(), so the
+ * caller is presumably inside an RCU read-side section - confirm at the
+ * call site.
+ *
+ * Always returns NETDEV_TX_OK.
+ */
+netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct sk_buff *skb)
+{
+ struct vxlan_mdb_remote *remote, *fremote = NULL;
+ __be32 src_vni = mdb_entry->key.vni;
+
+ list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) {
+ struct sk_buff *skb1;
+
+ if ((vxlan_mdb_is_star_g(&mdb_entry->key) &&
+ READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) ||
+ (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED))
+ continue;
+
+ /* Hold back the first eligible remote; it gets the original
+ * skb after the loop, which saves one clone.
+ */
+ if (!fremote) {
+ fremote = remote;
+ continue;
+ }
+
+ /* A failed clone just means this remote misses the packet. */
+ skb1 = skb_clone(skb, GFP_ATOMIC);
+ if (skb1)
+ vxlan_xmit_one(skb1, vxlan->dev, src_vni,
+ rcu_dereference(remote->rd), false);
+ }
+
+ if (fremote)
+ vxlan_xmit_one(skb, vxlan->dev, src_vni,
+ rcu_dereference(fremote->rd), false);
+ else
+ kfree_skb(skb);
+
+ return NETDEV_TX_OK;
+}
+
+/* Element callback handed to rhashtable_free_and_destroy() in
+ * vxlan_mdb_fini(). It is only called if the table still holds an element,
+ * which is unexpected after the flush, so it warns once; both arguments are
+ * unused.
+ */
+static void vxlan_mdb_check_empty(void *ptr, void *arg)
+{
+ WARN_ON_ONCE(1);
+}
+
+/* Delete every remote of @mdb_entry. */
+static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
+				    struct vxlan_mdb_entry *mdb_entry)
+{
+	struct vxlan_mdb_remote *cur, *next;
+
+	/* Remotes are unlinked while walking, hence the _safe iterator. */
+	list_for_each_entry_safe(cur, next, &mdb_entry->remotes, list) {
+		vxlan_mdb_remote_del(vxlan, mdb_entry, cur);
+	}
+}
+
+/* Delete every MDB entry of @vxlan together with all of its remotes. */
+static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
+{
+	struct vxlan_mdb_entry *entry;
+	struct hlist_node *next;
+
+	/* New entries are only ever inserted at the head of the list, so
+	 * removing one entry cannot cause another entry to be removed; the
+	 * saved next pointer therefore stays valid across each iteration.
+	 */
+	hlist_for_each_entry_safe(entry, next, &vxlan->mdb_list, mdb_node) {
+		vxlan_mdb_remotes_flush(vxlan, entry);
+		/* With no remotes left, this releases the entry itself. */
+		vxlan_mdb_entry_put(vxlan, entry);
+	}
+}
+
+/* Initialise the MDB state of @vxlan: the hash table and the entry list.
+ *
+ * Returns 0 on success or the error from rhashtable_init(), in which case
+ * the list head is left untouched.
+ */
+int vxlan_mdb_init(struct vxlan_dev *vxlan)
+{
+	int err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params);
+
+	if (!err)
+		INIT_HLIST_HEAD(&vxlan->mdb_list);
+
+	return err;
+}
+
+/* Tear down the MDB state of @vxlan.
+ *
+ * All entries are flushed first. After that the VXLAN_F_MDB flag is expected
+ * to be clear and the hash table is expected to be empty; a warning is
+ * emitted once if either is not the case.
+ */
+void vxlan_mdb_fini(struct vxlan_dev *vxlan)
+{
+ vxlan_mdb_entries_flush(vxlan);
+ WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
+ /* vxlan_mdb_check_empty() warns if any element is still in the table. */
+ rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
+ NULL);
+}
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 599c3b4fdd5e..817fa3075842 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -85,6 +85,39 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
+/* Read an IP address from netlink attribute @nla into @ip.
+ *
+ * The address family is derived from the payload length: at least
+ * sizeof(struct in6_addr) bytes is taken as IPv6, otherwise at least
+ * sizeof(__be32) bytes is taken as IPv4.
+ *
+ * Returns 0 on success or -EAFNOSUPPORT if the payload is too short.
+ */
+static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
+				     const struct nlattr *nla)
+{
+	if (nla_len(nla) >= sizeof(struct in6_addr)) {
+		ip->sa.sa_family = AF_INET6;
+		ip->sin6.sin6_addr = nla_get_in6_addr(nla);
+		return 0;
+	}
+
+	if (nla_len(nla) < sizeof(__be32))
+		return -EAFNOSUPPORT;
+
+	ip->sa.sa_family = AF_INET;
+	ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+	return 0;
+}
+
+/* Append @ip to @skb as netlink attribute @attr, encoded as an IPv6 address
+ * when its family is AF_INET6 and as an IPv4 address otherwise.
+ *
+ * Returns the result of the underlying nla_put_*() helper.
+ */
+static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+				     const union vxlan_addr *ip)
+{
+	if (ip->sa.sa_family != AF_INET6)
+		return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+
+	return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
+}
+
+/* Tell whether @ip is a multicast address. AF_INET6 addresses are checked
+ * as IPv6, every other family is checked as IPv4.
+ */
+static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
+{
+	if (ip->sa.sa_family != AF_INET6)
+		return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
+
+	return ipv6_addr_is_multicast(&ip->sin6.sin6_addr);
+}
+
#else /* !CONFIG_IPV6 */
static inline
@@ -93,8 +126,41 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
+/* Read an IPv4 address from netlink attribute @nla into @ip (build without
+ * IPv6 support).
+ *
+ * Returns 0 on success. Returns -EAFNOSUPPORT if the payload is large enough
+ * to be an IPv6 address, or too short to be an IPv4 address.
+ */
+static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
+				     const struct nlattr *nla)
+{
+	/* IPv6-sized payloads cannot be handled in this configuration. */
+	if (nla_len(nla) >= sizeof(struct in6_addr))
+		return -EAFNOSUPPORT;
+
+	if (nla_len(nla) < sizeof(__be32))
+		return -EAFNOSUPPORT;
+
+	ip->sa.sa_family = AF_INET;
+	ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+	return 0;
+}
+
+/* Append @ip to @skb as netlink attribute @attr. Without IPv6 support the
+ * address is always encoded as IPv4.
+ *
+ * Returns the result of nla_put_in_addr().
+ */
+static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+				     const union vxlan_addr *ip)
+{
+	__be32 v4 = ip->sin.sin_addr.s_addr;
+
+	return nla_put_in_addr(skb, attr, v4);
+}
+
+/* Tell whether @ip is a multicast address. Without IPv6 support the address
+ * is always interpreted as IPv4.
+ */
+static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
+{
+	__be32 v4 = ip->sin.sin_addr.s_addr;
+
+	return ipv4_is_multicast(v4);
+}
+
#endif
+/* Return the size in bytes of the raw address held in @ip:
+ * sizeof(struct in6_addr) for AF_INET6, sizeof(__be32) for any other family.
+ */
+static inline size_t vxlan_addr_size(const union vxlan_addr *ip)
+{
+	return ip->sa.sa_family == AF_INET6 ? sizeof(struct in6_addr) :
+					      sizeof(__be32);
+}
+
static inline struct vxlan_vni_node *
vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
{
@@ -127,6 +193,8 @@ int vxlan_fdb_update(struct vxlan_dev *vxlan,
__be16 port, __be32 src_vni, __be32 vni,
__u32 ifindex, __u16 ndm_flags, u32 nhid,
bool swdev_notify, struct netlink_ext_ack *extack);
+void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+ __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc);
int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
struct vxlan_config *conf, __be32 vni);
@@ -159,4 +227,20 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
+
+/* vxlan_mdb.c */
+int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb);
+int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+ struct netlink_ext_ack *extack);
+int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
+struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ __be32 src_vni);
+netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct sk_buff *skb);
+int vxlan_mdb_init(struct vxlan_dev *vxlan);
+void vxlan_mdb_fini(struct vxlan_dev *vxlan);
#endif
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
index 0e996cf24f88..dc6d27a36faa 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
@@ -48,6 +48,8 @@ brcmfmac-$(CONFIG_OF) += \
of.o
brcmfmac-$(CONFIG_DMI) += \
dmi.o
+brcmfmac-$(CONFIG_ACPI) += \
+ acpi.o
ifeq ($(CONFIG_BRCMFMAC),m)
obj-m += wcc/
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c
new file mode 100644
index 000000000000..c4a54861bfb4
--- /dev/null
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/acpi.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright The Asahi Linux Contributors
+ */
+
+#include <linux/acpi.h>
+#include "debug.h"
+#include "core.h"
+#include "common.h"
+
+/* Pick up board information from the ACPI companion of @dev.
+ *
+ * If the companion has a "module-instance" string property, it is used to
+ * set settings->board_type to "apple,<module-instance>". Only in that case
+ * is the "RWCV" object evaluated as well; when it yields a buffer of at
+ * least two bytes, the first two bytes are stored as the NUL-terminated
+ * settings->antenna_sku.
+ *
+ * Nothing is changed when there is no ACPI companion. @bus_type is unused.
+ * Both strings are device-managed allocations; an allocation failure simply
+ * leaves the corresponding setting unset.
+ */
+void brcmf_acpi_probe(struct device *dev, enum brcmf_bus_type bus_type,
+		      struct brcmf_mp_device *settings)
+{
+	acpi_status status;
+	const union acpi_object *o;
+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (!adev)
+		return;
+
+	if (!ACPI_FAILURE(acpi_dev_get_property(adev, "module-instance",
+						ACPI_TYPE_STRING, &o))) {
+		brcmf_dbg(INFO, "ACPI module-instance=%s\n", o->string.pointer);
+		settings->board_type = devm_kasprintf(dev, GFP_KERNEL,
+						      "apple,%s",
+						      o->string.pointer);
+	} else {
+		brcmf_dbg(INFO, "No ACPI module-instance\n");
+		return;
+	}
+
+	status = acpi_evaluate_object(adev->handle, "RWCV", NULL, &buf);
+	o = buf.pointer;
+	if (!ACPI_FAILURE(status) && o && o->type == ACPI_TYPE_BUFFER &&
+	    o->buffer.length >= 2) {
+		/* Two data bytes plus the terminating NUL from kzalloc. */
+		char *antenna_sku = devm_kzalloc(dev, 3, GFP_KERNEL);
+
+		if (antenna_sku) {
+			memcpy(antenna_sku, o->buffer.pointer, 2);
+			brcmf_dbg(INFO, "ACPI RWCV data=%*phN antenna-sku=%s\n",
+				  (int)o->buffer.length, o->buffer.pointer,
+				  antenna_sku);
+			settings->antenna_sku = antenna_sku;
+		}
+	} else {
+		brcmf_dbg(INFO, "No ACPI antenna-sku\n");
+	}
+
+	/* ACPI_ALLOCATE_BUFFER makes ACPICA allocate the result, so it has to
+	 * be released on every path, including the one where RWCV evaluated
+	 * successfully but returned an object of the wrong type or length.
+	 * buf.pointer is still NULL if nothing was allocated.
+	 */
+	kfree(buf.pointer);
+}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 501136e011b5..fe31051a9e11 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -55,6 +55,7 @@ enum brcmf_bus_protocol_type {
/* Firmware blobs that may be available */
enum brcmf_blob_type {
BRCMF_BLOB_CLM,
+ BRCMF_BLOB_TXCAP,
};
struct brcmf_mp_device;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index a9690ec4c850..e0a70a671550 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1039,12 +1039,134 @@ void brcmf_set_mpc(struct brcmf_if *ifp, int mpc)
}
}
+/* Convert version 2 scan parameters into the version 1 layout.
+ *
+ * The fixed fields are copied one by one (scan_type is converted from its
+ * little-endian 32-bit form), followed by the variable-length tail: the
+ * channel list and, if any SSIDs are present, the 32-bit aligned SSID list.
+ * The channel and SSID counts are taken from the packed channel_num field.
+ *
+ * @params_le must have room for the fixed part plus that tail.
+ */
+static void brcmf_scan_params_v2_to_v1(struct brcmf_scan_params_v2_le *params_v2_le,
+				       struct brcmf_scan_params_le *params_le)
+{
+	int chan_cnt, ssid_cnt;
+	size_t tail_len;
+	u32 packed;
+
+	memcpy(&params_le->ssid_le, &params_v2_le->ssid_le,
+	       sizeof(params_le->ssid_le));
+	memcpy(&params_le->bssid, &params_v2_le->bssid,
+	       sizeof(params_le->bssid));
+
+	params_le->bss_type = params_v2_le->bss_type;
+	params_le->scan_type = le32_to_cpu(params_v2_le->scan_type);
+	params_le->nprobes = params_v2_le->nprobes;
+	params_le->active_time = params_v2_le->active_time;
+	params_le->passive_time = params_v2_le->passive_time;
+	params_le->home_time = params_v2_le->home_time;
+	params_le->channel_num = params_v2_le->channel_num;
+
+	/* channel_num packs the SSID count above the channel count. */
+	packed = le32_to_cpu(params_v2_le->channel_num);
+	chan_cnt = packed & BRCMF_SCAN_PARAMS_COUNT_MASK;
+	ssid_cnt = packed >> BRCMF_SCAN_PARAMS_NSSID_SHIFT;
+
+	tail_len = sizeof(u16) * chan_cnt;
+	if (ssid_cnt > 0) {
+		/* The SSID list starts on the next 32-bit boundary. */
+		tail_len = roundup(tail_len, sizeof(u32));
+		tail_len += sizeof(struct brcmf_ssid_le) * ssid_cnt;
+	}
+
+	memcpy(&params_le->channel_list[0],
+	       &params_v2_le->channel_list[0], tail_len);
+}
+
+/* Fill in a version 2 firmware scan parameter block.
+ *
+ * @cfg: driver state; cfg->d11inf is used to convert channels to chanspecs.
+ * @params_le: block to fill. Besides the fixed part, the channel list and
+ *	the SSID list are written behind it, so the caller must provide room
+ *	for request->n_channels channels and request->n_ssids SSIDs.
+ * @request: the scan request, or NULL to build a scan abort request.
+ *
+ * A scan abort request carries a single channel whose value is -1. For a
+ * real request the scan is active when SSIDs are given and passive
+ * otherwise. The total length and the packed channel/SSID counts are stored
+ * in the block.
+ */
+static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg,
+			     struct brcmf_scan_params_v2_le *params_le,
+			     struct cfg80211_scan_request *request)
+{
+	u32 n_ssids;
+	u32 n_channels;
+	s32 i;
+	s32 offset;
+	u16 chanspec;
+	char *ptr;
+	int length;
+	struct brcmf_ssid_le ssid_le;
+
+	eth_broadcast_addr(params_le->bssid);
+
+	length = BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
+
+	params_le->version = cpu_to_le16(BRCMF_SCAN_PARAMS_VERSION_V2);
+	params_le->bss_type = DOT11_BSSTYPE_ANY;
+	params_le->scan_type = cpu_to_le32(BRCMF_SCANTYPE_ACTIVE);
+	params_le->channel_num = 0;
+	params_le->nprobes = cpu_to_le32(-1);
+	params_le->active_time = cpu_to_le32(-1);
+	params_le->passive_time = cpu_to_le32(-1);
+	params_le->home_time = cpu_to_le32(-1);
+	memset(&params_le->ssid_le, 0, sizeof(params_le->ssid_le));
+
+	/* Scan abort */
+	if (!request) {
+		length += sizeof(u16);
+		params_le->channel_num = cpu_to_le32(1);
+		params_le->channel_list[0] = cpu_to_le16(-1);
+		params_le->length = cpu_to_le16(length);
+		return;
+	}
+
+	n_ssids = request->n_ssids;
+	n_channels = request->n_channels;
+
+	/* Copy channel array if applicable */
+	brcmf_dbg(SCAN, "### List of channelspecs to scan ### %d\n",
+		  n_channels);
+	if (n_channels > 0) {
+		/* The channel list is padded to a 32-bit boundary. */
+		length += roundup(sizeof(u16) * n_channels, sizeof(u32));
+		for (i = 0; i < n_channels; i++) {
+			chanspec = channel_to_chanspec(&cfg->d11inf,
+						       request->channels[i]);
+			brcmf_dbg(SCAN, "Chan : %d, Channel spec: %x\n",
+				  request->channels[i]->hw_value, chanspec);
+			params_le->channel_list[i] = cpu_to_le16(chanspec);
+		}
+	} else {
+		brcmf_dbg(SCAN, "Scanning all channels\n");
+	}
+
+	/* Copy ssid array if applicable */
+	brcmf_dbg(SCAN, "### List of SSIDs to scan ### %d\n", n_ssids);
+	if (n_ssids > 0) {
+		/* SSIDs follow the channel list on a 32-bit boundary. */
+		offset = offsetof(struct brcmf_scan_params_v2_le, channel_list) +
+				n_channels * sizeof(u16);
+		offset = roundup(offset, sizeof(u32));
+		length += sizeof(ssid_le) * n_ssids;
+		ptr = (char *)params_le + offset;
+		for (i = 0; i < n_ssids; i++) {
+			memset(&ssid_le, 0, sizeof(ssid_le));
+			ssid_le.SSID_len =
+					cpu_to_le32(request->ssids[i].ssid_len);
+			memcpy(ssid_le.SSID, request->ssids[i].ssid,
+			       request->ssids[i].ssid_len);
+			if (!ssid_le.SSID_len)
+				brcmf_dbg(SCAN, "%d: Broadcast scan\n", i);
+			else
+				brcmf_dbg(SCAN, "%d: scan for  %.32s size=%d\n",
+					  i, ssid_le.SSID, ssid_le.SSID_len);
+			memcpy(ptr, &ssid_le, sizeof(ssid_le));
+			ptr += sizeof(ssid_le);
+		}
+	} else {
+		brcmf_dbg(SCAN, "Performing passive scan\n");
+		params_le->scan_type = cpu_to_le32(BRCMF_SCANTYPE_PASSIVE);
+	}
+	params_le->length = cpu_to_le16(length);
+	/* Adding mask to channel numbers */
+	params_le->channel_num =
+		cpu_to_le32((n_ssids << BRCMF_SCAN_PARAMS_NSSID_SHIFT) |
+			(n_channels & BRCMF_SCAN_PARAMS_COUNT_MASK));
+}
+
s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg,
struct brcmf_if *ifp, bool aborted,
bool fw_abort)
{
struct brcmf_pub *drvr = cfg->pub;
- struct brcmf_scan_params_le params_le;
+ struct brcmf_scan_params_v2_le params_v2_le;
struct cfg80211_scan_request *scan_request;
u64 reqid;
u32 bucket;
@@ -1063,20 +1185,23 @@ s32 brcmf_notify_escan_complete(struct brcmf_cfg80211_info *cfg,
if (fw_abort) {
/* Do a scan abort to stop the driver's scan engine */
brcmf_dbg(SCAN, "ABORT scan in firmware\n");
- memset(&params_le, 0, sizeof(params_le));
- eth_broadcast_addr(params_le.bssid);
- params_le.bss_type = DOT11_BSSTYPE_ANY;
- params_le.scan_type = 0;
- params_le.channel_num = cpu_to_le32(1);
- params_le.nprobes = cpu_to_le32(1);
- params_le.active_time = cpu_to_le32(-1);
- params_le.passive_time = cpu_to_le32(-1);
- params_le.home_time = cpu_to_le32(-1);
- /* Scan is aborted by setting channel_list[0] to -1 */
- params_le.channel_list[0] = cpu_to_le16(-1);
+
+ brcmf_escan_prep(cfg, &params_v2_le, NULL);
+
/* E-Scan (or anyother type) can be aborted by SCAN */
- err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN,
- &params_le, sizeof(params_le));
+ if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_SCAN_V2)) {
+ err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN,
+ &params_v2_le,
+ sizeof(params_v2_le));
+ } else {
+ struct brcmf_scan_params_le params_le;
+
+ brcmf_scan_params_v2_to_v1(&params_v2_le, &params_le);
+ err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SCAN,
+ &params_le,
+ sizeof(params_le));
+ }
+
if (err)
bphy_err(drvr, "Scan abort failed\n");
}
@@ -1295,83 +1420,13 @@ done:
return err;
}
-static void brcmf_escan_prep(struct brcmf_cfg80211_info *cfg,
- struct brcmf_scan_params_le *params_le,
- struct cfg80211_scan_request *request)
-{
- u32 n_ssids;
- u32 n_channels;
- s32 i;
- s32 offset;
- u16 chanspec;
- char *ptr;
- struct brcmf_ssid_le ssid_le;
-
- eth_broadcast_addr(params_le->bssid);
- params_le->bss_type = DOT11_BSSTYPE_ANY;
- params_le->scan_type = BRCMF_SCANTYPE_ACTIVE;
- params_le->channel_num = 0;
- params_le->nprobes = cpu_to_le32(-1);
- params_le->active_time = cpu_to_le32(-1);
- params_le->passive_time = cpu_to_le32(-1);
- params_le->home_time = cpu_to_le32(-1);
- memset(&params_le->ssid_le, 0, sizeof(params_le->ssid_le));
-
- n_ssids = request->n_ssids;
- n_channels = request->n_channels;
-
- /* Copy channel array if applicable */
- brcmf_dbg(SCAN, "### List of channelspecs to scan ### %d\n",
- n_channels);
- if (n_channels > 0) {
- for (i = 0; i < n_channels; i++) {
- chanspec = channel_to_chanspec(&cfg->d11inf,
- request->channels[i]);
- brcmf_dbg(SCAN, "Chan : %d, Channel spec: %x\n",
- request->channels[i]->hw_value, chanspec);
- params_le->channel_list[i] = cpu_to_le16(chanspec);
- }
- } else {
- brcmf_dbg(SCAN, "Scanning all channels\n");
- }
- /* Copy ssid array if applicable */
- brcmf_dbg(SCAN, "### List of SSIDs to scan ### %d\n", n_ssids);
- if (n_ssids > 0) {
- offset = offsetof(struct brcmf_scan_params_le, channel_list) +
- n_channels * sizeof(u16);
- offset = roundup(offset, sizeof(u32));
- ptr = (char *)params_le + offset;
- for (i = 0; i < n_ssids; i++) {
- memset(&ssid_le, 0, sizeof(ssid_le));
- ssid_le.SSID_len =
- cpu_to_le32(request->ssids[i].ssid_len);
- memcpy(ssid_le.SSID, request->ssids[i].ssid,
- request->ssids[i].ssid_len);
- if (!ssid_le.SSID_len)
- brcmf_dbg(SCAN, "%d: Broadcast scan\n", i);
- else
- brcmf_dbg(SCAN, "%d: scan for %.32s size=%d\n",
- i, ssid_le.SSID, ssid_le.SSID_len);
- memcpy(ptr, &ssid_le, sizeof(ssid_le));
- ptr += sizeof(ssid_le);
- }
- } else {
- brcmf_dbg(SCAN, "Performing passive scan\n");
- params_le->scan_type = BRCMF_SCANTYPE_PASSIVE;
- }
- /* Adding mask to channel numbers */
- params_le->channel_num =
- cpu_to_le32((n_ssids << BRCMF_SCAN_PARAMS_NSSID_SHIFT) |
- (n_channels & BRCMF_SCAN_PARAMS_COUNT_MASK));
-}
-
static s32
brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
struct cfg80211_scan_request *request)
{
struct brcmf_pub *drvr = cfg->pub;
- s32 params_size = BRCMF_SCAN_PARAMS_FIXED_SIZE +
- offsetof(struct brcmf_escan_params_le, params_le);
+ s32 params_size = BRCMF_SCAN_PARAMS_V2_FIXED_SIZE +
+ offsetof(struct brcmf_escan_params_le, params_v2_le);
struct brcmf_escan_params_le *params;
s32 err = 0;
@@ -1391,8 +1446,22 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
goto exit;
}
BUG_ON(params_size + sizeof("escan") >= BRCMF_DCMD_MEDLEN);
- brcmf_escan_prep(cfg, &params->params_le, request);
- params->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
+ brcmf_escan_prep(cfg, &params->params_v2_le, request);
+
+ params->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION_V2);
+
+	if (!brcmf_feat_is_enabled(ifp, BRCMF_FEAT_SCAN_V2)) {
+		struct brcmf_escan_params_le *params_v1;
+
+		/* The firmware only understands version 1: rebuild the
+		 * request with the smaller fixed part and hand that one on
+		 * instead of the version 2 request prepared above.
+		 */
+		params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
+		params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
+		params_v1 = kzalloc(params_size, GFP_KERNEL);
+		if (!params_v1) {
+			/* Release the version 2 request here and clear the
+			 * pointer, so the bail-out is correct whether or not
+			 * the exit path frees params as well.
+			 */
+			kfree(params);
+			params = NULL;
+			err = -ENOMEM;
+			goto exit;
+		}
+		params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
+		brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
+		kfree(params);
+		params = params_v1;
+	}
+
params->action = cpu_to_le16(WL_ESCAN_ACTION_START);
params->sync_id = cpu_to_le16(0x1234);
@@ -1617,13 +1686,14 @@ static int brcmf_set_pmk(struct brcmf_if *ifp, const u8 *pmk_data, u16 pmk_len)
{
struct brcmf_pub *drvr = ifp->drvr;
struct brcmf_wsec_pmk_le pmk;
- int i, err;
+ int err;
+
+ memset(&pmk, 0, sizeof(pmk));
- /* convert to firmware key format */
- pmk.key_len = cpu_to_le16(pmk_len << 1);
- pmk.flags = cpu_to_le16(BRCMF_WSEC_PASSPHRASE);
- for (i = 0; i < pmk_len; i++)
- snprintf(&pmk.key[2 * i], 3, "%02x", pmk_data[i]);
+ /* pass pmk directly */
+ pmk.key_len = cpu_to_le16(pmk_len);
+ pmk.flags = cpu_to_le16(0);
+ memcpy(pmk.key, pmk_data, pmk_len);
/* store psk in firmware */
err = brcmf_fil_cmd_data_set(ifp, BRCMF_C_SET_WSEC_PMK,
@@ -4237,6 +4307,37 @@ exit:
return 0;
}
+/* Issue a version 3 "pmkid_info" operation to the firmware.
+ *
+ * @ifp: interface to issue the operation on.
+ * @pmksa: the PMKSA to operate on, or NULL to operate on the entire list
+ *	(flush).
+ * @alive: for a single PMKSA, true gives it BRCMF_PMKSA_NO_EXPIRY as its
+ *	remaining lifetime and false gives it a remaining lifetime of 0.
+ *
+ * Returns -ENOMEM if the request cannot be allocated, otherwise the result
+ * of the iovar call.
+ */
+static s32
+brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa,
+		  bool alive)
+{
+	struct brcmf_pmk_op_v3_le *pmk_op;
+	int length = offsetof(struct brcmf_pmk_op_v3_le, pmk);
+	int ret;
+
+	pmk_op = kzalloc(sizeof(*pmk_op), GFP_KERNEL);
+	if (!pmk_op)
+		return -ENOMEM;
+
+	pmk_op->version = cpu_to_le16(BRCMF_PMKSA_VER_3);
+
+	if (!pmksa) {
+		/* Flush operation, operate on entire list */
+		pmk_op->count = cpu_to_le16(0);
+	} else {
+		/* Single PMK operation */
+		pmk_op->count = cpu_to_le16(1);
+		length += sizeof(struct brcmf_pmksa_v3);
+		memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN);
+		memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN);
+		pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN;
+		pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0);
+	}
+
+	/* The length field covers only the part of the request in use, while
+	 * the whole zero-initialised structure is passed to the firmware.
+	 */
+	pmk_op->length = cpu_to_le16(length);
+
+	ret = brcmf_fil_iovar_data_set(ifp, "pmkid_info", pmk_op, sizeof(*pmk_op));
+	kfree(pmk_op);
+	return ret;
+}
+
static __used s32
brcmf_update_pmklist(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp)
{
@@ -4270,6 +4371,14 @@ brcmf_cfg80211_set_pmksa(struct wiphy *wiphy, struct net_device *ndev,
if (!check_vif_up(ifp->vif))
return -EIO;
+ brcmf_dbg(CONN, "set_pmksa - PMK bssid: %pM =\n", pmksa->bssid);
+ brcmf_dbg(CONN, "%*ph\n", WLAN_PMKID_LEN, pmksa->pmkid);
+
+ if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3))
+ return brcmf_pmksa_v3_op(ifp, pmksa, true);
+
+ /* TODO: implement PMKID_V2 */
+
npmk = le32_to_cpu(cfg->pmk_list.npmk);
for (i = 0; i < npmk; i++)
if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN))
@@ -4286,9 +4395,6 @@ brcmf_cfg80211_set_pmksa(struct wiphy *wiphy, struct net_device *ndev,
return -EINVAL;
}
- brcmf_dbg(CONN, "set_pmksa - PMK bssid: %pM =\n", pmk[npmk].bssid);
- brcmf_dbg(CONN, "%*ph\n", WLAN_PMKID_LEN, pmk[npmk].pmkid);
-
err = brcmf_update_pmklist(cfg, ifp);
brcmf_dbg(TRACE, "Exit\n");
@@ -4312,6 +4418,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev,
brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid);
+ if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3))
+ return brcmf_pmksa_v3_op(ifp, pmksa, false);
+
+ /* TODO: implement PMKID_V2 */
+
npmk = le32_to_cpu(cfg->pmk_list.npmk);
for (i = 0; i < npmk; i++)
if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN))
@@ -4348,6 +4459,11 @@ brcmf_cfg80211_flush_pmksa(struct wiphy *wiphy, struct net_device *ndev)
if (!check_vif_up(ifp->vif))
return -EIO;
+ if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_PMKID_V3))
+ return brcmf_pmksa_v3_op(ifp, NULL, false);
+
+ /* TODO: implement PMKID_V2 */
+
memset(&cfg->pmk_list, 0, sizeof(cfg->pmk_list));
err = brcmf_update_pmklist(cfg, ifp);
@@ -6489,18 +6605,20 @@ static s32 brcmf_notify_rssi(struct brcmf_if *ifp,
{
struct brcmf_cfg80211_vif *vif = ifp->vif;
struct brcmf_rssi_be *info = data;
- s32 rssi, snr, noise;
+ s32 rssi, snr = 0, noise = 0;
s32 low, high, last;
- if (e->datalen < sizeof(*info)) {
+ if (e->datalen >= sizeof(*info)) {
+ rssi = be32_to_cpu(info->rssi);
+ snr = be32_to_cpu(info->snr);
+ noise = be32_to_cpu(info->noise);
+ } else if (e->datalen >= sizeof(rssi)) {
+ rssi = be32_to_cpu(*(__be32 *)data);
+ } else {
brcmf_err("insufficient RSSI event data\n");
return 0;
}
- rssi = be32_to_cpu(info->rssi);
- snr = be32_to_cpu(info->snr);
- noise = be32_to_cpu(info->noise);
-
low = vif->cqm_rssi_low;
high = vif->cqm_rssi_high;
last = vif->cqm_rssi_last;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
index 8073f31be27d..9f9bf08a70bb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
@@ -212,8 +212,9 @@ struct sbsocramregs {
#define ARMCR4_TCBANB_MASK 0xf
#define ARMCR4_TCBANB_SHIFT 0
-#define ARMCR4_BSZ_MASK 0x3f
+#define ARMCR4_BSZ_MASK 0x7f
#define ARMCR4_BSZ_MULT 8192
+#define ARMCR4_BLK_1K_MASK 0x200
struct brcmf_core_priv {
struct brcmf_core pub;
@@ -684,6 +685,7 @@ static u32 brcmf_chip_tcm_ramsize(struct brcmf_core_priv *cr4)
u32 nbb;
u32 totb;
u32 bxinfo;
+ u32 blksize;
u32 idx;
corecap = brcmf_chip_core_read32(cr4, ARMCR4_CAP);
@@ -695,7 +697,11 @@ static u32 brcmf_chip_tcm_ramsize(struct brcmf_core_priv *cr4)
for (idx = 0; idx < totb; idx++) {
brcmf_chip_core_write32(cr4, ARMCR4_BANKIDX, idx);
bxinfo = brcmf_chip_core_read32(cr4, ARMCR4_BANKINFO);
- memsize += ((bxinfo & ARMCR4_BSZ_MASK) + 1) * ARMCR4_BSZ_MULT;
+ blksize = ARMCR4_BSZ_MULT;
+ if (bxinfo & ARMCR4_BLK_1K_MASK)
+ blksize >>= 3;
+
+ memsize += ((bxinfo & ARMCR4_BSZ_MASK) + 1) * blksize;
}
return memsize;
@@ -737,6 +743,8 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci)
return 0x170000;
case BRCM_CC_4378_CHIP_ID:
return 0x352000;
+ case BRCM_CC_4387_CHIP_ID:
+ return 0x740000;
default:
brcmf_err("unknown chip: %s\n", ci->pub.name);
break;
@@ -1292,15 +1300,18 @@ static bool brcmf_chip_cm3_set_active(struct brcmf_chip_priv *chip)
static inline void
brcmf_chip_cr4_set_passive(struct brcmf_chip_priv *chip)
{
+ int i;
struct brcmf_core *core;
brcmf_chip_disable_arm(chip, BCMA_CORE_ARM_CR4);
- core = brcmf_chip_get_core(&chip->pub, BCMA_CORE_80211);
- brcmf_chip_resetcore(core, D11_BCMA_IOCTL_PHYRESET |
- D11_BCMA_IOCTL_PHYCLOCKEN,
- D11_BCMA_IOCTL_PHYCLOCKEN,
- D11_BCMA_IOCTL_PHYCLOCKEN);
+ /* Disable the cores only and let the firmware enable them.
+ * Releasing reset ourselves breaks BCM4387 in weird ways.
+ */
+ for (i = 0; (core = brcmf_chip_get_d11core(&chip->pub, i)); i++)
+ brcmf_chip_coredisable(core, D11_BCMA_IOCTL_PHYRESET |
+ D11_BCMA_IOCTL_PHYCLOCKEN,
+ D11_BCMA_IOCTL_PHYCLOCKEN);
}
static bool brcmf_chip_cr4_set_active(struct brcmf_chip_priv *chip, u32 rstvec)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index f235beaddddb..a194b0e68eb5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -101,7 +101,7 @@ void brcmf_c_set_joinpref_default(struct brcmf_if *ifp)
static int brcmf_c_download(struct brcmf_if *ifp, u16 flag,
struct brcmf_dload_data_le *dload_buf,
- u32 len)
+ u32 len, const char *var)
{
s32 err;
@@ -111,18 +111,18 @@ static int brcmf_c_download(struct brcmf_if *ifp, u16 flag,
dload_buf->len = cpu_to_le32(len);
dload_buf->crc = cpu_to_le32(0);
- err = brcmf_fil_iovar_data_set(ifp, "clmload", dload_buf,
+ err = brcmf_fil_iovar_data_set(ifp, var, dload_buf,
struct_size(dload_buf, data, len));
return err;
}
-static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
+static int brcmf_c_download_blob(struct brcmf_if *ifp,
+ const void *data, size_t size,
+ const char *loadvar, const char *statvar)
{
struct brcmf_pub *drvr = ifp->drvr;
- struct brcmf_bus *bus = drvr->bus_if;
struct brcmf_dload_data_le *chunk_buf;
- const struct firmware *clm = NULL;
u32 chunk_len;
u32 datalen;
u32 cumulative_len;
@@ -132,21 +132,14 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
brcmf_dbg(TRACE, "Enter\n");
- err = brcmf_bus_get_blob(bus, &clm, BRCMF_BLOB_CLM);
- if (err || !clm) {
- brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n",
- err);
- return 0;
- }
-
chunk_buf = kzalloc(struct_size(chunk_buf, data, MAX_CHUNK_LEN),
GFP_KERNEL);
if (!chunk_buf) {
err = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
- datalen = clm->size;
+ datalen = size;
cumulative_len = 0;
do {
if (datalen > MAX_CHUNK_LEN) {
@@ -155,9 +148,10 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
chunk_len = datalen;
dl_flag |= DL_END;
}
- memcpy(chunk_buf->data, clm->data + cumulative_len, chunk_len);
+ memcpy(chunk_buf->data, data + cumulative_len, chunk_len);
- err = brcmf_c_download(ifp, dl_flag, chunk_buf, chunk_len);
+ err = brcmf_c_download(ifp, dl_flag, chunk_buf, chunk_len,
+ loadvar);
dl_flag &= ~DL_BEGIN;
@@ -166,20 +160,64 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
} while ((datalen > 0) && (err == 0));
if (err) {
- bphy_err(drvr, "clmload (%zu byte file) failed (%d)\n",
- clm->size, err);
- /* Retrieve clmload_status and print */
- err = brcmf_fil_iovar_int_get(ifp, "clmload_status", &status);
+ bphy_err(drvr, "%s (%zu byte file) failed (%d)\n",
+ loadvar, size, err);
+ /* Retrieve status and print */
+ err = brcmf_fil_iovar_int_get(ifp, statvar, &status);
if (err)
- bphy_err(drvr, "get clmload_status failed (%d)\n", err);
+ bphy_err(drvr, "get %s failed (%d)\n", statvar, err);
else
- brcmf_dbg(INFO, "clmload_status=%d\n", status);
+ brcmf_dbg(INFO, "%s=%d\n", statvar, status);
err = -EIO;
}
kfree(chunk_buf);
-done:
- release_firmware(clm);
+ return err;
+}
+
+/* Fetch the CLM blob from the bus layer and hand it to the firmware via
+ * the "clmload" iovar. A missing blob is not treated as an error: it is
+ * logged and 0 is returned. Otherwise the download result is returned and
+ * the firmware image is released.
+ */
+static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
+{
+ const struct firmware *clm = NULL;
+ s32 ret;
+
+ brcmf_dbg(TRACE, "Enter\n");
+
+ ret = brcmf_bus_get_blob(ifp->drvr->bus_if, &clm, BRCMF_BLOB_CLM);
+ if (ret == 0 && clm) {
+ ret = brcmf_c_download_blob(ifp, clm->data, clm->size,
+ "clmload", "clmload_status");
+ release_firmware(clm);
+ return ret;
+ }
+
+ brcmf_info("no clm_blob available (err=%d), device may have limited channels available\n",
+ ret);
+ return 0;
+}
+
+/* Fetch the TxCap blob from the bus layer and hand it to the firmware via
+ * the "txcapload" iovar. A missing blob is only logged and yields 0;
+ * otherwise the download result is returned and the firmware image is
+ * released.
+ */
+static int brcmf_c_process_txcap_blob(struct brcmf_if *ifp)
+{
+ struct brcmf_pub *drvr = ifp->drvr;
+ struct brcmf_bus *bus = drvr->bus_if;
+ const struct firmware *fw = NULL;
+ s32 err;
+
+ brcmf_dbg(TRACE, "Enter\n");
+
+ err = brcmf_bus_get_blob(bus, &fw, BRCMF_BLOB_TXCAP);
+ if (err || !fw) {
+ brcmf_info("no txcap_blob available (err=%d)\n", err);
+ return 0;
+ }
+
+ brcmf_info("TxCap blob found, loading\n");
+ err = brcmf_c_download_blob(ifp, fw->data, fw->size,
+ "txcapload", "txcapload_status");
+
+ release_firmware(fw);
return err;
}
@@ -208,6 +246,23 @@ static const u8 brcmf_default_mac_address[ETH_ALEN] = {
0x00, 0x90, 0x4c, 0xc5, 0x12, 0x38
};
+/* Upload the platform-provided calibration blob, if there is one, using
+ * the "calload" iovar. Returns 0 when the settings carry no blob (NULL
+ * pointer or zero size), otherwise the result of the download.
+ */
+static int brcmf_c_process_cal_blob(struct brcmf_if *ifp)
+{
+ struct brcmf_mp_device *mp = ifp->drvr->settings;
+
+ brcmf_dbg(TRACE, "Enter\n");
+
+ if (mp->cal_blob && mp->cal_size) {
+ brcmf_info("Calibration blob provided by platform, loading\n");
+ return brcmf_c_download_blob(ifp, mp->cal_blob, mp->cal_size,
+ "calload", "calload_status");
+ }
+
+ return 0;
+}
+
int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
{
struct brcmf_pub *drvr = ifp->drvr;
@@ -291,6 +346,20 @@ int brcmf_c_preinit_dcmds(struct brcmf_if *ifp)
goto done;
}
+ /* Do TxCap downloading, if needed */
+ err = brcmf_c_process_txcap_blob(ifp);
+ if (err < 0) {
+ bphy_err(drvr, "download TxCap blob file failed, %d\n", err);
+ goto done;
+ }
+
+ /* Download external calibration blob, if available */
+ err = brcmf_c_process_cal_blob(ifp);
+ if (err < 0) {
+ bphy_err(drvr, "download calibration blob file failed, %d\n", err);
+ goto done;
+ }
+
/* query for 'ver' to get version info from firmware */
memset(buf, 0, sizeof(buf));
err = brcmf_fil_iovar_data_get(ifp, "ver", buf, sizeof(buf));
@@ -487,6 +556,7 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev,
/* No platform data for this device, try OF and DMI data */
brcmf_dmi_probe(settings, chip, chiprev);
brcmf_of_probe(dev, bus_type, settings);
+ brcmf_acpi_probe(dev, bus_type, settings);
}
return settings;
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
index aa25abffcc7d..2be2986d2110 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
@@ -54,6 +54,8 @@ struct brcmf_mp_device {
const char *board_type;
unsigned char mac[ETH_ALEN];
const char *antenna_sku;
+ const void *cal_blob;
+ int cal_size;
union {
struct brcmfmac_sdio_pd sdio;
} bus;
@@ -77,6 +79,15 @@ static inline void
brcmf_dmi_probe(struct brcmf_mp_device *settings, u32 chip, u32 chiprev) {}
#endif
+#ifdef CONFIG_ACPI
+void brcmf_acpi_probe(struct device *dev, enum brcmf_bus_type bus_type,
+ struct brcmf_mp_device *settings);
+#else
+static inline void brcmf_acpi_probe(struct device *dev,
+ enum brcmf_bus_type bus_type,
+ struct brcmf_mp_device *settings) {}
+#endif
+
u8 brcmf_map_prio_to_prec(void *cfg, u8 prio);
u8 brcmf_map_prio_to_aci(void *cfg, u8 prio);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index 10bac865d724..6d10c9efbe93 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -126,6 +126,53 @@ static void brcmf_feat_firmware_overrides(struct brcmf_pub *drv)
drv->feat_flags |= feat_flags;
}
+/* One row of the WLC-version feature table: feature bits that are switched
+ * on once the firmware reports at least WLC version
+ * min_ver_major.min_ver_minor.
+ */
+struct brcmf_feat_wlcfeat {
+ u16 min_ver_major;
+ u16 min_ver_minor;
+ u32 feat_flags;
+};
+
+/* Rows are cumulative: every row whose minimum version is met contributes
+ * its flags, so a 13.x firmware gets both PMKID_V2 and PMKID_V3.
+ */
+static const struct brcmf_feat_wlcfeat brcmf_feat_wlcfeat_map[] = {
+ { 12, 0, BIT(BRCMF_FEAT_PMKID_V2) },
+ { 13, 0, BIT(BRCMF_FEAT_PMKID_V3) },
+};
+
+/* Enable the feature flags implied by the WLC version the firmware reports.
+ *
+ * Reads the "wlc_ver" iovar; if that fails nothing is changed. Every table
+ * row whose minimum major.minor is met contributes its flags, which are
+ * logged and then OR-ed into drv->feat_flags.
+ */
+static void brcmf_feat_wlc_version_overrides(struct brcmf_pub *drv)
+{
+ const struct brcmf_feat_wlcfeat *row = brcmf_feat_wlcfeat_map;
+ const struct brcmf_feat_wlcfeat *end =
+ row + ARRAY_SIZE(brcmf_feat_wlcfeat_map);
+ struct brcmf_if *ifp = brcmf_get_ifp(drv, 0);
+ struct brcmf_wlc_version_le ver;
+ u32 enable = 0;
+ int bit, major, minor;
+
+ if (brcmf_fil_iovar_data_get(ifp, "wlc_ver", &ver, sizeof(ver)))
+ return;
+
+ major = le16_to_cpu(ver.wlc_ver_major);
+ minor = le16_to_cpu(ver.wlc_ver_minor);
+
+ brcmf_dbg(INFO, "WLC version: %d.%d\n", major, minor);
+
+ for (; row < end; row++) {
+ /* skip rows that need a newer firmware than this one */
+ if (major < row->min_ver_major)
+ continue;
+ if (major == row->min_ver_major && minor < row->min_ver_minor)
+ continue;
+ enable |= row->feat_flags;
+ }
+
+ if (!enable)
+ return;
+
+ for (bit = 0; bit < BRCMF_FEAT_LAST; bit++) {
+ if (!(enable & BIT(bit)))
+ continue;
+ brcmf_dbg(INFO, "enabling firmware feature: %s\n",
+ brcmf_feat_names[bit]);
+ }
+
+ drv->feat_flags |= enable;
+}
+
/**
* brcmf_feat_iovar_int_get() - determine feature through iovar query.
*
@@ -290,6 +337,7 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
ifp->drvr->feat_flags |= BIT(BRCMF_FEAT_SCAN_RANDOM_MAC);
brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_FWSUP, "sup_wpa");
+ brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_SCAN_V2, "scan_ver");
if (drvr->settings->feature_disable) {
brcmf_dbg(INFO, "Features: 0x%02x, disable: 0x%02x\n",
@@ -298,6 +346,7 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
ifp->drvr->feat_flags &= ~drvr->settings->feature_disable;
}
+ brcmf_feat_wlc_version_overrides(drvr);
brcmf_feat_firmware_overrides(drvr);
/* set chip related quirks */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
index f1b086a69d73..7f4f0b3e4a7b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.h
@@ -30,6 +30,7 @@
* SAE: simultaneous authentication of equals
* FWAUTH: Firmware authenticator
* DUMP_OBSS: Firmware has capable to dump obss info to support ACS
+ * SCAN_V2: Version 2 scan params
*/
#define BRCMF_FEAT_LIST \
BRCMF_FEAT_DEF(MBSS) \
@@ -53,7 +54,10 @@
BRCMF_FEAT_DEF(DOT11H) \
BRCMF_FEAT_DEF(SAE) \
BRCMF_FEAT_DEF(FWAUTH) \
- BRCMF_FEAT_DEF(DUMP_OBSS)
+ BRCMF_FEAT_DEF(DUMP_OBSS) \
+ BRCMF_FEAT_DEF(SCAN_V2) \
+ BRCMF_FEAT_DEF(PMKID_V2) \
+ BRCMF_FEAT_DEF(PMKID_V3)
/*
* Quirks:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
index 04e1beedfd81..792adaf880b4 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
@@ -48,6 +48,10 @@
/* size of brcmf_scan_params not including variable length array */
#define BRCMF_SCAN_PARAMS_FIXED_SIZE 64
+#define BRCMF_SCAN_PARAMS_V2_FIXED_SIZE 72
+
+/* version of brcmf_scan_params structure */
+#define BRCMF_SCAN_PARAMS_VERSION_V2 2
/* masks for channel and ssid count */
#define BRCMF_SCAN_PARAMS_COUNT_MASK 0x0000ffff
@@ -67,6 +71,7 @@
#define BRCMF_PRIMARY_KEY (1 << 1)
#define DOT11_BSSTYPE_ANY 2
#define BRCMF_ESCAN_REQ_VERSION 1
+#define BRCMF_ESCAN_REQ_VERSION_V2 2
#define BRCMF_MAXRATES_IN_SET 16 /* max # of rates in rateset */
@@ -169,6 +174,10 @@
#define BRCMF_HE_CAP_MCS_MAP_NSS_MAX 8
+#define BRCMF_PMKSA_VER_2 2
+#define BRCMF_PMKSA_VER_3 3
+#define BRCMF_PMKSA_NO_EXPIRY 0xffffffff
+
/* MAX_CHUNK_LEN is the maximum length for data passing to firmware in each
* ioctl. It is relatively small because firmware has small maximum size input
* playload restriction for ioctls.
@@ -350,6 +359,12 @@ struct brcmf_ssid_le {
unsigned char SSID[IEEE80211_MAX_SSID_LEN];
};
+/* Alternate SSID structure whose length field is a single byte; embedded
+ * in the brcmf_pmksa_v2 and brcmf_pmksa_v3 records.
+ */
+struct brcmf_ssid8_le {
+ u8 SSID_len;
+ unsigned char SSID[IEEE80211_MAX_SSID_LEN];
+};
+
struct brcmf_scan_params_le {
struct brcmf_ssid_le ssid_le; /* default: {0, ""} */
u8 bssid[ETH_ALEN]; /* default: bcast */
@@ -386,6 +401,45 @@ struct brcmf_scan_params_le {
__le16 channel_list[1]; /* list of chanspecs */
};
+struct brcmf_scan_params_v2_le {
+ __le16 version; /* structure version */
+ __le16 length; /* structure length */
+ struct brcmf_ssid_le ssid_le; /* default: {0, ""} */
+ u8 bssid[ETH_ALEN]; /* default: bcast */
+ s8 bss_type; /* default: any,
+ * DOT11_BSSTYPE_ANY/INFRASTRUCTURE/INDEPENDENT
+ */
+ u8 pad;
+ __le32 scan_type; /* flags, 0 use default */
+ __le32 nprobes; /* -1 use default, number of probes per channel */
+ __le32 active_time; /* -1 use default, dwell time per channel for
+ * active scanning
+ */
+ __le32 passive_time; /* -1 use default, dwell time per channel
+ * for passive scanning
+ */
+ __le32 home_time; /* -1 use default, dwell time for the
+ * home channel between channel scans
+ */
+ __le32 channel_num; /* count of channels and ssids that follow
+ *
+ * low half is count of channels in
+ * channel_list, 0 means default (use all
+ * available channels)
+ *
+ * high half is entries in struct brcmf_ssid
+ * array that follows channel_list, aligned for
+ * s32 (4 bytes) meaning an odd channel count
+ * implies a 2-byte pad between end of
+ * channel_list and first ssid
+ *
+ * if ssid count is zero, single ssid in the
+ * fixed parameter portion is assumed, otherwise
+ * ssid in the fixed portion is ignored
+ */
+ __le16 channel_list[1]; /* list of chanspecs */
+};
+
struct brcmf_scan_results {
u32 buflen;
u32 version;
@@ -397,7 +451,10 @@ struct brcmf_escan_params_le {
__le32 version;
__le16 action;
__le16 sync_id;
- struct brcmf_scan_params_le params_le;
+ union {
+ struct brcmf_scan_params_le params_le;
+ struct brcmf_scan_params_v2_le params_v2_le;
+ };
};
struct brcmf_escan_result_le {
@@ -742,6 +799,31 @@ struct brcmf_rev_info_le {
};
/**
+ * struct brcmf_wlc_version_le - firmware revision info.
+ *
+ * @version: structure version.
+ * @length: structure length.
+ * @epi_ver_major: EPI major version
+ * @epi_ver_minor: EPI minor version
+ * @epi_ver_rc: EPI rc version
+ * @epi_ver_incr: EPI increment version
+ * @wlc_ver_major: WLC major version
+ * @wlc_ver_minor: WLC minor version
+ */
+struct brcmf_wlc_version_le {
+ __le16 version;
+ __le16 length;
+
+ __le16 epi_ver_major;
+ __le16 epi_ver_minor;
+ __le16 epi_ver_rc;
+ __le16 epi_ver_incr;
+
+ __le16 wlc_ver_major;
+ __le16 wlc_ver_minor;
+};
+
+/**
* struct brcmf_assoclist_le - request assoc list.
*
* @count: indicates number of stations.
@@ -804,6 +886,51 @@ struct brcmf_pmksa {
};
/**
+ * struct brcmf_pmksa_v2 - PMK Security Association
+ *
+ * @length: Length of the structure.
+ * @bssid: The AP's BSSID.
+ * @pmkid: The PMK ID.
+ * @pmk: PMK material for FILS key derivation.
+ * @pmk_len: Length of PMK data.
+ * @ssid: The AP's SSID.
+ * @fils_cache_id: FILS cache identifier
+ */
+struct brcmf_pmksa_v2 {
+ __le16 length;
+ u8 bssid[ETH_ALEN];
+ u8 pmkid[WLAN_PMKID_LEN];
+ u8 pmk[WLAN_PMK_LEN_SUITE_B_192];
+ __le16 pmk_len;
+ struct brcmf_ssid8_le ssid;
+ /* Annotated little-endian like the other 16-bit fields of this
+ * firmware record (and like fils_cache_id in brcmf_pmksa_v3), so
+ * sparse flags any host-endian store. Size and layout are unchanged.
+ */
+ __le16 fils_cache_id;
+};
+
+/**
+ * struct brcmf_pmksa_v3 - PMK Security Association
+ *
+ * @bssid: The AP's BSSID.
+ * @pmkid: The PMK ID.
+ * @pmkid_len: The length of the PMK ID.
+ * @pmk: PMK material for FILS key derivation.
+ * @pmk_len: Length of PMK data.
+ * @fils_cache_id: FILS cache identifier
+ * @pad: Padding byte.
+ * @ssid: The AP's SSID.
+ * @time_left: Remaining time until expiry. 0 = expired, ~0 = no expiry.
+ */
+struct brcmf_pmksa_v3 {
+ u8 bssid[ETH_ALEN];
+ u8 pmkid[WLAN_PMKID_LEN];
+ u8 pmkid_len;
+ u8 pmk[WLAN_PMK_LEN_SUITE_B_192];
+ u8 pmk_len;
+ __le16 fils_cache_id;
+ u8 pad;
+ struct brcmf_ssid8_le ssid;
+ __le32 time_left;
+};
+
+/**
* struct brcmf_pmk_list_le - List of pmksa's.
*
* @npmk: Number of pmksa's.
@@ -815,6 +942,34 @@ struct brcmf_pmk_list_le {
};
/**
+ * struct brcmf_pmk_list_v2_le - List of pmksa's.
+ *
+ * @version: Request version.
+ * @length: Length of this structure.
+ * @pmk: PMK SA information.
+ */
+struct brcmf_pmk_list_v2_le {
+ __le16 version;
+ __le16 length;
+ struct brcmf_pmksa_v2 pmk[BRCMF_MAXPMKID];
+};
+
+/**
+ * struct brcmf_pmk_op_v3_le - Operation on PMKSA list.
+ *
+ * @version: Request version.
+ * @length: Length of this structure.
+ * @count: Entry count; presumably the number of @pmk entries in use
+ *	(TODO: confirm against the caller).
+ * @pad: Padding.
+ * @pmk: PMK SA information.
+ */
+struct brcmf_pmk_op_v3_le {
+ __le16 version;
+ __le16 length;
+ __le16 count;
+ __le16 pad;
+ struct brcmf_pmksa_v3 pmk[BRCMF_MAXPMKID];
+};
+
+/**
* struct brcmf_pno_param_le - PNO scan configuration parameters
*
* @version: PNO parameters version.
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index fdd0c9abc1a1..e406e11481a6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -86,6 +86,13 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
if (!of_property_read_string(np, "apple,antenna-sku", &prop))
settings->antenna_sku = prop;
+ /* The WLAN calibration blob is normally stored in SROM, but Apple
+ * ARM64 platforms pass it via the DT instead.
+ */
+ prop = of_get_property(np, "brcm,cal-blob", &settings->cal_size);
+ if (prop && settings->cal_size)
+ settings->cal_blob = prop;
+
/* Set board-type to the first string of the machine compatible prop */
root = of_find_node_by_path("/");
if (root && err) {
@@ -122,7 +129,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
sdio->drive_strength = val;
/* make sure there are interrupts defined in the node */
- if (!of_find_property(np, "interrupts", NULL))
+ if (!of_property_present(np, "interrupts"))
return;
irq = irq_of_parse_and_map(np, 0);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index a9b9b2dc62d4..59f3e9c5e139 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -15,6 +15,7 @@
#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/io.h>
+#include <linux/random.h>
#include <asm/unaligned.h>
#include <soc.h>
@@ -57,6 +58,7 @@ BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-pcie");
BRCMF_FW_CLM_DEF(43570, "brcmfmac43570-pcie");
BRCMF_FW_DEF(4358, "brcmfmac4358-pcie");
BRCMF_FW_DEF(4359, "brcmfmac4359-pcie");
+BRCMF_FW_DEF(4359C, "brcmfmac4359c-pcie");
BRCMF_FW_CLM_DEF(4364B2, "brcmfmac4364b2-pcie");
BRCMF_FW_CLM_DEF(4364B3, "brcmfmac4364b3-pcie");
BRCMF_FW_DEF(4365B, "brcmfmac4365b-pcie");
@@ -66,6 +68,8 @@ BRCMF_FW_DEF(4366C, "brcmfmac4366c-pcie");
BRCMF_FW_DEF(4371, "brcmfmac4371-pcie");
BRCMF_FW_CLM_DEF(4377B3, "brcmfmac4377b3-pcie");
BRCMF_FW_CLM_DEF(4378B1, "brcmfmac4378b1-pcie");
+BRCMF_FW_CLM_DEF(4378B3, "brcmfmac4378b3-pcie");
+BRCMF_FW_CLM_DEF(4387C2, "brcmfmac4387c2-pcie");
/* firmware config files */
MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.txt");
@@ -74,6 +78,7 @@ MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
/* per-board firmware binaries */
MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.bin");
MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.clm_blob");
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txcap_blob");
static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43602_CHIP_ID, 0xFFFFFFFF, 43602),
@@ -88,7 +93,8 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43569_CHIP_ID, 0xFFFFFFFF, 43570),
BRCMF_FW_ENTRY(BRCM_CC_43570_CHIP_ID, 0xFFFFFFFF, 43570),
BRCMF_FW_ENTRY(BRCM_CC_4358_CHIP_ID, 0xFFFFFFFF, 4358),
- BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359),
+ BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0x000001FF, 4359),
+ BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFE00, 4359C),
BRCMF_FW_ENTRY(BRCM_CC_4364_CHIP_ID, 0x0000000F, 4364B2), /* 3 */
BRCMF_FW_ENTRY(BRCM_CC_4364_CHIP_ID, 0xFFFFFFF0, 4364B3), /* 4 */
BRCMF_FW_ENTRY(BRCM_CC_4365_CHIP_ID, 0x0000000F, 4365B),
@@ -99,7 +105,9 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = {
BRCMF_FW_ENTRY(BRCM_CC_43666_CHIP_ID, 0xFFFFFFF0, 4366C),
BRCMF_FW_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371),
BRCMF_FW_ENTRY(BRCM_CC_4377_CHIP_ID, 0xFFFFFFFF, 4377B3), /* revision ID 4 */
- BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0xFFFFFFFF, 4378B1), /* revision ID 3 */
+ BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0x0000000F, 4378B1), /* revision ID 3 */
+ BRCMF_FW_ENTRY(BRCM_CC_4378_CHIP_ID, 0xFFFFFFE0, 4378B3), /* revision ID 5 */
+ BRCMF_FW_ENTRY(BRCM_CC_4387_CHIP_ID, 0xFFFFFFFF, 4387C2), /* revision ID 7 */
};
#define BRCMF_PCIE_FW_UP_TIMEOUT 5000 /* msec */
@@ -326,7 +334,9 @@ struct brcmf_pciedev_info {
char fw_name[BRCMF_FW_NAME_LEN];
char nvram_name[BRCMF_FW_NAME_LEN];
char clm_name[BRCMF_FW_NAME_LEN];
+ char txcap_name[BRCMF_FW_NAME_LEN];
const struct firmware *clm_fw;
+ const struct firmware *txcap_fw;
const struct brcmf_pcie_reginfo *reginfo;
void __iomem *regs;
void __iomem *tcm;
@@ -1517,6 +1527,10 @@ static int brcmf_pcie_get_blob(struct device *dev, const struct firmware **fw,
*fw = devinfo->clm_fw;
devinfo->clm_fw = NULL;
break;
+ case BRCMF_BLOB_TXCAP:
+ *fw = devinfo->txcap_fw;
+ devinfo->txcap_fw = NULL;
+ break;
default:
return -ENOENT;
}
@@ -1653,6 +1667,13 @@ brcmf_pcie_init_share_ram_info(struct brcmf_pciedev_info *devinfo,
return 0;
}
+/* Trailer describing the random seed handed to the firmware. When the seed
+ * is downloaded, this footer is written directly below the NVRAM image in
+ * TCM and the random bytes directly below the footer.
+ */
+struct brcmf_random_seed_footer {
+ __le32 length;
+ __le32 magic;
+};
+
+/* Value stored in the footer's magic field, and the number of random bytes
+ * provided (also stored in the footer's length field).
+ */
+#define BRCMF_RANDOM_SEED_MAGIC 0xfeedc0de
+#define BRCMF_RANDOM_SEED_LENGTH 0x100
static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo,
const struct firmware *fw, void *nvram,
@@ -1689,6 +1710,30 @@ static int brcmf_pcie_download_fw_nvram(struct brcmf_pciedev_info *devinfo,
nvram_len;
memcpy_toio(devinfo->tcm + address, nvram, nvram_len);
brcmf_fw_nvram_free(nvram);
+
+ if (devinfo->otp.valid) {
+ /* The seed is staged in a small on-stack buffer. The previous
+ * kzalloc() result was used without a NULL check, so an
+ * allocation failure would have been dereferenced by
+ * get_random_bytes(); a stack buffer cannot fail.
+ */
+ u8 randbuf[BRCMF_RANDOM_SEED_LENGTH];
+ struct brcmf_random_seed_footer footer = {
+ .length = cpu_to_le32(sizeof(randbuf)),
+ .magic = cpu_to_le32(BRCMF_RANDOM_SEED_MAGIC),
+ };
+
+ /* Some Apple chips/firmwares expect a buffer of random
+ * data to be present before NVRAM
+ */
+ brcmf_dbg(PCIE, "Download random seed\n");
+
+ /* footer sits immediately below the NVRAM image */
+ address -= sizeof(footer);
+ memcpy_toio(devinfo->tcm + address, &footer,
+ sizeof(footer));
+
+ /* random bytes sit immediately below the footer */
+ address -= sizeof(randbuf);
+ get_random_bytes(randbuf, sizeof(randbuf));
+ memcpy_toio(devinfo->tcm + address, randbuf, sizeof(randbuf));
+ }
} else {
brcmf_dbg(PCIE, "No matching NVRAM file found %s\n",
devinfo->nvram_name);
@@ -2016,6 +2061,11 @@ static int brcmf_pcie_read_otp(struct brcmf_pciedev_info *devinfo)
base = 0x1120;
words = 0x170;
break;
+ case BRCM_CC_4387_CHIP_ID:
+ coreid = BCMA_CORE_GCI;
+ base = 0x113c;
+ words = 0x170;
+ break;
default:
/* OTP not supported on this chip */
return 0;
@@ -2073,6 +2123,7 @@ static int brcmf_pcie_read_otp(struct brcmf_pciedev_info *devinfo)
#define BRCMF_PCIE_FW_CODE 0
#define BRCMF_PCIE_FW_NVRAM 1
#define BRCMF_PCIE_FW_CLM 2
+#define BRCMF_PCIE_FW_TXCAP 3
static void brcmf_pcie_setup(struct device *dev, int ret,
struct brcmf_fw_request *fwreq)
@@ -2099,6 +2150,7 @@ static void brcmf_pcie_setup(struct device *dev, int ret,
nvram = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.data;
nvram_len = fwreq->items[BRCMF_PCIE_FW_NVRAM].nv_data.len;
devinfo->clm_fw = fwreq->items[BRCMF_PCIE_FW_CLM].binary;
+ devinfo->txcap_fw = fwreq->items[BRCMF_PCIE_FW_TXCAP].binary;
kfree(fwreq);
ret = brcmf_chip_get_raminfo(devinfo->ci);
@@ -2180,6 +2232,7 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo)
{ ".bin", devinfo->fw_name },
{ ".txt", devinfo->nvram_name },
{ ".clm_blob", devinfo->clm_name },
+ { ".txcap_blob", devinfo->txcap_name },
};
fwreq = brcmf_fw_alloc_request(devinfo->ci->chip, devinfo->ci->chiprev,
@@ -2194,6 +2247,8 @@ brcmf_pcie_prepare_fw_request(struct brcmf_pciedev_info *devinfo)
fwreq->items[BRCMF_PCIE_FW_NVRAM].flags = BRCMF_FW_REQF_OPTIONAL;
fwreq->items[BRCMF_PCIE_FW_CLM].type = BRCMF_FW_TYPE_BINARY;
fwreq->items[BRCMF_PCIE_FW_CLM].flags = BRCMF_FW_REQF_OPTIONAL;
+ fwreq->items[BRCMF_PCIE_FW_TXCAP].type = BRCMF_FW_TYPE_BINARY;
+ fwreq->items[BRCMF_PCIE_FW_TXCAP].flags = BRCMF_FW_REQF_OPTIONAL;
/* NVRAM reserves PCI domain 0 for Broadcom's SDK faked bus */
fwreq->domain_nr = pci_domain_nr(devinfo->pdev->bus) + 1;
fwreq->bus_nr = devinfo->pdev->bus->number;
@@ -2491,6 +2546,7 @@ brcmf_pcie_remove(struct pci_dev *pdev)
brcmf_pcie_reset_device(devinfo);
brcmf_pcie_release_resource(devinfo);
release_firmware(devinfo->clm_fw);
+ release_firmware(devinfo->txcap_fw);
if (devinfo->ci)
brcmf_chip_detach(devinfo->ci);
@@ -2630,6 +2686,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = {
BRCMF_PCIE_DEVICE(BRCM_PCIE_43596_DEVICE_ID, CYW),
BRCMF_PCIE_DEVICE(BRCM_PCIE_4377_DEVICE_ID, WCC),
BRCMF_PCIE_DEVICE(BRCM_PCIE_4378_DEVICE_ID, WCC),
+ BRCMF_PCIE_DEVICE(BRCM_PCIE_4387_DEVICE_ID, WCC),
{ /* end: all zeroes */ }
};
diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
index 896615f57952..44684bf1b9ac 100644
--- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
@@ -54,6 +54,7 @@
#define BRCM_CC_4371_CHIP_ID 0x4371
#define BRCM_CC_4377_CHIP_ID 0x4377
#define BRCM_CC_4378_CHIP_ID 0x4378
+#define BRCM_CC_4387_CHIP_ID 0x4387
#define CY_CC_4373_CHIP_ID 0x4373
#define CY_CC_43012_CHIP_ID 43012
#define CY_CC_43439_CHIP_ID 43439
@@ -95,6 +96,7 @@
#define BRCM_PCIE_43596_DEVICE_ID 0x4415
#define BRCM_PCIE_4377_DEVICE_ID 0x4488
#define BRCM_PCIE_4378_DEVICE_ID 0x4425
+#define BRCM_PCIE_4387_DEVICE_ID 0x4433
/* brcmsmac IDs */
#define BCM4313_D11N2G_ID 0x4727 /* 4313 802.11n 2.4G device */
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 3bdd6774716d..05720352e49f 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -62,6 +62,7 @@
#define IWL_BZ_Z_GF_A_FW_PRE "iwlwifi-bz-z0-gf-a0-"
#define IWL_BNJ_A_FM_A_FW_PRE "iwlwifi-BzBnj-a0-fm-a0-"
#define IWL_BNJ_A_FM4_A_FW_PRE "iwlwifi-BzBnj-a0-fm4-a0-"
+#define IWL_BNJ_B_FM4_B_FW_PRE "iwlwifi-BzBnj-b0-fm4-b0-"
#define IWL_BNJ_A_GF_A_FW_PRE "iwlwifi-BzBnj-a0-gf-a0-"
#define IWL_BNJ_A_GF4_A_FW_PRE "iwlwifi-BzBnj-a0-gf4-a0-"
#define IWL_BNJ_A_HR_B_FW_PRE "iwlwifi-BzBnj-a0-hr-b0-"
@@ -132,6 +133,8 @@
IWL_BNJ_A_FM_A_FW_PRE __stringify(api) ".ucode"
#define IWL_BNJ_A_FM4_A_MODULE_FIRMWARE(api) \
IWL_BNJ_A_FM4_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_BNJ_B_FM4_B_MODULE_FIRMWARE(api) \
+ IWL_BNJ_B_FM4_B_FW_PRE __stringify(api) ".ucode"
#define IWL_BNJ_A_GF_A_MODULE_FIRMWARE(api) \
IWL_BNJ_A_GF_A_FW_PRE __stringify(api) ".ucode"
#define IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(api) \
@@ -998,6 +1001,14 @@ const struct iwl_cfg iwl_cfg_bnj_a0_fm4_a0 = {
.num_rbds = IWL_NUM_RBDS_AX210_HE,
};
+const struct iwl_cfg iwl_cfg_bnj_b0_fm4_b0 = {
+ .fw_name_pre = IWL_BNJ_B_FM4_B_FW_PRE,
+ .uhb_supported = true,
+ IWL_DEVICE_BZ,
+ .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM,
+ .num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0 = {
.fw_name_pre = IWL_BNJ_A_GF_A_FW_PRE,
.uhb_supported = true,
@@ -1059,6 +1070,7 @@ MODULE_FIRMWARE(IWL_BZ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_GL_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BNJ_A_FM_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BNJ_A_FM4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_BNJ_B_FM4_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BNJ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BNJ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
MODULE_FIRMWARE(IWL_BNJ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
index ddacd5b45aea..c9a48fc5fac8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rs.h
@@ -373,9 +373,6 @@ enum {
/* Bit 4-5: (0) SISO, (1) MIMO2 (2) MIMO3 */
#define RATE_VHT_MCS_RATE_CODE_MSK 0xf
-#define RATE_VHT_MCS_NSS_POS 4
-#define RATE_VHT_MCS_NSS_MSK (3 << RATE_VHT_MCS_NSS_POS)
-#define RATE_VHT_MCS_MIMO2_MSK BIT(RATE_VHT_MCS_NSS_POS)
/*
* Legacy OFDM rate format for bits 7:0
@@ -449,11 +446,16 @@ enum {
* 1 2xLTF+0.8us
* 2 2xLTF+1.6us
* 3 4xLTF+3.2us
- * HE TRIG:
+ * HE-EHT TRIG:
* 0 1xLTF+1.6us
* 1 2xLTF+1.6us
* 2 4xLTF+3.2us
* 3 (does not occur)
+ * EHT MU:
+ * 0 2xLTF+0.8us
+ * 1 2xLTF+1.6us
+ * 2 4xLTF+0.8us
+ * 3 4xLTF+3.2us
*/
#define RATE_MCS_HE_GI_LTF_POS 20
#define RATE_MCS_HE_GI_LTF_MSK_V1 (3 << RATE_MCS_HE_GI_LTF_POS)
@@ -546,12 +548,17 @@ enum {
/*
* Bits 13-11: (0) 20MHz, (1) 40MHz, (2) 80MHz, (3) 160MHz, (4) 320MHz
*/
-#define RATE_MCS_CHAN_WIDTH_MSK (0x7 << RATE_MCS_CHAN_WIDTH_POS)
-#define RATE_MCS_CHAN_WIDTH_20 (0 << RATE_MCS_CHAN_WIDTH_POS)
-#define RATE_MCS_CHAN_WIDTH_40 (1 << RATE_MCS_CHAN_WIDTH_POS)
-#define RATE_MCS_CHAN_WIDTH_80 (2 << RATE_MCS_CHAN_WIDTH_POS)
-#define RATE_MCS_CHAN_WIDTH_160 (3 << RATE_MCS_CHAN_WIDTH_POS)
-#define RATE_MCS_CHAN_WIDTH_320 (4 << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_MSK (0x7 << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_20_VAL 0
+#define RATE_MCS_CHAN_WIDTH_20 (RATE_MCS_CHAN_WIDTH_20_VAL << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_40_VAL 1
+#define RATE_MCS_CHAN_WIDTH_40 (RATE_MCS_CHAN_WIDTH_40_VAL << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_80_VAL 2
+#define RATE_MCS_CHAN_WIDTH_80 (RATE_MCS_CHAN_WIDTH_80_VAL << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_160_VAL 3
+#define RATE_MCS_CHAN_WIDTH_160 (RATE_MCS_CHAN_WIDTH_160_VAL << RATE_MCS_CHAN_WIDTH_POS)
+#define RATE_MCS_CHAN_WIDTH_320_VAL 4
+#define RATE_MCS_CHAN_WIDTH_320 (RATE_MCS_CHAN_WIDTH_320_VAL << RATE_MCS_CHAN_WIDTH_POS)
/* Bit 15-14: Antenna selection:
* Bit 14: Ant A active
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
index 1c4e84932058..fdd8b01f09e4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
@@ -367,7 +367,8 @@ enum iwl_rx_phy_eht_data1 {
/* number of EHT-LTF symbols 0 - 1 EHT-LTF, 1 - 2 EHT-LTFs, 2 - 4 EHT-LTFs,
* 3 - 6 EHT-LTFs, 4 - 8 EHT-LTFs */
IWL_RX_PHY_DATA1_EHT_SIG_LTF_NUM = 0x000000e0,
- IWL_RX_PHY_DATA1_EHT_RU_ALLOC = 0x0000ff00,
+ IWL_RX_PHY_DATA1_EHT_B0 = 0x00000100,
+ IWL_RX_PHY_DATA1_EHT_RU_B1_B7_ALLOC = 0x0000fe00,
};
/* goes into Metadata DW 7 */
@@ -413,7 +414,7 @@ enum iwl_rx_phy_eht_data2 {
/* OFDM_RX_VECTOR_COMMON_RU_ALLOC_0_OUT */
IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A1 = 0x000001ff,
IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A2 = 0x0003fe00,
- IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_A3 = 0x01fc0000,
+ IWL_RX_PHY_DATA2_EHT_MU_EXT_RU_ALLOC_B1 = 0x07fc0000,
/* info type: EHT-TB-EXT */
IWL_RX_PHY_DATA2_EHT_TB_EXT_TRIG_SIGA1 = 0xffffffff,
@@ -423,19 +424,18 @@ enum iwl_rx_phy_eht_data2 {
enum iwl_rx_phy_eht_data3 {
/* info type: EHT-MU-EXT */
/* OFDM_RX_VECTOR_COMMON_RU_ALLOC_1_OUT */
- IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B1 = 0x000001ff,
- IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B2 = 0x0003fe00,
- IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B3 = 0x01fc0000,
+ IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_B2 = 0x000001ff,
+ IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_C1 = 0x0003fe00,
+ IWL_RX_PHY_DATA3_EHT_MU_EXT_RU_ALLOC_C2 = 0x07fc0000,
};
/* goes into Metadata DW 4 */
enum iwl_rx_phy_eht_data4 {
/* info type: EHT-MU-EXT */
/* OFDM_RX_VECTOR_COMMON_RU_ALLOC_2_OUT */
- IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C1 = 0x000001ff,
- IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C2 = 0x0003fe00,
- IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_C3 = 0x01fc0000,
- IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS = 0x18000000,
+ IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_D1 = 0x000001ff,
+ IWL_RX_PHY_DATA4_EHT_MU_EXT_RU_ALLOC_D2 = 0x0003fe00,
+ IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS = 0x000c0000,
};
/* goes into Metadata DW 16 */
@@ -673,22 +673,31 @@ struct iwl_rx_mpdu_desc {
* @mac_phy_idx: MAC/PHY index
*/
u8 mac_phy_idx;
- /* DW4 - carries csum data only when rpa_en == 1 */
- /**
- * @raw_csum: raw checksum (alledgedly unreliable)
- */
- __le16 raw_csum;
-
+ /* DW4 */
union {
+ struct {
+ /* carries csum data only when rpa_en == 1 */
+ /**
+ * @raw_csum: raw checksum (allegedly unreliable)
+ */
+ __le16 raw_csum;
+
+ union {
+ /**
+ * @l3l4_flags: &enum iwl_rx_l3l4_flags
+ */
+ __le16 l3l4_flags;
+
+ /**
+ * @phy_data4: depends on info type, see phy_data1
+ */
+ __le16 phy_data4;
+ };
+ };
/**
- * @l3l4_flags: &enum iwl_rx_l3l4_flags
- */
- __le16 l3l4_flags;
-
- /**
- * @phy_data4: depends on info type, see phy_data1
+ * @phy_eht_data4: depends on info type, see phy_data1
*/
- __le16 phy_data4;
+ __le32 phy_eht_data4;
};
/* DW5 */
/**
@@ -725,7 +734,7 @@ struct iwl_rx_mpdu_desc {
#define RX_NO_DATA_INFO_TYPE_RX_ERR 1
#define RX_NO_DATA_INFO_TYPE_NDP 2
#define RX_NO_DATA_INFO_TYPE_MU_UNMATCHED 3
-#define RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED 4
+#define RX_NO_DATA_INFO_TYPE_TB_UNMATCHED 4
#define RX_NO_DATA_INFO_ERR_POS 8
#define RX_NO_DATA_INFO_ERR_MSK (0xff << RX_NO_DATA_INFO_ERR_POS)
@@ -743,6 +752,35 @@ struct iwl_rx_mpdu_desc {
#define RX_NO_DATA_RX_VEC0_VHT_NSTS_MSK 0x38000000
#define RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK 0x00f00000
+/* content of OFDM_RX_VECTOR_USIG_A1_OUT */
+enum iwl_rx_usig_a1 {
+ IWL_RX_USIG_A1_ENHANCED_WIFI_VER_ID = 0x00000007,
+ IWL_RX_USIG_A1_BANDWIDTH = 0x00000038,
+ IWL_RX_USIG_A1_UL_FLAG = 0x00000040,
+ IWL_RX_USIG_A1_BSS_COLOR = 0x00001f80,
+ IWL_RX_USIG_A1_TXOP_DURATION = 0x000fe000,
+ IWL_RX_USIG_A1_DISREGARD = 0x01f00000,
+ IWL_RX_USIG_A1_VALIDATE = 0x02000000,
+ IWL_RX_USIG_A1_EHT_BW320_SLOT = 0x04000000,
+ IWL_RX_USIG_A1_EHT_TYPE = 0x18000000,
+ IWL_RX_USIG_A1_RDY = 0x80000000,
+};
+
+/* content of OFDM_RX_VECTOR_USIG_A2_EHT_OUT */
+enum iwl_rx_usig_a2_eht {
+ IWL_RX_USIG_A2_EHT_PPDU_TYPE = 0x00000003,
+ IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2 = 0x00000004,
+ IWL_RX_USIG_A2_EHT_PUNC_CHANNEL = 0x000000f8,
+ IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B8 = 0x00000100,
+ IWL_RX_USIG_A2_EHT_SIG_MCS = 0x00000600,
+ IWL_RX_USIG_A2_EHT_SIG_SYM_NUM = 0x0000f800,
+ IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_1 = 0x000f0000,
+ IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_2 = 0x00f00000,
+ IWL_RX_USIG_A2_EHT_TRIG_USIG2_DISREGARD = 0x1f000000,
+ IWL_RX_USIG_A2_EHT_CRC_OK = 0x40000000,
+ IWL_RX_USIG_A2_EHT_RDY = 0x80000000,
+};
+
/**
* struct iwl_rx_no_data - RX no data descriptor
* @info: 7:0 frame type, 15:8 RX error type
@@ -780,7 +818,7 @@ struct iwl_rx_no_data {
* @rx_vec: DW-12:9 raw RX vectors from DSP according to modulation type.
* for VHT: OFDM_RX_VECTOR_SIGA1_OUT, OFDM_RX_VECTOR_SIGA2_OUT
* for HE: OFDM_RX_VECTOR_HE_SIGA1_OUT, OFDM_RX_VECTOR_HE_SIGA2_OUT
- * for EHT: OFDM_RX_VECTOR_USIG_A1_OUT, OFDM_RX_VECTOR_USIG_A2_OUT,
+ * for EHT: OFDM_RX_VECTOR_USIG_A1_OUT, OFDM_RX_VECTOR_USIG_A2_EHT_OUT,
* OFDM_RX_VECTOR_EHT_OUT, OFDM_RX_VECTOR_EHT_USER_FIELD_OUT
*/
struct iwl_rx_no_data_ver_3 {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/rs.c b/drivers/net/wireless/intel/iwlwifi/fw/rs.c
index e128d2e07f38..b09e68dbf5a9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/rs.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
*/
#include <net/mac80211.h>
@@ -126,7 +126,7 @@ u32 iwl_new_rate_from_v1(u32 rate_v1)
rate_v1 & RATE_MCS_HE_MSK_V1) {
rate_v2 |= rate_v1 & RATE_VHT_MCS_RATE_CODE_MSK;
- rate_v2 |= rate_v1 & RATE_VHT_MCS_MIMO2_MSK;
+ rate_v2 |= rate_v1 & RATE_MCS_NSS_MSK;
if (rate_v1 & RATE_MCS_HE_MSK_V1) {
u32 he_type_bits = rate_v1 & RATE_MCS_HE_TYPE_MSK_V1;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index cfa5e1b3c3f6..eaa0ff2736c5 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -659,6 +659,7 @@ extern const struct iwl_cfg iwl_cfg_bnj_a0_gf_a0;
extern const struct iwl_cfg iwl_cfg_bnj_a0_gf4_a0;
extern const struct iwl_cfg iwl_cfg_bnj_a0_hr_b0;
extern const struct iwl_cfg iwl_cfg_bnj_b0_fm_b0;
+extern const struct iwl_cfg iwl_cfg_bnj_b0_fm4_b0;
#endif /* CONFIG_IWLMVM */
#endif /* __IWL_CONFIG_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index 3e1f011e93aa..bece76b1a514 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -348,6 +348,7 @@ enum {
#define CSR_HW_RF_ID_TYPE_HRCDB (0x00109F00)
#define CSR_HW_RF_ID_TYPE_GF (0x0010D000)
#define CSR_HW_RF_ID_TYPE_GF4 (0x0010E000)
+#define CSR_HW_RF_ID_TYPE_MS (0x00111000)
/* HW_RF CHIP STEP */
#define CSR_HW_RF_STEP(_val) (((_val) >> 8) & 0xF)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 48e7376a5fea..87366b70b17f 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -350,9 +350,9 @@ void iwl_dbg_tlv_alloc(struct iwl_trans *trans, const struct iwl_ucode_tlv *tlv,
ret = dbg_tlv_alloc[tlv_idx](trans, tlv);
if (ret) {
- IWL_ERR(trans,
- "WRT: Failed to allocate TLV 0x%x, ret %d, (ext=%d)\n",
- type, ret, ext);
+ IWL_WARN(trans,
+ "WRT: Failed to allocate TLV 0x%x, ret %d, (ext=%d)\n",
+ type, ret, ext);
goto out_err;
}
@@ -1218,11 +1218,12 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
}
fwrt->trans->dbg.restart_required = FALSE;
- IWL_DEBUG_INFO(fwrt, "WRT: tp %d, reset_fw %d\n",
- tp, dump_data.trig->reset_fw);
- IWL_DEBUG_INFO(fwrt, "WRT: restart_required %d, last_tp_resetfw %d\n",
- fwrt->trans->dbg.restart_required,
- fwrt->trans->dbg.last_tp_resetfw);
+ IWL_DEBUG_FW(fwrt, "WRT: tp %d, reset_fw %d\n",
+ tp, dump_data.trig->reset_fw);
+ IWL_DEBUG_FW(fwrt,
+ "WRT: restart_required %d, last_tp_resetfw %d\n",
+ fwrt->trans->dbg.restart_required,
+ fwrt->trans->dbg.last_tp_resetfw);
if (fwrt->trans->trans_cfg->device_family ==
IWL_DEVICE_FAMILY_9000) {
@@ -1235,18 +1236,19 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
IWL_DEBUG_FW(fwrt, "WRT: FW_ASSERT due to reset_fw_mode-no restart\n");
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
IWL_FW_INI_RESET_FW_MODE_STOP_AND_RELOAD_FW) {
- IWL_DEBUG_INFO(fwrt, "WRT: stop and reload firmware\n");
+ IWL_DEBUG_FW(fwrt, "WRT: stop and reload firmware\n");
fwrt->trans->dbg.restart_required = TRUE;
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY) {
- IWL_DEBUG_INFO(fwrt, "WRT: stop only and no reload firmware\n");
+ IWL_DEBUG_FW(fwrt,
+ "WRT: stop only and no reload firmware\n");
fwrt->trans->dbg.restart_required = FALSE;
fwrt->trans->dbg.last_tp_resetfw =
le32_to_cpu(dump_data.trig->reset_fw);
} else if (le32_to_cpu(dump_data.trig->reset_fw) ==
IWL_FW_INI_RESET_FW_MODE_NOTHING) {
- IWL_DEBUG_INFO(fwrt,
- "WRT: nothing need to be done after debug collection\n");
+ IWL_DEBUG_FW(fwrt,
+ "WRT: nothing need to be done after debug collection\n");
} else {
IWL_ERR(fwrt, "WRT: wrong resetfw %d\n",
le32_to_cpu(dump_data.trig->reset_fw));
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 0c6b49fcb00d..45981e22b2db 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1084,6 +1084,11 @@ static const struct dmi_system_id dmi_tas_approved_list[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
},
},
+ { .ident = "MSFT",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ },
+ },
/* keep last */
{}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index aa791dbc3066..114c96ba39ee 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -654,7 +654,7 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm,
u32 action)
{
struct iwl_mac_ctx_cmd cmd = {};
- u32 tfd_queue_msk = BIT(mvm->snif_queue);
+ u32 tfd_queue_msk = 0;
int ret;
WARN_ON(vif->type != NL80211_IFTYPE_MONITOR);
@@ -669,6 +669,14 @@ static int iwl_mvm_mac_ctxt_cmd_listener(struct iwl_mvm *mvm,
MAC_FILTER_ACCEPT_GRP);
ieee80211_hw_set(mvm->hw, RX_INCLUDES_FCS);
+ /*
+ * the queue mask is only relevant for old TX API, and
+ * mvm->snif_queue isn't set here (it's still set to
+ * IWL_MVM_INVALID_QUEUE so the BIT() of it is UB)
+ */
+ if (!iwl_mvm_has_new_tx_api(mvm))
+ tfd_queue_msk = BIT(mvm->snif_queue);
+
/* Allocate sniffer station */
ret = iwl_mvm_allocate_int_sta(mvm, &mvm->snif_sta, tfd_queue_msk,
vif->type, IWL_STA_GENERAL_PURPOSE);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 565522466eba..ab02c6076276 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1362,6 +1362,28 @@ static void iwl_mvm_channel_switch_disconnect_wk(struct work_struct *wk)
ieee80211_chswitch_done(vif, false);
}
+static u8
+iwl_mvm_chandef_get_primary_80(struct cfg80211_chan_def *chandef)
+{
+ int data_start;
+ int control_start;
+ int bw;
+
+ if (chandef->width == NL80211_CHAN_WIDTH_320)
+ bw = 320;
+ else if (chandef->width == NL80211_CHAN_WIDTH_160)
+ bw = 160;
+ else
+ return 0;
+
+ /* data is bw wide, so it starts half the width below its center */
+ data_start = chandef->center_freq1 - bw / 2;
+ /* control is 20 MHz wide, so it starts 10 MHz below its center */
+ control_start = chandef->chan->center_freq - 10;
+
+ return (control_start - data_start) / 80;
+}
+
static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
@@ -1478,8 +1500,11 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
INIT_DELAYED_WORK(&mvmvif->csa_work,
iwl_mvm_channel_switch_disconnect_wk);
- if (vif->type == NL80211_IFTYPE_MONITOR)
+ if (vif->type == NL80211_IFTYPE_MONITOR) {
mvm->monitor_on = true;
+ mvm->monitor_p80 =
+ iwl_mvm_chandef_get_primary_80(&vif->bss_conf.chandef);
+ }
iwl_mvm_vif_dbgfs_register(mvm, vif);
@@ -5033,9 +5058,10 @@ static void iwl_mvm_mac_flush(struct ieee80211_hw *hw,
if (iwl_mvm_flush_sta(mvm, mvmsta, false))
IWL_ERR(mvm, "flush request fail\n");
} else {
- msk |= mvmsta->tfd_queue_msk;
if (iwl_mvm_has_new_tx_api(mvm))
iwl_mvm_wait_sta_queues_empty(mvm, mvmsta);
+ else /* only used for !iwl_mvm_has_new_tx_api() below */
+ msk |= mvmsta->tfd_queue_msk;
}
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 90bc95d96a78..6bd1a4c72a12 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1096,6 +1096,11 @@ struct iwl_mvm {
/* does a monitor vif exist (only one can exist hence bool) */
bool monitor_on;
+ /*
+ * primary channel position relative to the whole bandwidth,
+ * in steps of 80 MHz
+ */
+ u8 monitor_p80;
/* sniffer data to include in radiotap */
__le16 cur_aid;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index f30eeab5505b..e3fb1b2cea6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -337,10 +337,14 @@ static void rs_fw_eht_set_enabled_rates(const struct ieee80211_sta *sta,
const struct ieee80211_eht_mcs_nss_supp_bw *mcs_tx =
rs_fw_rs_mcs2eht_mcs(bw, eht_tx_mcs);
- /* got unsuppored index for bw */
+ /* got unsupported index for bw */
if (!mcs_rx || !mcs_tx)
continue;
+ /* break out if we don't support the bandwidth */
+ if (cmd->max_ch_width < (bw + IWL_TLC_MNG_CH_WIDTH_80MHZ))
+ break;
+
rs_fw_set_eht_mcs_nss(cmd->ht_rates, bw,
MAX_NSS_MCS(9, mcs_rx, mcs_tx), GENMASK(9, 0));
rs_fw_set_eht_mcs_nss(cmd->ht_rates, bw,
@@ -550,7 +554,7 @@ void rs_fw_rate_init(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
struct iwl_tlc_config_cmd_v4 cfg_cmd = {
.sta_id = mvmsta->sta_id,
.max_ch_width = update ?
- rs_fw_bw_from_sta_bw(sta) : RATE_MCS_CHAN_WIDTH_20,
+ rs_fw_bw_from_sta_bw(sta) : IWL_TLC_MNG_CH_WIDTH_20MHZ,
.flags = cpu_to_le16(rs_fw_get_config_flags(mvm, sta, sband)),
.chains = rs_fw_set_active_chains(iwl_mvm_get_valid_tx_ant(mvm)),
.sgi_ch_width_supp = rs_fw_sgi_cw_support(sta),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 0b50b816684a..1f81dff71bc4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*
- * Copyright(c) 2005 - 2014, 2018 - 2021 Intel Corporation. All rights reserved.
+ * Copyright(c) 2005 - 2014, 2018 - 2022 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
* Copyright(c) 2016 - 2017 Intel Deutschland GmbH
*****************************************************************************/
@@ -895,8 +895,7 @@ static int rs_rate_from_ucode_rate(const u32 ucode_rate,
WARN_ON_ONCE(1);
}
} else if (ucode_rate & RATE_MCS_VHT_MSK_V1) {
- nss = ((ucode_rate & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS) + 1;
+ nss = FIELD_GET(RATE_MCS_NSS_MSK, ucode_rate) + 1;
if (nss == 1) {
rate->type = LQ_VHT_SISO;
@@ -910,8 +909,7 @@ static int rs_rate_from_ucode_rate(const u32 ucode_rate,
WARN_ON_ONCE(1);
}
} else if (ucode_rate & RATE_MCS_HE_MSK_V1) {
- nss = ((ucode_rate & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS) + 1;
+ nss = FIELD_GET(RATE_MCS_NSS_MSK, ucode_rate) + 1;
if (nss == 1) {
rate->type = LQ_HE_SISO;
@@ -2885,8 +2883,7 @@ void iwl_mvm_update_frame_stats(struct iwl_mvm *mvm, u32 rate, bool agg)
nss = ((rate & RATE_HT_MCS_NSS_MSK_V1) >> RATE_HT_MCS_NSS_POS_V1) + 1;
} else if (rate & RATE_MCS_VHT_MSK_V1) {
mvm->drv_rx_stats.vht_frames++;
- nss = ((rate & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS) + 1;
+ nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1;
} else {
mvm->drv_rx_stats.legacy_frames++;
}
@@ -3665,8 +3662,7 @@ int rs_pretty_print_rate_v1(char *buf, int bufsz, const u32 rate)
if (rate & RATE_MCS_VHT_MSK_V1) {
type = "VHT";
mcs = rate & RATE_VHT_MCS_RATE_CODE_MSK;
- nss = ((rate & RATE_VHT_MCS_NSS_MSK)
- >> RATE_VHT_MCS_NSS_POS) + 1;
+ nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1;
} else if (rate & RATE_MCS_HT_MSK_V1) {
type = "HT";
mcs = rate & RATE_HT_MCS_INDEX_MSK_V1;
@@ -3675,8 +3671,7 @@ int rs_pretty_print_rate_v1(char *buf, int bufsz, const u32 rate)
} else if (rate & RATE_MCS_HE_MSK_V1) {
type = "HE";
mcs = rate & RATE_VHT_MCS_RATE_CODE_MSK;
- nss = ((rate & RATE_VHT_MCS_NSS_MSK)
- >> RATE_VHT_MCS_NSS_POS) + 1;
+ nss = FIELD_GET(RATE_MCS_NSS_MSK, rate) + 1;
} else {
type = "Unknown"; /* shouldn't happen */
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index 49ca1e168fc5..d2ce414879aa 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -190,7 +190,7 @@ static u32 iwl_mvm_set_mac80211_rx_flag(struct iwl_mvm *mvm,
default:
/* Expected in monitor (not having the keys) */
if (!mvm->monitor_on)
- IWL_ERR(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status);
+ IWL_WARN(mvm, "Unhandled alg: 0x%x\n", rx_pkt_status);
}
return 0;
@@ -253,8 +253,7 @@ static void iwl_mvm_rx_handle_tcm(struct iwl_mvm *mvm,
ARRAY_SIZE(thresh_tpt)))
return;
thr = thresh_tpt[rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK];
- thr *= 1 + ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS);
+ thr *= 1 + FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags);
}
thr <<= ((rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK_V1) >>
@@ -500,8 +499,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi,
u8 stbc = (rate_n_flags & RATE_MCS_STBC_MSK) >>
RATE_MCS_STBC_POS;
rx_status->nss =
- ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS) + 1;
+ FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags) + 1;
rx_status->rate_idx = rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK;
rx_status->encoding = RX_ENC_VHT;
rx_status->enc_flags |= stbc << RX_ENC_FLAG_STBC_SHIFT;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 549dbe0be223..91556d43735a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright (C) 2012-2014, 2018-2022 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2023 Intel Corporation
* Copyright (C) 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
*/
@@ -205,37 +205,47 @@ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
return 0;
}
+/* put a TLV on the skb and return data pointer
+ *
+ * Also pad the length to a multiple of 4 and zero out the whole data part
+ */
+static void *
+iwl_mvm_radiotap_put_tlv(struct sk_buff *skb, u16 type, u16 len)
+{
+ struct ieee80211_radiotap_tlv *tlv;
+
+ tlv = skb_put(skb, sizeof(*tlv));
+ tlv->type = cpu_to_le16(type);
+ tlv->len = cpu_to_le16(len);
+ return skb_put_zero(skb, ALIGN(len, 4));
+}
+
static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_vendor_radiotap *radiotap;
- const int size = sizeof(*radiotap) + sizeof(__le16);
+ struct ieee80211_radiotap_vendor_content *radiotap;
+ const u16 vendor_data_len = sizeof(mvm->cur_aid);
if (!mvm->cur_aid)
return;
- /* ensure alignment */
- BUILD_BUG_ON((size + 2) % 4);
+ radiotap = iwl_mvm_radiotap_put_tlv(skb,
+ IEEE80211_RADIOTAP_VENDOR_NAMESPACE,
+ sizeof(*radiotap) + vendor_data_len);
- radiotap = skb_put(skb, size + 2);
- radiotap->align = 1;
/* Intel OUI */
radiotap->oui[0] = 0xf6;
radiotap->oui[1] = 0x54;
radiotap->oui[2] = 0x25;
/* radiotap sniffer config sub-namespace */
- radiotap->subns = 1;
- radiotap->present = 0x1;
- radiotap->len = size - sizeof(*radiotap);
- radiotap->pad = 2;
+ radiotap->oui_subtype = 1;
+ radiotap->vendor_type = 0;
/* fill the data now */
memcpy(radiotap->data, &mvm->cur_aid, sizeof(mvm->cur_aid));
- /* and clear the padding */
- memset(radiotap->data + sizeof(__le16), 0, radiotap->pad);
- rx_status->flag |= RX_FLAG_RADIOTAP_VENDOR_DATA;
+ rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END;
}
/* iwl_mvm_pass_packet_to_mac80211 - passes the packet for mac80211 */
@@ -443,7 +453,7 @@ static int iwl_mvm_rx_crypto(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
*/
if (!is_multicast_ether_addr(hdr->addr1) &&
!mvm->monitor_on && net_ratelimit())
- IWL_ERR(mvm, "Unhandled alg: 0x%x\n", status);
+ IWL_WARN(mvm, "Unhandled alg: 0x%x\n", status);
}
return 0;
@@ -1167,8 +1177,11 @@ static void iwl_mvm_flip_address(u8 *addr)
struct iwl_mvm_rx_phy_data {
enum iwl_rx_phy_info_type info_type;
- __le32 d0, d1, d2, d3;
+ __le32 d0, d1, d2, d3, eht_d4, d5;
__le16 d4;
+ bool with_data;
+ bool first_subframe;
+ __le32 rx_vec[4];
u32 rate_n_flags;
u32 gp2_on_air_rise;
@@ -1446,6 +1459,528 @@ static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm,
}
}
+#define LE32_DEC_ENC(value, dec_bits, enc_bits) \
+ le32_encode_bits(le32_get_bits(value, dec_bits), enc_bits)
+
+#define IWL_MVM_ENC_USIG_VALUE_MASK(usig, in_value, dec_bits, enc_bits) do { \
+ typeof(enc_bits) _enc_bits = enc_bits; \
+ typeof(usig) _usig = usig; \
+ (_usig)->mask |= cpu_to_le32(_enc_bits); \
+ (_usig)->value |= LE32_DEC_ENC(in_value, dec_bits, _enc_bits); \
+} while (0)
+
+#define __IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \
+ eht->data[(rt_data)] |= \
+ (cpu_to_le32 \
+ (IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru ## _KNOWN) | \
+ LE32_DEC_ENC(data ## fw_data, \
+ IWL_RX_PHY_DATA ## fw_data ## _EHT_MU_EXT_RU_ALLOC_ ## fw_ru, \
+ IEEE80211_RADIOTAP_EHT_DATA ## rt_data ## _RU_ALLOC_CC_ ## rt_ru))
+
+#define _IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru) \
+ __IWL_MVM_ENC_EHT_RU(rt_data, rt_ru, fw_data, fw_ru)
+
+#define IEEE80211_RADIOTAP_RU_DATA_1_1_1 1
+#define IEEE80211_RADIOTAP_RU_DATA_2_1_1 2
+#define IEEE80211_RADIOTAP_RU_DATA_1_1_2 2
+#define IEEE80211_RADIOTAP_RU_DATA_2_1_2 2
+#define IEEE80211_RADIOTAP_RU_DATA_1_2_1 3
+#define IEEE80211_RADIOTAP_RU_DATA_2_2_1 3
+#define IEEE80211_RADIOTAP_RU_DATA_1_2_2 3
+#define IEEE80211_RADIOTAP_RU_DATA_2_2_2 4
+
+#define IWL_RX_RU_DATA_A1 2
+#define IWL_RX_RU_DATA_A2 2
+#define IWL_RX_RU_DATA_B1 2
+#define IWL_RX_RU_DATA_B2 3
+#define IWL_RX_RU_DATA_C1 3
+#define IWL_RX_RU_DATA_C2 3
+#define IWL_RX_RU_DATA_D1 4
+#define IWL_RX_RU_DATA_D2 4
+
+#define IWL_MVM_ENC_EHT_RU(rt_ru, fw_ru) \
+ _IWL_MVM_ENC_EHT_RU(IEEE80211_RADIOTAP_RU_DATA_ ## rt_ru, \
+ rt_ru, \
+ IWL_RX_RU_DATA_ ## fw_ru, \
+ fw_ru)
+
+static void iwl_mvm_decode_eht_ext_mu(struct iwl_mvm *mvm,
+ struct iwl_mvm_rx_phy_data *phy_data,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_radiotap_eht *eht,
+ struct ieee80211_radiotap_eht_usig *usig)
+{
+ if (phy_data->with_data) {
+ __le32 data1 = phy_data->d1;
+ __le32 data2 = phy_data->d2;
+ __le32 data3 = phy_data->d3;
+ __le32 data4 = phy_data->eht_d4;
+ __le32 data5 = phy_data->d5;
+ u32 phy_bw = phy_data->rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK;
+
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5,
+ IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5,
+ IWL_RX_PHY_DATA5_EHT_MU_PUNC_CH_CODE,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data4,
+ IWL_RX_PHY_DATA4_EHT_MU_EXT_SIGB_MCS,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS);
+ IWL_MVM_ENC_USIG_VALUE_MASK
+ (usig, data1, IWL_RX_PHY_DATA1_EHT_MU_NUM_SIG_SYM_USIGA2,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS);
+
+ eht->user_info[0] |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN) |
+ LE32_DEC_ENC(data5, IWL_RX_PHY_DATA5_EHT_MU_STA_ID_USR,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID);
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M);
+ eht->data[7] |= LE32_DEC_ENC
+ (data5, IWL_RX_PHY_DATA5_EHT_MU_NUM_USR_NON_OFDMA,
+ IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS);
+
+ /*
+ * Hardware labels the content channels/RU allocation values
+ * as follows:
+ * Content Channel 1 Content Channel 2
+ * 20 MHz: A1
+ * 40 MHz: A1 B1
+ * 80 MHz: A1 C1 B1 D1
+ * 160 MHz: A1 C1 A2 C2 B1 D1 B2 D2
+ * 320 MHz: A1 C1 A2 C2 A3 C3 A4 C4 B1 D1 B2 D2 B3 D3 B4 D4
+ *
+ * However firmware can only give us A1-D2, so the higher
+ * frequencies are missing.
+ */
+
+ switch (phy_bw) {
+ case RATE_MCS_CHAN_WIDTH_320:
+ /* additional values are missing in RX metadata */
+ case RATE_MCS_CHAN_WIDTH_160:
+ /* content channel 1 */
+ IWL_MVM_ENC_EHT_RU(1_2_1, A2);
+ IWL_MVM_ENC_EHT_RU(1_2_2, C2);
+ /* content channel 2 */
+ IWL_MVM_ENC_EHT_RU(2_2_1, B2);
+ IWL_MVM_ENC_EHT_RU(2_2_2, D2);
+ fallthrough;
+ case RATE_MCS_CHAN_WIDTH_80:
+ /* content channel 1 */
+ IWL_MVM_ENC_EHT_RU(1_1_2, C1);
+ /* content channel 2 */
+ IWL_MVM_ENC_EHT_RU(2_1_2, D1);
+ fallthrough;
+ case RATE_MCS_CHAN_WIDTH_40:
+ /* content channel 2 */
+ IWL_MVM_ENC_EHT_RU(2_1_1, B1);
+ fallthrough;
+ case RATE_MCS_CHAN_WIDTH_20:
+ IWL_MVM_ENC_EHT_RU(1_1_1, A1);
+ break;
+ }
+ } else {
+ __le32 usig_a1 = phy_data->rx_vec[0];
+ __le32 usig_a2 = phy_data->rx_vec[1];
+
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1,
+ IWL_RX_USIG_A1_DISREGARD,
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1,
+ IWL_RX_USIG_A1_VALIDATE,
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_PPDU_TYPE,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_PUNC_CHANNEL,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B8,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_SIG_MCS,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS);
+ IWL_MVM_ENC_USIG_VALUE_MASK
+ (usig, usig_a2, IWL_RX_USIG_A2_EHT_SIG_SYM_NUM,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_CRC_OK,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC);
+ }
+}
+
+static void iwl_mvm_decode_eht_ext_tb(struct iwl_mvm *mvm,
+ struct iwl_mvm_rx_phy_data *phy_data,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_radiotap_eht *eht,
+ struct ieee80211_radiotap_eht_usig *usig)
+{
+ if (phy_data->with_data) {
+ __le32 data5 = phy_data->d5;
+
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5,
+ IWL_RX_PHY_DATA5_EHT_TYPE_AND_COMP,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5,
+ IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE1,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1);
+
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, data5,
+ IWL_RX_PHY_DATA5_EHT_TB_SPATIAL_REUSE2,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2);
+ } else {
+ __le32 usig_a1 = phy_data->rx_vec[0];
+ __le32 usig_a2 = phy_data->rx_vec[1];
+
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a1,
+ IWL_RX_USIG_A1_DISREGARD,
+ IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_PPDU_TYPE,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_USIG2_VALIDATE_B2,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_1,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_TRIG_SPATIAL_REUSE_2,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_TRIG_USIG2_DISREGARD,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD);
+ IWL_MVM_ENC_USIG_VALUE_MASK(usig, usig_a2,
+ IWL_RX_USIG_A2_EHT_CRC_OK,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC);
+ }
+}
+
+static void iwl_mvm_decode_eht_ru(struct iwl_mvm *mvm,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_radiotap_eht *eht)
+{
+ u32 ru = le32_get_bits(eht->data[8],
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1);
+ enum nl80211_eht_ru_alloc nl_ru;
+
+ /* Using D1.5 Table 9-53a - Encoding of PS160 and RU Allocation subfields
+ * in an EHT variant User Info field
+ */
+
+ switch (ru) {
+ case 0 ... 36:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_26;
+ break;
+ case 37 ... 52:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_52;
+ break;
+ case 53 ... 60:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_106;
+ break;
+ case 61 ... 64:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_242;
+ break;
+ case 65 ... 66:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_484;
+ break;
+ case 67:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996;
+ break;
+ case 68:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996;
+ break;
+ case 69:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_4x996;
+ break;
+ case 70 ... 81:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_52P26;
+ break;
+ case 82 ... 89:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_106P26;
+ break;
+ case 90 ... 93:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_484P242;
+ break;
+ case 94 ... 95:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484;
+ break;
+ case 96 ... 99:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_996P484P242;
+ break;
+ case 100 ... 103:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_2x996P484;
+ break;
+ case 104:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996;
+ break;
+ case 105 ... 106:
+ nl_ru = NL80211_RATE_INFO_EHT_RU_ALLOC_3x996P484;
+ break;
+ default:
+ return;
+ }
+
+ rx_status->bw = RATE_INFO_BW_EHT_RU;
+ rx_status->eht.ru = nl_ru;
+}
+
+static void iwl_mvm_decode_eht_phy_data(struct iwl_mvm *mvm,
+ struct iwl_mvm_rx_phy_data *phy_data,
+ struct ieee80211_rx_status *rx_status,
+ struct ieee80211_radiotap_eht *eht,
+ struct ieee80211_radiotap_eht_usig *usig)
+
+{
+ __le32 data0 = phy_data->d0;
+ __le32 data1 = phy_data->d1;
+ __le32 usig_a1 = phy_data->rx_vec[0];
+ u8 info_type = phy_data->info_type;
+
+ /* Not in EHT range */
+ if (info_type < IWL_RX_PHY_INFO_TYPE_EHT_MU ||
+ info_type > IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT)
+ return;
+
+ usig->common |= cpu_to_le32
+ (IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN |
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN);
+ if (phy_data->with_data) {
+ usig->common |= LE32_DEC_ENC(data0,
+ IWL_RX_PHY_DATA0_EHT_UPLINK,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL);
+ usig->common |= LE32_DEC_ENC(data0,
+ IWL_RX_PHY_DATA0_EHT_BSS_COLOR_MASK,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR);
+ } else {
+ usig->common |= LE32_DEC_ENC(usig_a1,
+ IWL_RX_USIG_A1_UL_FLAG,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL);
+ usig->common |= LE32_DEC_ENC(usig_a1,
+ IWL_RX_USIG_A1_BSS_COLOR,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR);
+ }
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE);
+ eht->data[0] |= LE32_DEC_ENC(data0,
+ IWL_RX_PHY_DATA0_ETH_SPATIAL_REUSE_MASK,
+ IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE);
+
+ /* All RU allocation size/index values are in TB format */
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT);
+ eht->data[8] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PS160,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160);
+ eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_B0,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0);
+ eht->data[8] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_RU_B1_B7_ALLOC,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1);
+
+ iwl_mvm_decode_eht_ru(mvm, rx_status, eht);
+
+ /* We only get here if IWL_RX_MPDU_PHY_TSF_OVERLOAD is set, which
+ * happens only in monitor mode, so there is no need to check for
+ * monitor mode here
+ */
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80);
+ eht->data[1] |=
+ le32_encode_bits(mvm->monitor_p80,
+ IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80);
+
+ usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN);
+ if (phy_data->with_data)
+ usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_TXOP_DUR_MASK,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP);
+ else
+ usig->common |= LE32_DEC_ENC(usig_a1, IWL_RX_USIG_A1_TXOP_DURATION,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP);
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM);
+ eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_LDPC_EXT_SYM,
+ IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM);
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM);
+ eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PRE_FEC_PAD_MASK,
+ IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM);
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM);
+ eht->data[0] |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PE_DISAMBIG,
+ IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM);
+
+ /* TODO: what about IWL_RX_PHY_DATA0_EHT_BW320_SLOT */
+
+ if (!le32_get_bits(data0, IWL_RX_PHY_DATA0_EHT_SIGA_CRC_OK))
+ usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC);
+
+ usig->common |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN);
+ usig->common |= LE32_DEC_ENC(data0, IWL_RX_PHY_DATA0_EHT_PHY_VER,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER);
+
+ /*
+ * TODO: what about TB - IWL_RX_PHY_DATA1_EHT_TB_PILOT_TYPE,
+ * IWL_RX_PHY_DATA1_EHT_TB_LOW_SS
+ */
+
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF);
+ eht->data[0] |= LE32_DEC_ENC(data1, IWL_RX_PHY_DATA1_EHT_SIG_LTF_NUM,
+ IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF);
+
+ if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT ||
+ info_type == IWL_RX_PHY_INFO_TYPE_EHT_TB)
+ iwl_mvm_decode_eht_ext_tb(mvm, phy_data, rx_status, eht, usig);
+
+ if (info_type == IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT ||
+ info_type == IWL_RX_PHY_INFO_TYPE_EHT_MU)
+ iwl_mvm_decode_eht_ext_mu(mvm, phy_data, rx_status, eht, usig);
+}
+
+/*
+ * Append the EHT and EHT U-SIG radiotap TLVs to @skb and fill them, along
+ * with the EHT guard interval and A-MPDU EOF flags in the skb's RX status,
+ * from the rate flags and PHY data in @phy_data.
+ *
+ * @queue: RX queue index; the A-MPDU EOF reporting is done for queue 0 only.
+ */
+static void iwl_mvm_rx_eht(struct iwl_mvm *mvm, struct sk_buff *skb,
+ struct iwl_mvm_rx_phy_data *phy_data,
+ int queue)
+{
+ struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
+
+ struct ieee80211_radiotap_eht *eht;
+ struct ieee80211_radiotap_eht_usig *usig;
+ size_t eht_len = sizeof(*eht);
+
+ u32 rate_n_flags = phy_data->rate_n_flags;
+ u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK;
+ /* EHT and HE have the same values for LTF */
+ u8 ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN;
+ u16 phy_info = phy_data->phy_info;
+ u32 bw;
+
+ /* reserve one extra u32 for a single user_info word (data frames only) */
+ if (phy_data->with_data)
+ eht_len += sizeof(u32);
+
+ eht = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT, eht_len);
+
+ usig = iwl_mvm_radiotap_put_tlv(skb, IEEE80211_RADIOTAP_EHT_USIG,
+ sizeof(*usig));
+ rx_status->flag |= RX_FLAG_RADIOTAP_TLV_AT_END;
+ usig->common |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN);
+
+ /* specific handling for 320MHz: add the BW320 slot bits from d0 */
+ bw = FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK, rate_n_flags);
+ if (bw == RATE_MCS_CHAN_WIDTH_320_VAL)
+ bw += FIELD_GET(IWL_RX_PHY_DATA0_EHT_BW320_SLOT,
+ le32_to_cpu(phy_data->d0));
+
+ usig->common |= cpu_to_le32
+ (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW, bw));
+
+ /* report the AMPDU-EOF bit on single frames */
+ if (!queue && !(phy_info & IWL_RX_MPDU_PHY_AMPDU)) {
+ rx_status->flag |= RX_FLAG_AMPDU_DETAILS;
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN;
+ if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF))
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT;
+ }
+
+ /* update aggregation data for monitor sake on default queue */
+ if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) &&
+ (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) {
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN;
+ if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF))
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT;
+ }
+
+ /* the detailed PHY data is decoded only when TSF_OVERLOAD is set */
+ if (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD)
+ iwl_mvm_decode_eht_phy_data(mvm, phy_data, rx_status, eht, usig);
+
+/*
+ * Build-time check that each radiotap HE format value equals the
+ * corresponding RATE_MCS_HE_TYPE_* value shifted down by RATE_MCS_HE_TYPE_POS.
+ */
+#define CHECK_TYPE(F) \
+ BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_ ## F != \
+ (RATE_MCS_HE_TYPE_ ## F >> RATE_MCS_HE_TYPE_POS))
+
+ CHECK_TYPE(SU);
+ CHECK_TYPE(EXT_SU);
+ CHECK_TYPE(MU);
+ CHECK_TYPE(TRIG);
+
+ /*
+ * Map the GI/LTF code from rate_n_flags to a guard interval and an LTF
+ * size; for codes 0, 2 and 3 the result depends on whether he_type is
+ * RATE_MCS_HE_TYPE_TRIG.
+ */
+ switch (FIELD_GET(RATE_MCS_HE_GI_LTF_MSK, rate_n_flags)) {
+ case 0:
+ if (he_type == RATE_MCS_HE_TYPE_TRIG) {
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_1_6;
+ ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_1X;
+ } else {
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_0_8;
+ ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X;
+ }
+ break;
+ case 1:
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_1_6;
+ ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X;
+ break;
+ case 2:
+ ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X;
+ if (he_type == RATE_MCS_HE_TYPE_TRIG)
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_3_2;
+ else
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_0_8;
+ break;
+ case 3:
+ /* for TRIG, ltf stays UNKNOWN and GI/LTF are not reported below */
+ if (he_type != RATE_MCS_HE_TYPE_TRIG) {
+ ltf = IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X;
+ rx_status->eht.gi = NL80211_RATE_INFO_EHT_GI_3_2;
+ }
+ break;
+ default:
+ /* nothing here */
+ break;
+ }
+
+ /* only report GI and LTF in radiotap when the code above was recognized */
+ if (ltf != IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN) {
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_GI);
+ eht->data[0] |= cpu_to_le32
+ (FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_LTF,
+ ltf) |
+ FIELD_PREP(IEEE80211_RADIOTAP_EHT_DATA0_GI,
+ rx_status->eht.gi));
+ }
+
+
+ /*
+ * Without data no user_info word was reserved (see eht_len above), so
+ * NSS and beamforming go into data[7]; with data they go into
+ * user_info[0] together with MCS and coding.
+ */
+ if (!phy_data->with_data) {
+ eht->known |= cpu_to_le32(IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S |
+ IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S);
+ eht->data[7] |=
+ le32_encode_bits(le32_get_bits(phy_data->rx_vec[2],
+ RX_NO_DATA_RX_VEC2_EHT_NSTS_MSK),
+ IEEE80211_RADIOTAP_EHT_DATA7_NSS_S);
+ if (rate_n_flags & RATE_MCS_BF_MSK)
+ eht->data[7] |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S);
+ } else {
+ eht->user_info[0] |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN |
+ IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN |
+ IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O |
+ IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O |
+ IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER);
+
+ if (rate_n_flags & RATE_MCS_BF_MSK)
+ eht->user_info[0] |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O);
+
+ if (rate_n_flags & RATE_MCS_LDPC_MSK)
+ eht->user_info[0] |=
+ cpu_to_le32(IEEE80211_RADIOTAP_EHT_USER_INFO_CODING);
+
+ eht->user_info[0] |= cpu_to_le32
+ (FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_MCS,
+ FIELD_GET(RATE_VHT_MCS_RATE_CODE_MSK,
+ rate_n_flags)) |
+ FIELD_PREP(IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O,
+ FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags)));
+ }
+}
+
static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb,
struct iwl_mvm_rx_phy_data *phy_data,
int queue)
@@ -1497,15 +2032,10 @@ static void iwl_mvm_rx_he(struct iwl_mvm *mvm, struct sk_buff *skb,
/* update aggregation data for monitor sake on default queue */
if (!queue && (phy_info & IWL_RX_MPDU_PHY_TSF_OVERLOAD) &&
- (phy_info & IWL_RX_MPDU_PHY_AMPDU)) {
- bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE;
-
- /* toggle is switched whenever new aggregation starts */
- if (toggle_bit != mvm->ampdu_toggle) {
- rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN;
- if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_HE_DELIM_EOF))
- rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT;
- }
+ (phy_info & IWL_RX_MPDU_PHY_AMPDU) && phy_data->first_subframe) {
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT_KNOWN;
+ if (phy_data->d0 & cpu_to_le32(IWL_RX_PHY_DATA0_EHT_DELIM_EOF))
+ rx_status->flag |= RX_FLAG_AMPDU_EOF_BIT;
}
if (he_type == RATE_MCS_HE_TYPE_EXT_SU &&
@@ -1593,6 +2123,10 @@ static void iwl_mvm_decode_lsig(struct sk_buff *skb,
case IWL_RX_PHY_INFO_TYPE_HE_MU:
case IWL_RX_PHY_INFO_TYPE_HE_MU_EXT:
case IWL_RX_PHY_INFO_TYPE_HE_TB:
+ case IWL_RX_PHY_INFO_TYPE_EHT_MU:
+ case IWL_RX_PHY_INFO_TYPE_EHT_TB:
+ case IWL_RX_PHY_INFO_TYPE_EHT_MU_EXT:
+ case IWL_RX_PHY_INFO_TYPE_EHT_TB_EXT:
lsig = skb_put(skb, sizeof(*lsig));
lsig->data1 = cpu_to_le16(IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN);
lsig->data2 = le16_encode_bits(le32_get_bits(phy_data->d1,
@@ -1689,6 +2223,10 @@ static void iwl_mvm_rx_fill_status(struct iwl_mvm *mvm,
iwl_mvm_get_signal_strength(mvm, rx_status, rate_n_flags,
phy_data->energy_a, phy_data->energy_b);
+ /* using TLV format and must be after all fixed len fields */
+ if (format == RATE_MCS_EHT_MSK)
+ iwl_mvm_rx_eht(mvm, skb, phy_data, queue);
+
if (unlikely(mvm->monitor_on))
iwl_mvm_add_rtap_sniffer_config(mvm, skb);
@@ -1788,6 +2326,8 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
phy_data.d1 = desc->v3.phy_data1;
phy_data.d2 = desc->v3.phy_data2;
phy_data.d3 = desc->v3.phy_data3;
+ phy_data.eht_d4 = desc->phy_eht_data4;
+ phy_data.d5 = desc->v3.phy_data5;
} else {
phy_data.rate_n_flags = le32_to_cpu(desc->v1.rate_n_flags);
phy_data.channel = desc->v1.channel;
@@ -1817,6 +2357,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
return;
}
+ phy_data.with_data = true;
phy_data.phy_info = le16_to_cpu(desc->phy_info);
phy_data.d4 = desc->phy_data4;
@@ -1897,6 +2438,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (mvm->ampdu_ref == 0)
mvm->ampdu_ref++;
mvm->ampdu_toggle = toggle_bit;
+ phy_data.first_subframe = true;
}
rx_status->ampdu_reference = mvm->ampdu_ref;
}
@@ -2079,6 +2621,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
phy_data.energy_a = u32_get_bits(rssi, RX_NO_DATA_CHAIN_A_MSK);
phy_data.energy_b = u32_get_bits(rssi, RX_NO_DATA_CHAIN_B_MSK);
phy_data.channel = u32_get_bits(rssi, RX_NO_DATA_CHANNEL_MSK);
+ phy_data.with_data = false;
if (iwl_fw_lookup_notif_ver(mvm->fw, DATA_PATH_GROUP,
RX_NO_DATA_NOTIF, 0) < 2) {
@@ -2097,6 +2640,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
sizeof(struct iwl_rx_no_data_ver_3)))
/* invalid len for ver 3 */
return;
+ memcpy(phy_data.rx_vec, desc->rx_vec, sizeof(phy_data.rx_vec));
} else {
if (format == RATE_MCS_EHT_MSK)
/* no support for EHT before version 3 API */
@@ -2123,7 +2667,7 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING;
break;
case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED:
- case RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED:
+ case RX_NO_DATA_INFO_TYPE_TB_UNMATCHED:
rx_status->zero_length_psdu_type =
IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED;
break;
@@ -2142,11 +2686,8 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
*
* We mark it as mac header, for upper layers to know where
* all radio tap header ends.
- *
- * Since data doesn't move data while putting data on skb and that is
- * the only way we use, data + len is the next place that hdr would be put
*/
- skb_set_mac_header(skb, skb->len);
+ skb_reset_mac_header(skb);
/*
* Override the nss from the rx_vec since the rate_n_flags has
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 9813d7fa1800..a6d69885cd3f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -1396,8 +1396,8 @@ void iwl_mvm_hwrate_to_tx_rate(u32 rate_n_flags,
r->idx = rate;
} else if (format == RATE_MCS_VHT_MSK) {
ieee80211_rate_set_vht(r, rate,
- ((rate_n_flags & RATE_MCS_NSS_MSK) >>
- RATE_MCS_NSS_POS) + 1);
+ FIELD_GET(RATE_MCS_NSS_MSK,
+ rate_n_flags) + 1);
r->flags |= IEEE80211_TX_RC_VHT_MCS;
} else if (format == RATE_MCS_HE_MSK) {
/* mac80211 cannot do this without ieee80211_tx_status_ext()
@@ -1428,8 +1428,7 @@ void iwl_mvm_hwrate_to_tx_rate_v1(u32 rate_n_flags,
} else if (rate_n_flags & RATE_MCS_VHT_MSK_V1) {
ieee80211_rate_set_vht(
r, rate_n_flags & RATE_VHT_MCS_RATE_CODE_MSK,
- ((rate_n_flags & RATE_VHT_MCS_NSS_MSK) >>
- RATE_VHT_MCS_NSS_POS) + 1);
+ FIELD_GET(RATE_MCS_NSS_MSK, rate_n_flags) + 1);
r->flags |= IEEE80211_TX_RC_VHT_MCS;
} else {
r->idx = iwl_mvm_legacy_rate_to_mac80211_idx(rate_n_flags,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 99768d6a6032..8aa8a678475c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1194,6 +1194,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_IS_JACKET,
iwl_cfg_bnj_a0_fm4_a0, iwl_bz_name),
_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+ IWL_CFG_MAC_TYPE_GL, SILICON_B_STEP,
+ IWL_CFG_RF_TYPE_FM, IWL_CFG_ANY,
+ IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB, IWL_CFG_IS_JACKET,
+ iwl_cfg_bnj_b0_fm4_b0, iwl_bz_name),
+ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
IWL_CFG_MAC_TYPE_GL, IWL_CFG_ANY,
IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, IWL_CFG_IS_JACKET,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 94f40c4d2421..1e263154e9eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include "iwl-trans.h"
#include "iwl-prph.h"
@@ -277,6 +277,9 @@ static void iwl_pcie_get_rf_name(struct iwl_trans *trans)
case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_HRCDB):
pos = scnprintf(buf, buflen, "HRCDB");
break;
+ case CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_MS):
+ pos = scnprintf(buf, buflen, "MS");
+ break;
default:
return;
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 4cc4eaf80b14..f4bdc243ea0d 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1534,37 +1534,38 @@ static void mac80211_hwsim_add_vendor_rtap(struct sk_buff *skb)
* the values accordingly.
*/
#ifdef HWSIM_RADIOTAP_OUI
- struct ieee80211_vendor_radiotap *rtap;
+ struct ieee80211_radiotap_vendor_tlv *rtap;
+ static const char vendor_data[8] = "ABCDEFGH";
+
+ // Make sure no padding is needed
+ BUILD_BUG_ON(sizeof(vendor_data) % 4);
+ /* This is the last radiotap info before the mac header, so call
+ * skb_reset_mac_header() to let mac80211 know where the radiotap
+ * TLVs end and the 802.11 header begins.
+ */
+ skb_reset_mac_header(skb);
/*
* Note that this code requires the headroom in the SKB
* that was allocated earlier.
*/
- rtap = skb_push(skb, sizeof(*rtap) + 8 + 4);
- rtap->oui[0] = HWSIM_RADIOTAP_OUI[0];
- rtap->oui[1] = HWSIM_RADIOTAP_OUI[1];
- rtap->oui[2] = HWSIM_RADIOTAP_OUI[2];
- rtap->subns = 127;
-
- /*
- * Radiotap vendor namespaces can (and should) also be
- * split into fields by using the standard radiotap
- * presence bitmap mechanism. Use just BIT(0) here for
- * the presence bitmap.
- */
- rtap->present = BIT(0);
- /* We have 8 bytes of (dummy) data */
- rtap->len = 8;
- /* For testing, also require it to be aligned */
- rtap->align = 8;
- /* And also test that padding works, 4 bytes */
- rtap->pad = 4;
- /* push the data */
- memcpy(rtap->data, "ABCDEFGH", 8);
- /* make sure to clear padding, mac80211 doesn't */
- memset(rtap->data + 8, 0, 4);
-
- IEEE80211_SKB_RXCB(skb)->flag |= RX_FLAG_RADIOTAP_VENDOR_DATA;
+ rtap = skb_push(skb, sizeof(*rtap) + sizeof(vendor_data));
+
+ rtap->len = cpu_to_le16(sizeof(*rtap) -
+ sizeof(struct ieee80211_radiotap_tlv) +
+ sizeof(vendor_data));
+ rtap->type = cpu_to_le16(IEEE80211_RADIOTAP_VENDOR_NAMESPACE);
+
+ rtap->content.oui[0] = HWSIM_RADIOTAP_OUI[0];
+ rtap->content.oui[1] = HWSIM_RADIOTAP_OUI[1];
+ rtap->content.oui[2] = HWSIM_RADIOTAP_OUI[2];
+ rtap->content.oui_subtype = 127;
+ /* clear reserved field */
+ rtap->content.reserved = 0;
+ rtap->content.vendor_type = 0;
+ memcpy(rtap->content.data, vendor_data, sizeof(vendor_data));
+
+ IEEE80211_SKB_RXCB(skb)->flag |= RX_FLAG_RADIOTAP_TLV_AT_END;
#endif
}
@@ -4446,6 +4447,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
wiphy_ext_feature_set(hw->wiphy,
NL80211_EXT_FEATURE_BEACON_RATE_LEGACY);
+ wiphy_ext_feature_set(hw->wiphy,
+ NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT);
+
hw->wiphy->interface_modes = param->iftypes;
/* ask mac80211 to reserve space for magic */
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
index a99ddb41cd24..f15b099899e5 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8188e.c
@@ -1699,6 +1699,12 @@ void rtl8188e_handle_ra_tx_report2(struct rtl8xxxu_priv *priv, struct sk_buff *s
dev_dbg(dev, "%s: len: %d items: %d\n", __func__, tx_rpt_len, items);
+ /* We only use macid 0, so only the first item is relevant.
+ * AP mode will use more of them if it's ever implemented.
+ */
+ if (!priv->vif || priv->vif->type == NL80211_IFTYPE_STATION)
+ items = 1;
+
for (macid = 0; macid < items; macid++) {
valid = false;
@@ -1741,12 +1747,6 @@ void rtl8188e_handle_ra_tx_report2(struct rtl8xxxu_priv *priv, struct sk_buff *s
min_rpt_time = ra->rpt_time;
rpt += TX_RPT2_ITEM_SIZE;
-
- /*
- * We only use macid 0, so only the first item is relevant.
- * AP mode will use more of them if it's ever implemented.
- */
- break;
}
if (min_rpt_time != ra->pre_min_rpt_time) {
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 620a5cc2bfdd..54ca6f2ced3f 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -1575,11 +1575,7 @@ rtl8xxxu_set_spec_sifs(struct rtl8xxxu_priv *priv, u16 cck, u16 ofdm)
static void rtl8xxxu_print_chipinfo(struct rtl8xxxu_priv *priv)
{
struct device *dev = &priv->udev->dev;
- char cut = '?';
-
- /* Currently always true: chip_cut is 4 bits. */
- if (priv->chip_cut <= 15)
- cut = 'A' + priv->chip_cut;
+ char cut = 'A' + priv->chip_cut;
dev_info(dev,
"RTL%s rev %c (%s) romver %d, %iT%iR, TX queues %i, WiFi=%i, BT=%i, GPS=%i, HI PA=%i\n",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
index b9c62640d2cb..dc480323c9cb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
@@ -1428,7 +1428,9 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
for (rf_path = 0; rf_path < 2; rf_path++) {
for (i = 0; i < 3; i++) {
- if (!autoload_fail) {
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TXPOWERCCK + rf_path * 3 + i] != 0xff &&
+ hwinfo[EEPROM_TXPOWERHT40_1S + rf_path * 3 + i] != 0xff) {
rtlefuse->
eeprom_chnlarea_txpwr_cck[rf_path][i] =
hwinfo[EEPROM_TXPOWERCCK + rf_path * 3 + i];
@@ -1448,7 +1450,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
}
for (i = 0; i < 3; i++) {
- if (!autoload_fail)
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TXPOWERHT40_2SDIFF + i] != 0xff)
tempval = hwinfo[EEPROM_TXPOWERHT40_2SDIFF + i];
else
tempval = EEPROM_DEFAULT_HT40_2SDIFF;
@@ -1518,7 +1521,9 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
}
for (i = 0; i < 3; i++) {
- if (!autoload_fail) {
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TXPWR_GROUP + i] != 0xff &&
+ hwinfo[EEPROM_TXPWR_GROUP + 3 + i] != 0xff) {
rtlefuse->eeprom_pwrlimit_ht40[i] =
hwinfo[EEPROM_TXPWR_GROUP + i];
rtlefuse->eeprom_pwrlimit_ht20[i] =
@@ -1563,7 +1568,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
for (i = 0; i < 14; i++) {
index = rtl92c_get_chnl_group((u8)i);
- if (!autoload_fail)
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TXPOWERHT20DIFF + index] != 0xff)
tempval = hwinfo[EEPROM_TXPOWERHT20DIFF + index];
else
tempval = EEPROM_DEFAULT_HT20_DIFF;
@@ -1580,7 +1586,8 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
index = rtl92c_get_chnl_group((u8)i);
- if (!autoload_fail)
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TXPOWER_OFDMDIFF + index] != 0xff)
tempval = hwinfo[EEPROM_TXPOWER_OFDMDIFF + index];
else
tempval = EEPROM_DEFAULT_LEGACYHTTXPOWERDIFF;
@@ -1610,14 +1617,16 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
"RF-B Legacy to HT40 Diff[%d] = 0x%x\n",
i, rtlefuse->txpwr_legacyhtdiff[RF90_PATH_B][i]);
- if (!autoload_fail)
+ if (!autoload_fail && hwinfo[RF_OPTION1] != 0xff)
rtlefuse->eeprom_regulatory = (hwinfo[RF_OPTION1] & 0x7);
else
rtlefuse->eeprom_regulatory = 0;
RTPRINT(rtlpriv, FINIT, INIT_TXPOWER,
"eeprom_regulatory = 0x%x\n", rtlefuse->eeprom_regulatory);
- if (!autoload_fail) {
+ if (!autoload_fail &&
+ hwinfo[EEPROM_TSSI_A] != 0xff &&
+ hwinfo[EEPROM_TSSI_B] != 0xff) {
rtlefuse->eeprom_tssi[RF90_PATH_A] = hwinfo[EEPROM_TSSI_A];
rtlefuse->eeprom_tssi[RF90_PATH_B] = hwinfo[EEPROM_TSSI_B];
} else {
@@ -1628,7 +1637,7 @@ static void _rtl92ce_read_txpower_info_from_hwpg(struct ieee80211_hw *hw,
rtlefuse->eeprom_tssi[RF90_PATH_A],
rtlefuse->eeprom_tssi[RF90_PATH_B]);
- if (!autoload_fail)
+ if (!autoload_fail && hwinfo[EEPROM_THERMAL_METER] != 0xff)
tempval = hwinfo[EEPROM_THERMAL_METER];
else
tempval = EEPROM_DEFAULT_THERMALMETER;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
index 2aecb2583f75..df1e36fbc348 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
@@ -1047,7 +1047,6 @@ static int _rtl92de_set_media_status(struct ieee80211_hw *hw,
struct rtl_priv *rtlpriv = rtl_priv(hw);
u8 bt_msr = rtl_read_byte(rtlpriv, MSR);
enum led_ctl_mode ledaction = LED_CTL_NO_LINK;
- u8 bcnfunc_enable;
bt_msr &= 0xfc;
@@ -1064,31 +1063,26 @@ static int _rtl92de_set_media_status(struct ieee80211_hw *hw,
"Set HW_VAR_MEDIA_STATUS: No such media status(%x)\n",
type);
}
- bcnfunc_enable = rtl_read_byte(rtlpriv, REG_BCN_CTRL);
switch (type) {
case NL80211_IFTYPE_UNSPECIFIED:
bt_msr |= MSR_NOLINK;
ledaction = LED_CTL_LINK;
- bcnfunc_enable &= 0xF7;
rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE,
"Set Network type to NO LINK!\n");
break;
case NL80211_IFTYPE_ADHOC:
bt_msr |= MSR_ADHOC;
- bcnfunc_enable |= 0x08;
rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE,
"Set Network type to Ad Hoc!\n");
break;
case NL80211_IFTYPE_STATION:
bt_msr |= MSR_INFRA;
ledaction = LED_CTL_LINK;
- bcnfunc_enable &= 0xF7;
rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE,
"Set Network type to STA!\n");
break;
case NL80211_IFTYPE_AP:
bt_msr |= MSR_AP;
- bcnfunc_enable |= 0x08;
rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE,
"Set Network type to AP!\n");
break;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
index bd0b7e365edb..a8b5bf45b1bb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
@@ -1552,8 +1552,6 @@ void rtl92se_set_beacon_related_registers(struct ieee80211_hw *hw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
- u16 bcntime_cfg = 0;
- u16 bcn_cw = 6, bcn_ifs = 0xf;
u16 atim_window = 2;
/* ATIM Window (in unit of TU). */
@@ -1576,13 +1574,6 @@ void rtl92se_set_beacon_related_registers(struct ieee80211_hw *hw)
* other ad hoc STA */
rtl_write_byte(rtlpriv, BCN_ERR_THRESH, 100);
- /* Beacon Time Configuration */
- if (mac->opmode == NL80211_IFTYPE_ADHOC)
- bcntime_cfg |= (bcn_cw << BCN_TCFG_CW_SHIFT);
-
- /* TODO: bcn_ifs may required to be changed on ASIC */
- bcntime_cfg |= bcn_ifs << BCN_TCFG_IFS;
-
/*for beacon changed */
rtl92s_phy_set_beacon_hwreg(hw, mac->beacon_interval);
}
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index dae64901bac5..f3a566cf979b 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -222,6 +222,9 @@ static int rtw_pwr_seq_parser(struct rtw_dev *rtwdev,
case RTW_HCI_TYPE_USB:
intf_mask = RTW_PWR_INTF_USB_MSK;
break;
+ case RTW_HCI_TYPE_SDIO:
+ intf_mask = RTW_PWR_INTF_SDIO_MSK;
+ break;
default:
return -EINVAL;
}
@@ -233,7 +236,7 @@ static int rtw_pwr_seq_parser(struct rtw_dev *rtwdev,
ret = rtw_sub_pwr_seq_parser(rtwdev, intf_mask, cut_mask, cmd);
if (ret)
- return -EBUSY;
+ return ret;
idx++;
} while (1);
@@ -247,6 +250,7 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
const struct rtw_pwr_seq_cmd **pwr_seq;
u8 rpwm;
bool cur_pwr;
+ int ret;
if (rtw_chip_wcpu_11ac(rtwdev)) {
rpwm = rtw_read8(rtwdev, rtwdev->hci.rpwm_addr);
@@ -270,8 +274,9 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
return -EALREADY;
pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq;
- if (rtw_pwr_seq_parser(rtwdev, pwr_seq))
- return -EINVAL;
+ ret = rtw_pwr_seq_parser(rtwdev, pwr_seq);
+ if (ret)
+ return ret;
if (pwr_on)
set_bit(RTW_FLAG_POWERON, rtwdev->flags);
@@ -1040,6 +1045,9 @@ static int txdma_queue_mapping(struct rtw_dev *rtwdev)
else
return -EINVAL;
break;
+ case RTW_HCI_TYPE_SDIO:
+ rqpn = &chip->rqpn_table[0];
+ break;
default:
return -EINVAL;
}
@@ -1202,6 +1210,9 @@ static int priority_queue_cfg(struct rtw_dev *rtwdev)
else
return -EINVAL;
break;
+ case RTW_HCI_TYPE_SDIO:
+ pg_tbl = &chip->page_table[0];
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index 17f800f6efbd..7ae0541d7b99 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -32,6 +32,12 @@ static void rtw8821cu_efuse_parsing(struct rtw_efuse *efuse,
ether_addr_copy(efuse->addr, map->u.mac_addr);
}
+/* Copy the MAC address from the SDIO part of the efuse map into @efuse. */
+static void rtw8821cs_efuse_parsing(struct rtw_efuse *efuse,
+ struct rtw8821c_efuse *map)
+{
+ const u8 *mac = map->s.mac_addr;
+
+ ether_addr_copy(efuse->addr, mac);
+}
+
enum rtw8821ce_rf_set {
SWITCH_TO_BTG,
SWITCH_TO_WLG,
@@ -77,6 +83,9 @@ static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
case RTW_HCI_TYPE_USB:
rtw8821cu_efuse_parsing(efuse, map);
break;
+ case RTW_HCI_TYPE_SDIO:
+ rtw8821cs_efuse_parsing(efuse, map);
+ break;
default:
/* unsupported now */
return -ENOTSUPP;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h
index 1c81260f3a54..fcff31688c45 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h
@@ -65,6 +65,11 @@ struct rtw8821ce_efuse {
u8 res7;
};
+/*
+ * SDIO variant of the interface-specific tail of struct rtw8821c_efuse.
+ * NOTE(review): the trailing hex comments look like efuse map offsets
+ * (0xd0 + 0x4a == 0x11a) - confirm against the efuse layout.
+ */
+struct rtw8821cs_efuse {
+ u8 res4[0x4a]; /* 0xd0 */
+ u8 mac_addr[ETH_ALEN]; /* 0x11a */
+} __packed;
+
struct rtw8821c_efuse {
__le16 rtl_id;
u8 res0[0x0e];
@@ -94,6 +99,7 @@ struct rtw8821c_efuse {
union {
struct rtw8821ce_efuse e;
struct rtw8821cu_efuse u;
+ struct rtw8821cs_efuse s;
};
};
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 74dfb89b2c94..531b67787e2e 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -32,6 +32,12 @@ static void rtw8822bu_efuse_parsing(struct rtw_efuse *efuse,
ether_addr_copy(efuse->addr, map->u.mac_addr);
}
+/* Copy the MAC address from the SDIO part of the efuse map into @efuse. */
+static void rtw8822bs_efuse_parsing(struct rtw_efuse *efuse,
+ struct rtw8822b_efuse *map)
+{
+ const u8 *mac = map->s.mac_addr;
+
+ ether_addr_copy(efuse->addr, mac);
+}
+
static int rtw8822b_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
{
struct rtw_efuse *efuse = &rtwdev->efuse;
@@ -65,6 +71,9 @@ static int rtw8822b_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
case RTW_HCI_TYPE_USB:
rtw8822bu_efuse_parsing(efuse, map);
break;
+ case RTW_HCI_TYPE_SDIO:
+ rtw8822bs_efuse_parsing(efuse, map);
+ break;
default:
/* unsupported now */
return -ENOTSUPP;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.h b/drivers/net/wireless/realtek/rtw88/rtw8822b.h
index 01d3644e0c94..2dc3a6660f06 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.h
@@ -65,6 +65,11 @@ struct rtw8822be_efuse {
u8 res7;
};
+/*
+ * SDIO variant of the interface-specific tail of struct rtw8822b_efuse.
+ * NOTE(review): the trailing hex comments look like efuse map offsets
+ * (0xd0 + 0x4a == 0x11a) - confirm against the efuse layout.
+ */
+struct rtw8822bs_efuse {
+ u8 res4[0x4a]; /* 0xd0 */
+ u8 mac_addr[ETH_ALEN]; /* 0x11a */
+} __packed;
+
struct rtw8822b_efuse {
__le16 rtl_id;
u8 res0[0x0e];
@@ -92,8 +97,9 @@ struct rtw8822b_efuse {
u8 country_code[2];
u8 res[3];
union {
- struct rtw8822bu_efuse u;
struct rtw8822be_efuse e;
+ struct rtw8822bu_efuse u;
+ struct rtw8822bs_efuse s;
};
};
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 964e27887fe2..5a2c004b12df 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -35,6 +35,12 @@ static void rtw8822cu_efuse_parsing(struct rtw_efuse *efuse,
ether_addr_copy(efuse->addr, map->u.mac_addr);
}
+/* Copy the MAC address from the SDIO part of the efuse map into @efuse. */
+static void rtw8822cs_efuse_parsing(struct rtw_efuse *efuse,
+ struct rtw8822c_efuse *map)
+{
+ const u8 *mac = map->s.mac_addr;
+
+ ether_addr_copy(efuse->addr, mac);
+}
+
static int rtw8822c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
{
struct rtw_efuse *efuse = &rtwdev->efuse;
@@ -67,6 +73,9 @@ static int rtw8822c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
case RTW_HCI_TYPE_USB:
rtw8822cu_efuse_parsing(efuse, map);
break;
+ case RTW_HCI_TYPE_SDIO:
+ rtw8822cs_efuse_parsing(efuse, map);
+ break;
default:
/* unsupported now */
return -ENOTSUPP;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
index 479d5d769c52..1bc0e7f5d6bb 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
@@ -16,6 +16,11 @@ struct rtw8822cu_efuse {
u8 res2[0x3d];
};
+/*
+ * SDIO variant of the interface-specific tail of struct rtw8822c_efuse.
+ * NOTE(review): the trailing hex comments look like efuse map offsets
+ * (0x120 + 0x4a == 0x16a) - confirm against the efuse layout.
+ */
+struct rtw8822cs_efuse {
+ u8 res0[0x4a]; /* 0x120 */
+ u8 mac_addr[ETH_ALEN]; /* 0x16a */
+} __packed;
+
struct rtw8822ce_efuse {
u8 mac_addr[ETH_ALEN]; /* 0x120 */
u8 vender_id[2];
@@ -91,8 +96,9 @@ struct rtw8822c_efuse {
u8 res9;
u8 res10[0x42];
union {
- struct rtw8822cu_efuse u;
struct rtw8822ce_efuse e;
+ struct rtw8822cu_efuse u;
+ struct rtw8822cs_efuse s;
};
};
diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c
index f09361bc4a4d..489fa7a86160 100644
--- a/drivers/net/wireless/realtek/rtw89/core.c
+++ b/drivers/net/wireless/realtek/rtw89/core.c
@@ -1400,6 +1400,34 @@ static void rtw89_stats_trigger_frame(struct rtw89_dev *rtwdev,
}
}
+/*
+ * Cancel offloaded 6 GHz probe requests that a received beacon or probe
+ * response has made unnecessary.
+ *
+ * For a frame received on the 6 GHz band, walk the 6 GHz offload packet
+ * list and delete every entry whose BSSID matches the frame's BSSID, or
+ * whose (non-empty) SSID matches the SSID element carried in the frame.
+ */
+static void rtw89_core_cancel_6ghz_probe_tx(struct rtw89_dev *rtwdev,
+ struct sk_buff *skb)
+{
+ struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+ struct list_head *pkt_list = rtwdev->scan_info.pkt_list;
+ struct rtw89_pktofld_info *info;
+ const u8 *ies = mgmt->u.beacon.variable, *ssid_ie;
+ /* size of the fixed part of the frame that precedes the elements */
+ unsigned int ies_offset = offsetof(struct ieee80211_mgmt,
+ u.beacon.variable);
+
+ if (rx_status->band != NL80211_BAND_6GHZ)
+ return;
+
+ /* a frame too short to hold the fixed fields cannot be matched */
+ if (skb->len < ies_offset)
+ return;
+
+ /*
+ * The element buffer starts after the fixed fields, so its length is
+ * the frame length minus that offset; passing skb->len here would let
+ * the search run past the end of the frame.
+ */
+ ssid_ie = cfg80211_find_ie(WLAN_EID_SSID, ies, skb->len - ies_offset);
+
+ list_for_each_entry(info, &pkt_list[NL80211_BAND_6GHZ], list) {
+ if (ether_addr_equal(info->bssid, mgmt->bssid)) {
+ rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
+ continue;
+ }
+
+ if (!ssid_ie || ssid_ie[1] != info->ssid_len || info->ssid_len == 0)
+ continue;
+
+ if (memcmp(&ssid_ie[2], info->ssid, info->ssid_len) == 0)
+ rtw89_fw_h2c_del_pkt_offload(rtwdev, info->id);
+ }
+}
+
static void rtw89_vif_rx_stats_iter(void *data, u8 *mac,
struct ieee80211_vif *vif)
{
@@ -1412,6 +1440,11 @@ static void rtw89_vif_rx_stats_iter(void *data, u8 *mac,
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
const u8 *bssid = iter_data->bssid;
+ if (rtwdev->scanning &&
+ (ieee80211_is_beacon(hdr->frame_control) ||
+ ieee80211_is_probe_resp(hdr->frame_control)))
+ rtw89_core_cancel_6ghz_probe_tx(rtwdev, skb);
+
if (!vif->bss_conf.bssid)
return;
@@ -3372,7 +3405,7 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev)
hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS |
WIPHY_FLAG_TDLS_EXTERNAL_SETUP |
- WIPHY_FLAG_AP_UAPSD;
+ WIPHY_FLAG_AP_UAPSD | WIPHY_FLAG_SPLIT_SCAN_6GHZ;
hw->wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
hw->wiphy->max_scan_ssids = RTW89_SCANOFLD_MAX_SSID;
diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h
index 41365ffb7e5e..b1a886898c5a 100644
--- a/drivers/net/wireless/realtek/rtw89/core.h
+++ b/drivers/net/wireless/realtek/rtw89/core.h
@@ -3023,7 +3023,7 @@ enum rtw89_fw_feature {
RTW89_FW_FEATURE_SCAN_OFFLOAD,
RTW89_FW_FEATURE_TX_WAKE,
RTW89_FW_FEATURE_CRASH_TRIGGER,
- RTW89_FW_FEATURE_PACKET_DROP,
+ RTW89_FW_FEATURE_NO_PACKET_DROP,
RTW89_FW_FEATURE_NO_DEEP_PS,
RTW89_FW_FEATURE_NO_LPS_PG,
};
diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c
index 0b73dc2e9ad7..1a4ff24078fb 100644
--- a/drivers/net/wireless/realtek/rtw89/fw.c
+++ b/drivers/net/wireless/realtek/rtw89/fw.c
@@ -235,6 +235,7 @@ static bool __fw_feat_cond_ ## __cond(u32 suit_ver_code, u32 comp_ver_code) \
__DEF_FW_FEAT_COND(ge, >=); /* greater or equal */
__DEF_FW_FEAT_COND(le, <=); /* less or equal */
+__DEF_FW_FEAT_COND(lt, <); /* less than */
struct __fw_feat_cfg {
enum rtw89_core_chip_id chip_id;
@@ -256,9 +257,11 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = {
__CFG_FW_FEAT(RTL8852A, ge, 0, 13, 35, 0, SCAN_OFFLOAD),
__CFG_FW_FEAT(RTL8852A, ge, 0, 13, 35, 0, TX_WAKE),
__CFG_FW_FEAT(RTL8852A, ge, 0, 13, 36, 0, CRASH_TRIGGER),
- __CFG_FW_FEAT(RTL8852A, ge, 0, 13, 38, 0, PACKET_DROP),
+ __CFG_FW_FEAT(RTL8852A, lt, 0, 13, 38, 0, NO_PACKET_DROP),
__CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, NO_LPS_PG),
- __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 20, 0, PACKET_DROP),
+ __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE),
+ __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, CRASH_TRIGGER),
+ __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, SCAN_OFFLOAD),
__CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS),
__CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE),
__CFG_FW_FEAT(RTL8852C, ge, 0, 27, 36, 0, SCAN_OFFLOAD),
@@ -2702,9 +2705,29 @@ static void rtw89_release_pkt_list(struct rtw89_dev *rtwdev)
}
}
+/*
+ * Tell whether the probe request built for @ssid_idx on @band is a 6 GHz
+ * wildcard (zero-length SSID) probe.
+ *
+ * Returns true only on the 6 GHz band when the requested SSID is empty.
+ * For a 6 GHz probe with a non-empty SSID, the SSID and its length are
+ * copied into @info and false is returned.  Any other band returns false
+ * and leaves @info untouched.
+ */
+static bool rtw89_is_6ghz_wildcard_probe_req(struct rtw89_dev *rtwdev,
+					     struct rtw89_vif *rtwvif,
+					     struct rtw89_pktofld_info *info,
+					     enum nl80211_band band, u8 ssid_idx)
+{
+	struct cfg80211_scan_request *scan_req = rtwvif->scan_req;
+	u8 len;
+
+	if (band != NL80211_BAND_6GHZ)
+		return false;
+
+	len = scan_req->ssids[ssid_idx].ssid_len;
+	if (!len)
+		return true;
+
+	/* Directed probe: remember which SSID this offloaded packet carries. */
+	memcpy(info->ssid, scan_req->ssids[ssid_idx].ssid, len);
+	info->ssid_len = len;
+	return false;
+}
+
static int rtw89_append_probe_req_ie(struct rtw89_dev *rtwdev,
struct rtw89_vif *rtwvif,
- struct sk_buff *skb)
+ struct sk_buff *skb, u8 ssid_idx)
{
struct rtw89_hw_scan_info *scan_info = &rtwdev->scan_info;
struct ieee80211_scan_ies *ies = rtwvif->scan_ies;
@@ -2732,6 +2755,13 @@ static int rtw89_append_probe_req_ie(struct rtw89_dev *rtwdev,
goto out;
}
+ if (rtw89_is_6ghz_wildcard_probe_req(rtwdev, rtwvif, info, band,
+ ssid_idx)) {
+ kfree_skb(new);
+ kfree(info);
+ goto out;
+ }
+
ret = rtw89_fw_h2c_add_pkt_offload(rtwdev, &info->id, new);
if (ret) {
kfree_skb(new);
@@ -2762,7 +2792,7 @@ static int rtw89_hw_scan_update_probe_req(struct rtw89_dev *rtwdev,
if (!skb)
return -ENOMEM;
- ret = rtw89_append_probe_req_ie(rtwdev, rtwvif, skb);
+ ret = rtw89_append_probe_req_ie(rtwdev, rtwvif, skb, i);
kfree_skb(skb);
if (ret)
@@ -2772,6 +2802,77 @@ static int rtw89_hw_scan_update_probe_req(struct rtw89_dev *rtwdev,
return 0;
}
+/*
+ * Offload directed 6 GHz probe requests for the channel described by
+ * @ch_info.
+ *
+ * For every entry of req->scan_6ghz_params whose channel matches
+ * ch_info->pri_ch and whose BSSID is not yet present in the 6 GHz packet
+ * list, a probe request addressed to that BSSID (addr3) is built, handed to
+ * firmware via rtw89_fw_h2c_add_pkt_offload() and recorded in the 6 GHz
+ * packet list.  When at least one packet is added, @ch_info is switched to
+ * transmit probes and its period is set to channel time plus dwell time.
+ *
+ * Returns 0 on success (including "nothing to do"), -ENOMEM on allocation
+ * failure, or the error returned by rtw89_fw_h2c_add_pkt_offload().
+ */
+static int rtw89_update_6ghz_rnr_chan(struct rtw89_dev *rtwdev,
+				      struct cfg80211_scan_request *req,
+				      struct rtw89_mac_chinfo *ch_info)
+{
+	struct ieee80211_vif *vif = rtwdev->scan_info.scanning_vif;
+	struct list_head *pkt_list = rtwdev->scan_info.pkt_list;
+	struct rtw89_vif *rtwvif = vif_to_rtwvif_safe(vif);
+	struct ieee80211_scan_ies *ies = rtwvif->scan_ies;
+	struct cfg80211_scan_6ghz_params *params;
+	struct rtw89_pktofld_info *info, *tmp;
+	struct ieee80211_hdr *hdr;
+	struct sk_buff *skb;
+	bool found;
+	int ret = 0;
+	u8 i;
+
+	if (!req->n_6ghz_params)
+		return 0;
+
+	for (i = 0; i < req->n_6ghz_params; i++) {
+		params = &req->scan_6ghz_params[i];
+
+		/* Only handle parameters that target this channel. */
+		if (req->channels[params->channel_idx]->hw_value !=
+		    ch_info->pri_ch)
+			continue;
+
+		/* Skip BSSIDs that already have an offloaded probe request. */
+		found = false;
+		list_for_each_entry(tmp, &pkt_list[NL80211_BAND_6GHZ], list) {
+			if (ether_addr_equal(tmp->bssid, params->bssid)) {
+				found = true;
+				break;
+			}
+		}
+		if (found)
+			continue;
+
+		skb = ieee80211_probereq_get(rtwdev->hw, rtwvif->mac_addr,
+					     NULL, 0, req->ie_len);
+		/* Allocation can fail; do not hand a NULL skb to skb_put_data(). */
+		if (!skb) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		skb_put_data(skb, ies->ies[NL80211_BAND_6GHZ], ies->len[NL80211_BAND_6GHZ]);
+		skb_put_data(skb, ies->common_ies, ies->common_ie_len);
+		hdr = (struct ieee80211_hdr *)skb->data;
+		ether_addr_copy(hdr->addr3, params->bssid);
+
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info) {
+			ret = -ENOMEM;
+			kfree_skb(skb);
+			goto out;
+		}
+
+		ret = rtw89_fw_h2c_add_pkt_offload(rtwdev, &info->id, skb);
+		if (ret) {
+			kfree_skb(skb);
+			kfree(info);
+			goto out;
+		}
+
+		ether_addr_copy(info->bssid, params->bssid);
+		info->channel_6ghz = req->channels[params->channel_idx]->hw_value;
+		list_add_tail(&info->list, &rtwdev->scan_info.pkt_list[NL80211_BAND_6GHZ]);
+
+		ch_info->tx_pkt = true;
+		ch_info->period = RTW89_CHANNEL_TIME_6G + RTW89_DWELL_TIME_6G;
+
+		kfree_skb(skb);
+	}
+
+out:
+	return ret;
+}
+
static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type,
int ssid_num,
struct rtw89_mac_chinfo *ch_info)
@@ -2782,6 +2883,7 @@ static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type,
struct cfg80211_scan_request *req = rtwvif->scan_req;
struct rtw89_pktofld_info *info;
u8 band, probe_count = 0;
+ int ret;
ch_info->notify_action = RTW89_SCANOFLD_DEBUG_MASK;
ch_info->dfs_ch = chan_type == RTW89_CHAN_DFS;
@@ -2793,25 +2895,31 @@ static void rtw89_hw_scan_add_chan(struct rtw89_dev *rtwdev, int chan_type,
ch_info->pause_data = false;
ch_info->probe_id = RTW89_SCANOFLD_PKT_NONE;
+ if (ch_info->ch_band == RTW89_BAND_6G) {
+ if ((ssid_num == 1 && req->ssids[0].ssid_len == 0) ||
+ !ch_info->is_psc) {
+ ch_info->tx_pkt = false;
+ if (!req->duration_mandatory)
+ ch_info->period -= RTW89_DWELL_TIME_6G;
+ }
+ }
+
+ ret = rtw89_update_6ghz_rnr_chan(rtwdev, req, ch_info);
+ if (ret)
+ rtw89_warn(rtwdev, "RNR fails: %d\n", ret);
+
if (ssid_num) {
- ch_info->num_pkt = ssid_num;
band = rtw89_hw_to_nl80211_band(ch_info->ch_band);
list_for_each_entry(info, &scan_info->pkt_list[band], list) {
- ch_info->pkt_id[probe_count] = info->id;
- if (++probe_count >= ssid_num)
+ if (info->channel_6ghz &&
+ ch_info->pri_ch != info->channel_6ghz)
+ continue;
+ ch_info->pkt_id[probe_count++] = info->id;
+ if (probe_count >= RTW89_SCANOFLD_MAX_SSID)
break;
}
- if (probe_count != ssid_num)
- rtw89_err(rtwdev, "SSID num differs from list len\n");
- }
-
- if (ch_info->ch_band == RTW89_BAND_6G) {
- if (ssid_num == 1 && req->ssids[0].ssid_len == 0) {
- ch_info->tx_pkt = false;
- if (!req->duration_mandatory)
- ch_info->period -= RTW89_DWELL_TIME_6G;
- }
+ ch_info->num_pkt = probe_count;
}
switch (chan_type) {
@@ -2872,6 +2980,7 @@ static int rtw89_hw_scan_add_chan_list(struct rtw89_dev *rtwdev,
ch_info->central_ch = channel->hw_value;
ch_info->pri_ch = channel->hw_value;
ch_info->rand_seq_num = random_seq;
+ ch_info->is_psc = cfg80211_channel_is_psc(channel);
if (channel->flags &
(IEEE80211_CHAN_RADAR | IEEE80211_CHAN_NO_IR))
diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h
index cae07e325326..3f6e0871381d 100644
--- a/drivers/net/wireless/realtek/rtw89/fw.h
+++ b/drivers/net/wireless/realtek/rtw89/fw.h
@@ -237,6 +237,7 @@ struct rtw89_mac_chinfo {
u16 tx_pwr_idx;
u8 rsvd1;
struct list_head list;
+ bool is_psc;
};
struct rtw89_scan_option {
@@ -247,6 +248,12 @@ struct rtw89_scan_option {
struct rtw89_pktofld_info {
struct list_head list;
u8 id;
+
+ /* Below fields are for 6 GHz RNR use only */
+ u8 ssid[IEEE80211_MAX_SSID_LEN];
+ u8 ssid_len;
+ u8 bssid[ETH_ALEN];
+ u16 channel_6ghz;
};
static inline void RTW89_SET_FWCMD_RA_IS_DIS(void *cmd, u32 val)
diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c
index 2e2a2b6eab09..3d1e4ffef1b1 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.c
+++ b/drivers/net/wireless/realtek/rtw89/mac.c
@@ -5426,7 +5426,7 @@ int rtw89_mac_ptk_drop_by_band_and_wait(struct rtw89_dev *rtwdev,
for (i = 0; i < try_cnt; i++) {
ret = read_poll_timeout(mac_is_txq_empty, empty, empty, 50,
50000, false, rtwdev);
- if (ret)
+ if (ret && !RTW89_CHK_FW_FEATURE(NO_PACKET_DROP, &rtwdev->fw))
rtw89_fw_h2c_pkt_drop(rtwdev, &params);
else
return 0;
diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c
index d43281f7335b..367a7bf319da 100644
--- a/drivers/net/wireless/realtek/rtw89/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw89/mac80211.c
@@ -676,7 +676,7 @@ static void rtw89_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
rtw89_leave_lps(rtwdev);
rtw89_hci_flush_queues(rtwdev, queues, drop);
- if (drop && RTW89_CHK_FW_FEATURE(PACKET_DROP, &rtwdev->fw))
+ if (drop && !RTW89_CHK_FW_FEATURE(NO_PACKET_DROP, &rtwdev->fw))
__rtw89_drop_packets(rtwdev, vif);
else
rtw89_mac_flush_txq(rtwdev, queues, drop);
diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c
index d9f61ba3d176..d8b035972dd4 100644
--- a/drivers/net/wireless/realtek/rtw89/phy.c
+++ b/drivers/net/wireless/realtek/rtw89/phy.c
@@ -4294,3 +4294,75 @@ void rtw89_phy_tssi_ctrl_set_bandedge_cfg(struct rtw89_dev *rtwdev,
data[RTW89_TSSI_SBW20]);
}
EXPORT_SYMBOL(rtw89_phy_tssi_ctrl_set_bandedge_cfg);
+
+/*
+ * Base channel numbers used to pack a channel into an 8-bit channel index.
+ * Slot 0 is the 2 GHz base, slots 2..5 are the 5 GHz bases and slots 7..14
+ * are the 6 GHz bases (see the RTW89_CH_BASE_IDX_* ranges below).  The
+ * remaining slots (1, 6 and 15) hold 0xff and lie outside every range.
+ */
+static
+const u8 rtw89_ch_base_table[16] = {1, 0xff,
+ 36, 100, 132, 149, 0xff,
+ 1, 33, 65, 97, 129, 161, 193, 225, 0xff};
+#define RTW89_CH_BASE_IDX_2G 0
+#define RTW89_CH_BASE_IDX_5G_FIRST 2
+#define RTW89_CH_BASE_IDX_5G_LAST 5
+#define RTW89_CH_BASE_IDX_6G_FIRST 7
+#define RTW89_CH_BASE_IDX_6G_LAST 14
+
+/* Channel index layout: bits 7:4 = table slot, bits 3:0 = offset from base. */
+#define RTW89_CH_BASE_IDX_MASK GENMASK(7, 4)
+#define RTW89_CH_OFFSET_MASK GENMASK(3, 0)
+
+/*
+ * Pack @central_ch of @band into the 8-bit channel index: the slot of the
+ * selected base channel in rtw89_ch_base_table goes into bits 7:4 and the
+ * distance from that base into bits 3:0.  2 GHz channels store the channel
+ * number itself as the offset; 5 GHz and 6 GHz store the distance halved.
+ *
+ * Returns 0 after a warning when the band is unsupported or the channel is
+ * below the lowest base of its band.
+ */
+u8 rtw89_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band)
+{
+	u8 lo, hi;
+	u8 slot;
+
+	if (band == RTW89_BAND_2G)
+		return FIELD_PREP(RTW89_CH_BASE_IDX_MASK, RTW89_CH_BASE_IDX_2G) |
+		       FIELD_PREP(RTW89_CH_OFFSET_MASK, central_ch);
+
+	if (band == RTW89_BAND_5G) {
+		lo = RTW89_CH_BASE_IDX_5G_FIRST;
+		hi = RTW89_CH_BASE_IDX_5G_LAST;
+	} else if (band == RTW89_BAND_6G) {
+		lo = RTW89_CH_BASE_IDX_6G_FIRST;
+		hi = RTW89_CH_BASE_IDX_6G_LAST;
+	} else {
+		rtw89_warn(rtwdev, "Unsupported band %d\n", band);
+		return 0;
+	}
+
+	/* Walk down from the highest base until one is not above the channel. */
+	slot = hi;
+	while (slot >= lo && central_ch < rtw89_ch_base_table[slot])
+		slot--;
+
+	if (slot < lo) {
+		rtw89_warn(rtwdev, "Unknown band %d channel %d\n", band, central_ch);
+		return 0;
+	}
+
+	return FIELD_PREP(RTW89_CH_BASE_IDX_MASK, slot) |
+	       FIELD_PREP(RTW89_CH_OFFSET_MASK,
+			  (central_ch - rtw89_ch_base_table[slot]) >> 1);
+}
+EXPORT_SYMBOL(rtw89_encode_chan_idx);
+
+/*
+ * Unpack an 8-bit channel index into a channel number (@ch) and band
+ * (@band).  Bits 7:4 select a slot of rtw89_ch_base_table and bits 3:0 hold
+ * the offset: slot RTW89_CH_BASE_IDX_2G yields the offset itself on 2 GHz,
+ * any other slot yields base + 2 * offset, on 5 GHz up to
+ * RTW89_CH_BASE_IDX_5G_LAST and on 6 GHz above it.
+ */
+void rtw89_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx,
+			   u8 *ch, enum nl80211_band *band)
+{
+	u8 slot = FIELD_GET(RTW89_CH_BASE_IDX_MASK, chan_idx);
+	u8 off = FIELD_GET(RTW89_CH_OFFSET_MASK, chan_idx);
+
+	if (slot == RTW89_CH_BASE_IDX_2G) {
+		*band = NL80211_BAND_2GHZ;
+		*ch = off;
+		return;
+	}
+
+	if (slot <= RTW89_CH_BASE_IDX_5G_LAST)
+		*band = NL80211_BAND_5GHZ;
+	else
+		*band = NL80211_BAND_6GHZ;
+
+	*ch = rtw89_ch_base_table[slot] + (off << 1);
+}
+EXPORT_SYMBOL(rtw89_decode_chan_idx);
diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h
index 21233f094644..de0a9abf646e 100644
--- a/drivers/net/wireless/realtek/rtw89/phy.h
+++ b/drivers/net/wireless/realtek/rtw89/phy.h
@@ -555,5 +555,8 @@ void rtw89_phy_tssi_ctrl_set_bandedge_cfg(struct rtw89_dev *rtwdev,
enum rtw89_tssi_bandedge_cfg bandedge_cfg);
void rtw89_phy_ul_tb_assoc(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif);
void rtw89_phy_ul_tb_ctrl_track(struct rtw89_dev *rtwdev);
+u8 rtw89_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band);
+void rtw89_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx,
+ u8 *ch, enum nl80211_band *band);
#endif
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
index ee8dba7e0074..499ae0389c71 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c
@@ -1422,6 +1422,7 @@ static void rtw8852b_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89
{
bool cck_en = chan->channel <= 14;
u8 pri_ch_idx = chan->pri_ch_idx;
+ u8 band = chan->band_type, chan_idx;
if (cck_en)
rtw8852b_ctrl_sco_cck(rtwdev, chan->primary_channel);
@@ -1444,8 +1445,8 @@ static void rtw8852b_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89
B_BT_DYN_DC_EST_EN_MSK, 0x0);
rtw89_phy_write32_mask(rtwdev, R_GNT_BT_WGT_EN, B_GNT_BT_WGT_EN, 0x0);
}
- rtw89_phy_write32_mask(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0,
- chan->primary_channel);
+ chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band);
+ rtw89_phy_write32_mask(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, chan_idx);
rtw8852b_5m_mask(rtwdev, chan, phy_idx);
rtw8852b_bb_set_pop(rtwdev);
rtw8852b_bb_reset_all(rtwdev, phy_idx);
@@ -2299,13 +2300,14 @@ static void rtw8852b_fill_freq_with_ppdu(struct rtw89_dev *rtwdev,
struct ieee80211_rx_status *status)
{
u16 chan = phy_ppdu->chan_idx;
- u8 band;
+ enum nl80211_band band;
+ u8 ch;
if (chan == 0)
return;
- band = chan <= 14 ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ;
- status->freq = ieee80211_channel_to_frequency(chan, band);
+ rtw89_decode_chan_idx(rtwdev, chan, &ch, &band);
+ status->freq = ieee80211_channel_to_frequency(ch, band);
status->band = band;
}
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c
index d2dde21d3daf..8af813132f71 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c
@@ -852,76 +852,6 @@ static void rtw8852c_set_gain_error(struct rtw89_dev *rtwdev,
}
}
-static
-const u8 rtw8852c_ch_base_table[16] = {1, 0xff,
- 36, 100, 132, 149, 0xff,
- 1, 33, 65, 97, 129, 161, 193, 225, 0xff};
-#define RTW8852C_CH_BASE_IDX_2G 0
-#define RTW8852C_CH_BASE_IDX_5G_FIRST 2
-#define RTW8852C_CH_BASE_IDX_5G_LAST 5
-#define RTW8852C_CH_BASE_IDX_6G_FIRST 7
-#define RTW8852C_CH_BASE_IDX_6G_LAST 14
-
-#define RTW8852C_CH_BASE_IDX_MASK GENMASK(7, 4)
-#define RTW8852C_CH_OFFSET_MASK GENMASK(3, 0)
-
-static u8 rtw8852c_encode_chan_idx(struct rtw89_dev *rtwdev, u8 central_ch, u8 band)
-{
- u8 chan_idx;
- u8 last, first;
- u8 idx;
-
- switch (band) {
- case RTW89_BAND_2G:
- chan_idx = FIELD_PREP(RTW8852C_CH_BASE_IDX_MASK, RTW8852C_CH_BASE_IDX_2G) |
- FIELD_PREP(RTW8852C_CH_OFFSET_MASK, central_ch);
- return chan_idx;
- case RTW89_BAND_5G:
- first = RTW8852C_CH_BASE_IDX_5G_FIRST;
- last = RTW8852C_CH_BASE_IDX_5G_LAST;
- break;
- case RTW89_BAND_6G:
- first = RTW8852C_CH_BASE_IDX_6G_FIRST;
- last = RTW8852C_CH_BASE_IDX_6G_LAST;
- break;
- default:
- rtw89_warn(rtwdev, "Unsupported band %d\n", band);
- return 0;
- }
-
- for (idx = last; idx >= first; idx--)
- if (central_ch >= rtw8852c_ch_base_table[idx])
- break;
-
- if (idx < first) {
- rtw89_warn(rtwdev, "Unknown band %d channel %d\n", band, central_ch);
- return 0;
- }
-
- chan_idx = FIELD_PREP(RTW8852C_CH_BASE_IDX_MASK, idx) |
- FIELD_PREP(RTW8852C_CH_OFFSET_MASK,
- (central_ch - rtw8852c_ch_base_table[idx]) >> 1);
- return chan_idx;
-}
-
-static void rtw8852c_decode_chan_idx(struct rtw89_dev *rtwdev, u8 chan_idx,
- u8 *ch, enum nl80211_band *band)
-{
- u8 idx, offset;
-
- idx = FIELD_GET(RTW8852C_CH_BASE_IDX_MASK, chan_idx);
- offset = FIELD_GET(RTW8852C_CH_OFFSET_MASK, chan_idx);
-
- if (idx == RTW8852C_CH_BASE_IDX_2G) {
- *band = NL80211_BAND_2GHZ;
- *ch = offset;
- return;
- }
-
- *band = idx <= RTW8852C_CH_BASE_IDX_5G_LAST ? NL80211_BAND_5GHZ : NL80211_BAND_6GHZ;
- *ch = rtw8852c_ch_base_table[idx] + (offset << 1);
-}
-
static void rtw8852c_set_gain_offset(struct rtw89_dev *rtwdev,
const struct rtw89_chan *chan,
enum rtw89_phy_idx phy_idx,
@@ -1084,7 +1014,7 @@ static void rtw8852c_ctrl_ch(struct rtw89_dev *rtwdev,
}
}
- chan_idx = rtw8852c_encode_chan_idx(rtwdev, chan->primary_channel, band);
+ chan_idx = rtw89_encode_chan_idx(rtwdev, chan->primary_channel, band);
rtw89_phy_write32_idx(rtwdev, R_MAC_PIN_SEL, B_CH_IDX_SEG0, chan_idx, phy_idx);
}
@@ -2730,7 +2660,7 @@ static void rtw8852c_fill_freq_with_ppdu(struct rtw89_dev *rtwdev,
if (chan_idx == 0)
return;
- rtw8852c_decode_chan_idx(rtwdev, chan_idx, &ch, &band);
+ rtw89_decode_chan_idx(rtwdev, chan_idx, &ch, &band);
status->freq = ieee80211_channel_to_frequency(ch, band);
status->band = band;
}
diff --git a/drivers/net/wireless/realtek/rtw89/ser.c b/drivers/net/wireless/realtek/rtw89/ser.c
index 61db7189fdab..9e9f6947e7f1 100644
--- a/drivers/net/wireless/realtek/rtw89/ser.c
+++ b/drivers/net/wireless/realtek/rtw89/ser.c
@@ -414,8 +414,11 @@ static void ser_idle_st_hdl(struct rtw89_ser *ser, u8 evt)
static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt)
{
+ struct rtw89_dev *rtwdev = container_of(ser, struct rtw89_dev, ser);
+
switch (evt) {
case SER_EV_STATE_IN:
+ cancel_delayed_work_sync(&rtwdev->track_work);
drv_stop_tx(ser);
if (hal_stop_dma(ser)) {
@@ -446,6 +449,8 @@ static void ser_reset_trx_st_hdl(struct rtw89_ser *ser, u8 evt)
hal_enable_dma(ser);
drv_resume_rx(ser);
drv_resume_tx(ser);
+ ieee80211_queue_delayed_work(rtwdev->hw, &rtwdev->track_work,
+ RTW89_TRACK_WORK_PERIOD);
break;
default:
diff --git a/drivers/net/wireless/silabs/wfx/main.c b/drivers/net/wireless/silabs/wfx/main.c
index 6b9864e478ac..0b50f7058bbb 100644
--- a/drivers/net/wireless/silabs/wfx/main.c
+++ b/drivers/net/wireless/silabs/wfx/main.c
@@ -358,13 +358,9 @@ int wfx_probe(struct wfx_dev *wdev)
wfx_bh_poll_irq(wdev);
err = wait_for_completion_timeout(&wdev->firmware_ready, 1 * HZ);
- if (err <= 0) {
- if (err == 0) {
- dev_err(wdev->dev, "timeout while waiting for startup indication\n");
- err = -ETIMEDOUT;
- } else if (err == -ERESTARTSYS) {
- dev_info(wdev->dev, "probe interrupted by user\n");
- }
+ if (err == 0) {
+ dev_err(wdev->dev, "timeout while waiting for startup indication\n");
+ err = -ETIMEDOUT;
goto bh_unregister;
}
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index e74342b0b728..164e2ab859fd 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -168,7 +168,7 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node,
return ret;
}
- if (of_find_property(node, "i2c-int-falling", NULL))
+ if (of_property_read_bool(node, "i2c-int-falling"))
pdata->irq_polarity = IRQF_TRIGGER_FALLING;
else
pdata->irq_polarity = IRQF_TRIGGER_RISING;
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c
index 1a5284de4341..141bc4b66dcb 100644
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -261,11 +261,7 @@ int nfcmrvl_parse_dt(struct device_node *node,
return reset_n_io;
}
pdata->reset_n_io = reset_n_io;
-
- if (of_find_property(node, "hci-muxed", NULL))
- pdata->hci_muxed = 1;
- else
- pdata->hci_muxed = 0;
+ pdata->hci_muxed = of_property_read_bool(node, "hci-muxed");
return 0;
}
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index 165bd0a95190..f61a99e553db 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -8,8 +8,6 @@
#ifndef _NFCMRVL_H_
#define _NFCMRVL_H_
-#include <linux/platform_data/nfcmrvl.h>
-
#include "fw_dnld.h"
/* Define private flags: */
@@ -50,6 +48,34 @@ enum nfcmrvl_phy {
NFCMRVL_PHY_SPI = 3,
};
+/* Per-device configuration shared by the nfcmrvl transport drivers. */
+struct nfcmrvl_platform_data {
+ /*
+ * Generic
+ */
+
+ /* GPIO that is wired to the RESET_N signal */
+ int reset_n_io;
+ /* True if the transport is muxed into an HCI one */
+ bool hci_muxed;
+
+ /*
+ * UART specific
+ */
+
+ /* True if the UART needs flow control at init */
+ bool flow_control;
+ /* True if the firmware supports break control for power management */
+ bool break_control;
+
+
+ /*
+ * I2C specific
+ */
+
+ unsigned int irq;
+ /* Interrupt trigger flag (IRQF_TRIGGER_FALLING or IRQF_TRIGGER_RISING) */
+ unsigned int irq_polarity;
+};
+
struct nfcmrvl_private {
unsigned long flags;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index 9c92cbdc42f0..956ae92f7573 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -76,15 +76,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
return ret;
}
- if (of_find_property(matched_node, "flow-control", NULL))
- pdata->flow_control = 1;
- else
- pdata->flow_control = 0;
-
- if (of_find_property(matched_node, "break-control", NULL))
- pdata->break_control = 1;
- else
- pdata->break_control = 0;
+ pdata->flow_control = of_property_read_bool(matched_node, "flow-control");
+ pdata->break_control = of_property_read_bool(matched_node, "break-control");
of_node_put(matched_node);
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 21d68664fe08..7eb17f46a815 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -2229,7 +2229,7 @@ static const struct dev_pm_ops trf7970a_pm_ops = {
trf7970a_pm_runtime_resume, NULL)
};
-static const struct of_device_id trf7970a_of_match[] = {
+static const struct of_device_id trf7970a_of_match[] __maybe_unused = {
{.compatible = "ti,trf7970a",},
{},
};
diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c
index 61f47fb9d997..ed215b458183 100644
--- a/drivers/ptp/ptp_ines.c
+++ b/drivers/ptp/ptp_ines.c
@@ -792,7 +792,7 @@ static struct platform_driver ines_ptp_ctrl_driver = {
.remove = ines_ptp_ctrl_remove,
.driver = {
.name = "ines_ptp_ctrl",
- .of_match_table = of_match_ptr(ines_ptp_ctrl_of_match),
+ .of_match_table = ines_ptp_ctrl_of_match,
},
};
module_platform_driver(ines_ptp_ctrl_driver);
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 4bbaccd543ad..2b63f3487645 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -662,6 +662,7 @@ static struct ocp_resource ocp_fb_resource[] = {
.num_chipselect = 1,
.bits_per_word = 8,
.num_devices = 1,
+ .force_irq = true,
.devices = &(struct spi_board_info) {
.modalias = "spi-nor",
},
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index eb7e13486087..05749c877990 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -11,7 +11,6 @@
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/device.h>
-#include <linux/pci.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/processor.h>
@@ -842,6 +841,12 @@ static int smcd_move(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
return ism_move(smcd->priv, dmb_tok, idx, sf, offset, data, size);
}
+/*
+ * Report v2 support: non-zero unless both the first serial-number character
+ * and the first type character of SYSTEM_EID are '0'.
+ */
+static int smcd_supports_v2(void)
+{
+	if (SYSTEM_EID.serial_number[0] != '0')
+		return 1;
+
+	return SYSTEM_EID.type[0] != '0';
+}
+
static u64 smcd_get_local_gid(struct smcd_dev *smcd)
{
return ism_get_local_gid(smcd->priv);
@@ -869,6 +874,7 @@ static const struct smcd_ops ism_ops = {
.reset_vlan_required = smcd_reset_vlan_required,
.signal_event = smcd_signal_ieq,
.move_data = smcd_move,
+ .supports_v2 = smcd_supports_v2,
.get_system_eid = ism_get_seid,
.get_local_gid = smcd_get_local_gid,
.get_chid = smcd_get_chid,
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a9b14f81d655..bd786b3be5ec 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -601,7 +601,7 @@ static void lowcomms_error_report(struct sock *sk)
"sk_err=%d/%d\n", dlm_our_nodeid(),
con->nodeid, &inet->inet_daddr,
ntohs(inet->inet_dport), sk->sk_err,
- sk->sk_err_soft);
+ READ_ONCE(sk->sk_err_soft));
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
@@ -610,14 +610,15 @@ static void lowcomms_error_report(struct sock *sk)
"dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
con->nodeid, &sk->sk_v6_daddr,
ntohs(inet->inet_dport), sk->sk_err,
- sk->sk_err_soft);
+ READ_ONCE(sk->sk_err_soft));
break;
#endif
default:
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"invalid socket family %d set, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
- sk->sk_family, sk->sk_err, sk->sk_err_soft);
+ sk->sk_family, sk->sk_err,
+ READ_ONCE(sk->sk_err_soft));
break;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 520b238abd5a..6792a7940e1e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -161,6 +161,8 @@ struct bpf_map_ops {
bpf_callback_t callback_fn,
void *callback_ctx, u64 flags);
+ u64 (*map_mem_usage)(const struct bpf_map *map);
+
/* BTF id of struct allocated by map_alloc */
int *map_btf_id;
@@ -607,11 +609,18 @@ enum bpf_type_flag {
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to sk_buff */
+ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to xdp_buff */
+ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
-#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
+#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
+ | DYNPTR_TYPE_XDP)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1124,6 +1133,37 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
+/* The kernel-side implementation of the opaque uapi struct bpf_dynptr. */
+struct bpf_dynptr_kern {
+ void *data;
+ /* Size represents the number of usable bytes of dynptr data.
+ * If for example the offset is at 4 for a local dynptr whose data is
+ * of type u64, the number of usable bytes is 4.
+ *
+ * The upper 8 bits of this field are reserved; the layout is:
+ * Bits 0 - 23 = size
+ * Bits 24 - 30 = dynptr type
+ * Bit 31 = whether dynptr is read-only
+ */
+ u32 size;
+ u32 offset;
+} __aligned(8);
+
+/* Kinds of backing data a dynptr can describe. */
+enum bpf_dynptr_type {
+ BPF_DYNPTR_TYPE_INVALID,
+ /* Points to memory that is local to the bpf program */
+ BPF_DYNPTR_TYPE_LOCAL,
+ /* Underlying data is a ringbuf record */
+ BPF_DYNPTR_TYPE_RINGBUF,
+ /* Underlying data is an sk_buff */
+ BPF_DYNPTR_TYPE_SKB,
+ /* Underlying data is an xdp_buff */
+ BPF_DYNPTR_TYPE_XDP,
+};
+
+int bpf_dynptr_check_size(u32 size);
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -2241,7 +2281,7 @@ struct bpf_core_ctx {
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off);
+ int off, const char *suffix);
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
const struct btf *reg_btf, u32 reg_id,
@@ -2266,6 +2306,11 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2495,6 +2540,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
}
+
+/* No-op dynptr helpers for builds without CONFIG_BPF_SYSCALL. */
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -2566,6 +2624,7 @@ static inline bool bpf_map_is_offloaded(struct bpf_map *map)
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
void bpf_map_offload_map_free(struct bpf_map *map);
+u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map);
int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
@@ -2637,6 +2696,11 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
+/*
+ * Fallback stub: always reports zero bytes of map memory.
+ * NOTE(review): presumably the variant used when map offload support is
+ * compiled out - confirm against the enclosing #if.
+ */
+static inline u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map)
+{
+ return 0;
+}
+
static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
@@ -2801,6 +2865,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -2822,6 +2888,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
+/*
+ * Fallback stub in the #else branch: skb dynptrs are unavailable, so the
+ * call always fails with -EOPNOTSUPP and @ptr is not touched.
+ */
+static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_INET
@@ -2913,36 +2984,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
- void *data;
- /* Size represents the number of usable bytes of dynptr data.
- * If for example the offset is at 4 for a local dynptr whose data is
- * of type u64, the number of usable bytes is 4.
- *
- * The upper 8 bits are reserved. It is as follows:
- * Bits 0 - 23 = size
- * Bits 24 - 30 = dynptr type
- * Bit 31 = whether dynptr is read-only
- */
- u32 size;
- u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
- BPF_DYNPTR_TYPE_INVALID,
- /* Points to memory that is local to the bpf program */
- BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a kernel-produced ringbuf record */
- BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
- enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
-
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 6d37a40cd90e..d934248b8e81 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -164,5 +164,6 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
void *value, u64 map_flags, gfp_t gfp_flags);
void bpf_local_storage_free_rcu(struct rcu_head *rcu);
+u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
#endif /* _BPF_LOCAL_STORAGE_H */
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index 3e164b8efaa9..a7104af61ab4 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,13 @@ struct bpf_mem_alloc {
struct work_struct work;
};
+/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
+ * Alloc and free are done with bpf_mem_cache_{alloc,free}().
+ *
+ * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects.
+ * Alloc and free are done with bpf_mem_{alloc,free}() and the size of
+ * the returned object is given by the size argument of bpf_mem_alloc().
+ */
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cf1bb1cf4a7b..18538bad2b8c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -537,7 +537,6 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
- bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
@@ -616,9 +615,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
-struct bpf_call_arg_meta;
-int process_dynptr_func(struct bpf_verifier_env *env, int regno,
- enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 49e0fe6d8274..556b3e2e7471 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -70,7 +70,7 @@
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
-#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
+#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 3a4f7cd882ca..00950cc03bff 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -204,7 +204,7 @@ extern struct btf_id_set8 name;
#else
-#define BTF_ID_LIST(name) static u32 __maybe_unused name[16];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[64];
#define BTF_ID(prefix, name)
#define BTF_ID_FLAGS(prefix, name, ...)
#define BTF_ID_UNUSED
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1727898f1641..efa5d4a1677e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1542,4 +1542,50 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
return XDP_REDIRECT;
}
+#ifdef CONFIG_NET
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags);
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush);
+#else /* CONFIG_NET */
+static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
+ void *to, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ return NULL;
+}
+
+static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
+				    unsigned long len, bool flush)
+{
+	/* !CONFIG_NET stub: nothing to copy; returns void like the CONFIG_NET prototype. */
+}
+#endif /* CONFIG_NET */
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index b19d3284551f..ebf4349a53af 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -122,7 +122,7 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
sockptr_t optval, sockptr_t optlen);
extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
sockptr_t optval, size_t offset);
-extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
+extern int ip_mc_sf_allow(const struct sock *sk, __be32 local, __be32 rmt,
int dif, int sdif);
extern void ip_mc_init_dev(struct in_device *);
extern void ip_mc_destroy_dev(struct in_device *);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f33389b42209..7a898113b6b7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -134,6 +134,7 @@ enum {
MLX5_REG_PCAM = 0x507f,
MLX5_REG_NODE_DESC = 0x6001,
MLX5_REG_HOST_ENDIANNESS = 0x7004,
+ MLX5_REG_MTMP = 0x900A,
MLX5_REG_MCIA = 0x9014,
MLX5_REG_MFRL = 0x9028,
MLX5_REG_MLCR = 0x902b,
@@ -731,6 +732,7 @@ struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
struct mlx5_hv_vhca;
+struct mlx5_thermal;
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -808,6 +810,7 @@ struct mlx5_core_dev {
struct mlx5_rsc_dump *rsc_dump;
u32 vsc_addr;
struct mlx5_hv_vhca *hv_vhca;
+ struct mlx5_thermal *thermal;
};
struct mlx5_db {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 66d76e97a087..d2c164f0778c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10869,6 +10869,31 @@ struct mlx5_ifc_mrtc_reg_bits {
u8 time_l[0x20];
};
+struct mlx5_ifc_mtmp_reg_bits {
+ u8 reserved_at_0[0x14];
+ u8 sensor_index[0xc];
+
+ u8 reserved_at_20[0x10];
+ u8 temperature[0x10];
+
+ u8 mte[0x1];
+ u8 mtr[0x1];
+ u8 reserved_at_42[0xe];
+ u8 max_temperature[0x10];
+
+ u8 tee[0x2];
+ u8 reserved_at_62[0xe];
+ u8 temp_threshold_hi[0x10];
+
+ u8 reserved_at_80[0x10];
+ u8 temp_threshold_lo[0x10];
+
+ u8 reserved_at_a0[0x20];
+
+ u8 sensor_name_hi[0x20];
+ u8 sensor_name_lo[0x20];
+};
+
union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -10931,6 +10956,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
+ struct mlx5_ifc_mtmp_reg_bits mtmp_reg;
u8 reserved_at_0[0x60e0];
};
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index e96ee1e348cb..98b2e1e149f9 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -141,6 +141,12 @@ enum mlx5_ptys_width {
MLX5_PTYS_WIDTH_12X = 1 << 4,
};
+struct mlx5_port_eth_proto {
+ u32 cap;
+ u32 admin;
+ u32 oper;
+};
+
#define MLX5E_PROT_MASK(link_mode) (1U << link_mode)
#define MLX5_GET_ETH_PROTO(reg, out, ext, field) \
(ext ? MLX5_GET(reg, out, ext_##field) : \
@@ -218,4 +224,14 @@ int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state);
int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state);
int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio);
int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio);
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
+ struct mlx5_port_eth_proto *eproto);
+bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev);
+u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper,
+ bool force_legacy);
+u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed,
+ bool force_legacy);
+int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
+
#endif /* __MLX5_PORT_H__ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 470085b121d3..7621c512765f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
#include <linux/rbtree.h>
#include <net/net_trackers.h>
#include <net/net_debug.h>
+#include <net/dropreason.h>
struct netpoll_info;
struct device;
@@ -1308,6 +1309,17 @@ struct netdev_net_notifier {
* Used to add FDB entries to dump requests. Implementers should add
* entries to skb and update idx with the number of entries.
*
+ * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[],
+ * u16 nlmsg_flags, struct netlink_ext_ack *extack);
+ * Adds an MDB entry to dev.
+ * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[],
+ * struct netlink_ext_ack *extack);
+ * Deletes the MDB entry from dev.
+ * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb,
+ * struct netlink_callback *cb);
+ * Dumps MDB entries from dev. The first argument (marker) in the netlink
+ * callback is used by core rtnetlink code.
+ *
* int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
* u16 flags, struct netlink_ext_ack *extack)
* int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
@@ -1570,6 +1582,16 @@ struct net_device_ops {
const unsigned char *addr,
u16 vid, u32 portid, u32 seq,
struct netlink_ext_ack *extack);
+ int (*ndo_mdb_add)(struct net_device *dev,
+ struct nlattr *tb[],
+ u16 nlmsg_flags,
+ struct netlink_ext_ack *extack);
+ int (*ndo_mdb_del)(struct net_device *dev,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
+ int (*ndo_mdb_dump)(struct net_device *dev,
+ struct sk_buff *skb,
+ struct netlink_callback *cb);
int (*ndo_bridge_setlink)(struct net_device *dev,
struct nlmsghdr *nlh,
u16 flags,
@@ -3806,13 +3828,8 @@ static inline unsigned int get_netdev_rx_queue_index(
int netif_get_num_default_rss_queues(void);
-enum skb_free_reason {
- SKB_REASON_CONSUMED,
- SKB_REASON_DROPPED,
-};
-
-void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason);
-void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason);
+void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason);
+void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason);
/*
* It is not allowed to call kfree_skb() or consume_skb() from hardware
@@ -3835,22 +3852,22 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason);
*/
static inline void dev_kfree_skb_irq(struct sk_buff *skb)
{
- __dev_kfree_skb_irq(skb, SKB_REASON_DROPPED);
+ dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}
static inline void dev_consume_skb_irq(struct sk_buff *skb)
{
- __dev_kfree_skb_irq(skb, SKB_REASON_CONSUMED);
+ dev_kfree_skb_irq_reason(skb, SKB_CONSUMED);
}
static inline void dev_kfree_skb_any(struct sk_buff *skb)
{
- __dev_kfree_skb_any(skb, SKB_REASON_DROPPED);
+ dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
}
static inline void dev_consume_skb_any(struct sk_buff *skb)
{
- __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
+ dev_kfree_skb_any_reason(skb, SKB_CONSUMED);
}
u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 48314ade1506..7834c0be2831 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -197,6 +197,8 @@ static inline int nf_cookie_v6_check(const struct ipv6hdr *iph,
__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
+int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen);
+
int ipv6_netfilter_init(void);
void ipv6_netfilter_fini(void);
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index c43ac7690eca..3e8743252167 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -50,7 +50,6 @@ struct netlink_kernel_cfg {
struct mutex *cb_mutex;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
- bool (*compare)(struct net *net, struct sock *sk);
};
struct sock *__netlink_kernel_create(struct net *net, int unit,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 36bf0bbc8efa..fefd5091bc24 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1130,16 +1130,15 @@ static inline int phy_read(struct phy_device *phydev, u32 regnum)
#define phy_read_poll_timeout(phydev, regnum, val, cond, sleep_us, \
timeout_us, sleep_before_read) \
({ \
- int __ret = read_poll_timeout(phy_read, val, (cond) || val < 0, \
+ int __ret = read_poll_timeout(phy_read, val, val < 0 || (cond), \
sleep_us, timeout_us, sleep_before_read, phydev, regnum); \
- if (val < 0) \
+ if (val < 0) \
__ret = val; \
if (__ret) \
phydev_err(phydev, "%s failed: %d\n", __func__, __ret); \
__ret; \
})
-
/**
* __phy_read - convenience function for reading a given PHY register
* @phydev: the phy_device struct
diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h
deleted file mode 100644
index 9e75ac8d19be..000000000000
--- a/include/linux/platform_data/nfcmrvl.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2015, Marvell International Ltd.
- *
- * This software file (the "File") is distributed by Marvell International
- * Ltd. under the terms of the GNU General Public License Version 2, June 1991
- * (the "License"). You may use, redistribute and/or modify this File in
- * accordance with the terms and conditions of the License, a copy of which
- * is available on the worldwide web at
- * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
- *
- * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
- * ARE EXPRESSLY DISCLAIMED. The License provides additional details about
- * this warranty disclaimer.
- */
-
-#ifndef _NFCMRVL_PTF_H_
-#define _NFCMRVL_PTF_H_
-
-struct nfcmrvl_platform_data {
- /*
- * Generic
- */
-
- /* GPIO that is wired to RESET_N signal */
- int reset_n_io;
- /* Tell if transport is muxed in HCI one */
- unsigned int hci_muxed;
-
- /*
- * UART specific
- */
-
- /* Tell if UART needs flow control at init */
- unsigned int flow_control;
- /* Tell if firmware supports break control for power management */
- unsigned int break_control;
-
-
- /*
- * I2C specific
- */
-
- unsigned int irq;
- unsigned int irq_polarity;
-};
-
-#endif /* _NFCMRVL_PTF_H_ */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ff7ad331fb82..fe661011644b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -810,7 +810,6 @@ typedef unsigned char *sk_buff_data_t;
* @csum_level: indicates the number of consecutive checksums found in
* the packet minus one that have been verified as
* CHECKSUM_UNNECESSARY (max 3)
- * @scm_io_uring: SKB holds io_uring registered files
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
@@ -989,7 +988,6 @@ struct sk_buff {
#endif
__u8 slow_gro:1;
__u8 csum_not_inet:1;
- __u8 scm_io_uring:1;
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index c04f359655b8..82da55101b5a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -223,7 +223,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
void __ipv6_sock_mc_close(struct sock *sk);
void ipv6_sock_mc_close(struct sock *sk);
-bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
const struct in6_addr *src_addr);
int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 480fa579787e..45ebde587138 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -11,6 +11,7 @@
void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);
void unix_destruct_scm(struct sk_buff *skb);
+void io_uring_destruct_scm(struct sk_buff *skb);
void unix_gc(void);
void wait_for_unix_gc(void);
struct sock *unix_get_socket(struct file *filp);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f115b2550309..7cebba1c4135 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -828,6 +828,18 @@ struct cfg80211_fils_aad {
};
/**
+ * struct cfg80211_set_hw_timestamp - enable/disable HW timestamping
+ * @macaddr: peer MAC address. NULL to enable/disable HW timestamping for all
+ * addresses.
+ * @enable: if set, enable HW timestamping for the specified MAC address.
+ * Otherwise disable HW timestamping for the specified MAC address.
+ */
+struct cfg80211_set_hw_timestamp {
+ const u8 *macaddr;
+ bool enable;
+};
+
+/**
* cfg80211_get_chandef_type - return old channel type from chandef
* @chandef: the channel definition
*
@@ -4330,6 +4342,8 @@ struct mgmt_frame_regs {
* @add_link_station: Add a link to a station.
* @mod_link_station: Modify a link of a station.
* @del_link_station: Remove a link of a station.
+ *
+ * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4683,6 +4697,8 @@ struct cfg80211_ops {
struct link_station_parameters *params);
int (*del_link_station)(struct wiphy *wiphy, struct net_device *dev,
struct link_station_del_parameters *params);
+ int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_set_hw_timestamp *hwts);
};
/*
@@ -5139,6 +5155,8 @@ struct wiphy_iftype_akm_suites {
int n_akm_suites;
};
+#define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff
+
/**
* struct wiphy - wireless hardware description
* @mtx: mutex for the data (structures) of this device
@@ -5348,6 +5366,13 @@ struct wiphy_iftype_akm_suites {
* NL80211_MAX_NR_AKM_SUITES in order to avoid compatibility issues with
* legacy userspace and maximum allowed value is
* CFG80211_MAX_NUM_AKM_SUITES.
+ *
+ * @hw_timestamp_max_peers: maximum number of peers that the driver supports
+ * enabling HW timestamping for concurrently. Setting this field to a
+ * non-zero value indicates that the driver supports HW timestamping.
+ * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver
+ * supports enabling HW timestamping for all peers (i.e. no need to
+ * specify a mac address).
*/
struct wiphy {
struct mutex mtx;
@@ -5496,6 +5521,8 @@ struct wiphy {
u8 ema_max_profile_periodicity;
u16 max_num_akm_suites;
+ u16 hw_timestamp_max_peers;
+
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -6814,13 +6841,11 @@ enum cfg80211_bss_frame_type {
* @ie: IEs
* @ielen: length of IEs
* @band: enum nl80211_band of the channel
- * @ftype: frame type
*
* Returns the channel number, or -1 if none could be determined.
*/
int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
- enum nl80211_band band,
- enum cfg80211_bss_frame_type ftype);
+ enum nl80211_band band);
/**
* cfg80211_inform_bss_data - inform cfg80211 of a new BSS
@@ -8101,6 +8126,7 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
* responsible for any cleanup. The caller must also ensure that
* skb->protocol is set appropriately.
* @unencrypted: Whether the frame was received unencrypted
+ * @link_id: the link the frame was received on, -1 if not applicable or unknown
*
* This function is used to inform userspace about a received control port
* frame. It should only be used if userspace indicated it wants to receive
@@ -8111,8 +8137,8 @@ void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
*
* Return: %true if the frame was passed to userspace
*/
-bool cfg80211_rx_control_port(struct net_device *dev,
- struct sk_buff *skb, bool unencrypted);
+bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb,
+ bool unencrypted, int link_id);
/**
* cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 598f53d2a3a0..f980a72f2ce6 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2017 Intel Deutschland GmbH
- * Copyright (c) 2018-2019, 2021 Intel Corporation
+ * Copyright (c) 2018-2019, 2021-2022 Intel Corporation
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -82,11 +82,14 @@ enum ieee80211_radiotap_presence {
IEEE80211_RADIOTAP_HE_MU = 24,
IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26,
IEEE80211_RADIOTAP_LSIG = 27,
+ IEEE80211_RADIOTAP_TLV = 28,
/* valid in every it_present bitmap, even vendor namespaces */
IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
IEEE80211_RADIOTAP_VENDOR_NAMESPACE = 30,
- IEEE80211_RADIOTAP_EXT = 31
+ IEEE80211_RADIOTAP_EXT = 31,
+ IEEE80211_RADIOTAP_EHT_USIG = 33,
+ IEEE80211_RADIOTAP_EHT = 34,
};
/* for IEEE80211_RADIOTAP_FLAGS */
@@ -360,6 +363,214 @@ enum ieee80211_radiotap_zero_len_psdu_type {
IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR = 0xff,
};
+struct ieee80211_radiotap_tlv {
+ __le16 type;
+ __le16 len;
+ u8 data[];
+} __packed;
+
+/**
+ * struct ieee80211_radiotap_vendor_content - radiotap vendor data content
+ * @oui: radiotap vendor namespace OUI
+ * @oui_subtype: radiotap vendor sub namespace
+ * @vendor_type: radiotap vendor type
+ * @reserved: should always be set to zero (to avoid leaking memory)
+ * @data: the actual vendor namespace data
+ */
+struct ieee80211_radiotap_vendor_content {
+ u8 oui[3];
+ u8 oui_subtype;
+ __le16 vendor_type;
+ __le16 reserved;
+ u8 data[];
+} __packed;
+
+/**
+ * struct ieee80211_radiotap_vendor_tlv - vendor radiotap data information
+ * @type: should always be set to IEEE80211_RADIOTAP_VENDOR_NAMESPACE
+ * @len: length of data
+ * @content: vendor content see @ieee80211_radiotap_vendor_content
+ */
+struct ieee80211_radiotap_vendor_tlv {
+ __le16 type; /* IEEE80211_RADIOTAP_VENDOR_NAMESPACE */
+ __le16 len;
+ struct ieee80211_radiotap_vendor_content content;
+};
+
+/* ieee80211_radiotap_eht_usig - content of U-SIG tlv (type 33)
+ * see www.radiotap.org/fields/U-SIG.html for details
+ */
+struct ieee80211_radiotap_eht_usig {
+ __le32 common;
+ __le32 value;
+ __le32 mask;
+} __packed;
+
+/* ieee80211_radiotap_eht - content of EHT tlv (type 34)
+ * see www.radiotap.org/fields/EHT.html for details
+ */
+struct ieee80211_radiotap_eht {
+ __le32 known;
+ __le32 data[9];
+ __le32 user_info[];
+} __packed;
+
+/* Known field for EHT TLV
+ * The trailing letters of each name indicate what the field applies to:
+ * O - OFDMA (including TB), M - MU-MIMO, S - EHT sounding.
+ */
+enum ieee80211_radiotap_eht_known {
+ IEEE80211_RADIOTAP_EHT_KNOWN_SPATIAL_REUSE = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_KNOWN_GI = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_KNOWN_EHT_LTF = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_KNOWN_LDPC_EXTRA_SYM_OM = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PRE_PADD_FACOR_OM = 0x00000040,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PE_DISAMBIGUITY_OM = 0x00000080,
+ IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_O = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_KNOWN_DISREGARD_S = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_KNOWN_CRC1 = 0x00002000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_TAIL1 = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_CRC2_O = 0x00008000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_TAIL2_O = 0x00010000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_NSS_S = 0x00020000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_BEAMFORMED_S = 0x00040000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_NR_NON_OFDMA_USERS_M = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_CRC_M = 0x00100000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_ENCODING_BLOCK_TAIL_M = 0x00200000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_SIZE_OM = 0x00400000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_MRU_INDEX_OM = 0x00800000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_RU_ALLOC_TB_FMT = 0x01000000,
+ IEEE80211_RADIOTAP_EHT_KNOWN_PRIMARY_80 = 0x02000000,
+};
+
+enum ieee80211_radiotap_eht_data {
+ /* Data 0 */
+ IEEE80211_RADIOTAP_EHT_DATA0_SPATIAL_REUSE = 0x00000078,
+ IEEE80211_RADIOTAP_EHT_DATA0_GI = 0x00000180,
+ IEEE80211_RADIOTAP_EHT_DATA0_LTF = 0x00000600,
+ IEEE80211_RADIOTAP_EHT_DATA0_EHT_LTF = 0x00003800,
+ IEEE80211_RADIOTAP_EHT_DATA0_LDPC_EXTRA_SYM_OM = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_DATA0_PRE_PADD_FACOR_OM = 0x00018000,
+ IEEE80211_RADIOTAP_EHT_DATA0_PE_DISAMBIGUITY_OM = 0x00020000,
+ IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_S = 0x000c0000,
+ IEEE80211_RADIOTAP_EHT_DATA0_DISREGARD_O = 0x003c0000,
+ IEEE80211_RADIOTAP_EHT_DATA0_CRC1_O = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_DATA0_TAIL1_O = 0xfc000000,
+ /* Data 1 */
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_SIZE = 0x0000001f,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_INDEX = 0x00001fe0,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1 = 0x003fe000,
+ IEEE80211_RADIOTAP_EHT_DATA1_RU_ALLOC_CC_1_1_1_KNOWN = 0x00400000,
+ IEEE80211_RADIOTAP_EHT_DATA1_PRIMARY_80 = 0xc0000000,
+ /* Data 2 */
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_1_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_1_1_2_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA2_RU_ALLOC_CC_2_1_2_KNOWN = 0x20000000,
+ /* Data 3 */
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_1_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_2_2_1_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA3_RU_ALLOC_CC_1_2_2_KNOWN = 0x20000000,
+ /* Data 4 */
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_2_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_1_2_3_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA4_RU_ALLOC_CC_2_2_3_KNOWN = 0x20000000,
+ /* Data 5 */
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_4_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_2_2_4_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA5_RU_ALLOC_CC_1_2_5_KNOWN = 0x20000000,
+ /* Data 6 */
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5 = 0x000001ff,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_5_KNOWN = 0x00000200,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6 = 0x0007fc00,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_1_2_6_KNOWN = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6 = 0x1ff00000,
+ IEEE80211_RADIOTAP_EHT_DATA6_RU_ALLOC_CC_2_2_6_KNOWN = 0x20000000,
+ /* Data 7 */
+ IEEE80211_RADIOTAP_EHT_DATA7_CRC2_O = 0x0000000f,
+ IEEE80211_RADIOTAP_EHT_DATA7_TAIL_2_O = 0x000003f0,
+ IEEE80211_RADIOTAP_EHT_DATA7_NSS_S = 0x0000f000,
+ IEEE80211_RADIOTAP_EHT_DATA7_BEAMFORMED_S = 0x00010000,
+ IEEE80211_RADIOTAP_EHT_DATA7_NUM_OF_NON_OFDMA_USERS = 0x000e0000,
+ IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_CRC = 0x00f00000,
+ IEEE80211_RADIOTAP_EHT_DATA7_USER_ENCODING_BLOCK_TAIL = 0x3f000000,
+ /* Data 8 */
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_PS_160 = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B0 = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_DATA8_RU_ALLOC_TB_FMT_B7_B1 = 0x000001fc,
+};
+
+enum ieee80211_radiotap_eht_user_info {
+ IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID_KNOWN = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_MCS_KNOWN = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_CODING_KNOWN = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_KNOWN_O = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_KNOWN_O = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_KNOWN_M = 0x00000040,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_DATA_FOR_USER = 0x00000080,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_STA_ID = 0x0007ff00,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_CODING = 0x00080000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_MCS = 0x00f00000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_NSS_O = 0x0f000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_BEAMFORMING_O = 0x20000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_SPATIAL_CONFIG_M = 0x3f000000,
+ IEEE80211_RADIOTAP_EHT_USER_INFO_RESEVED_c0000000 = 0xc0000000,
+};
+
+enum ieee80211_radiotap_eht_usig_common {
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER_KNOWN = 0x00000001,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_KNOWN = 0x00000002,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL_KNOWN = 0x00000004,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR_KNOWN = 0x00000008,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP_KNOWN = 0x00000010,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BAD_USIG_CRC = 0x00000020,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000,
+};
+
+enum ieee80211_radiotap_eht_usig_mu {
+ /* MU-USIG-1 */
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B20_B24_DISREGARD = 0x0000001f,
+ IEEE80211_RADIOTAP_EHT_USIG1_MU_B25_VALIDATE = 0x00000020,
+ /* MU-USIG-2 */
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B0_B1_PPDU_TYPE = 0x000000c0,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B2_VALIDATE = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B3_B7_PUNCTURED_INFO = 0x00003e00,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B8_VALIDATE = 0x00004000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B9_B10_SIG_MCS = 0x00018000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B11_B15_EHT_SIG_SYMBOLS = 0x003e0000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B16_B19_CRC = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_USIG2_MU_B20_B25_TAIL = 0xfc000000,
+};
+
+enum ieee80211_radiotap_eht_usig_tb {
+ /* TB-USIG-1 */
+ IEEE80211_RADIOTAP_EHT_USIG1_TB_B20_B25_DISREGARD = 0x0000001f,
+
+ /* TB-USIG-2 */
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B0_B1_PPDU_TYPE = 0x000000c0,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B2_VALIDATE = 0x00000100,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B3_B6_SPATIAL_REUSE_1 = 0x00001e00,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B7_B10_SPATIAL_REUSE_2 = 0x0001e000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B11_B15_DISREGARD = 0x003e0000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B16_B19_CRC = 0x03c00000,
+ IEEE80211_RADIOTAP_EHT_USIG2_TB_B20_B25_TAIL = 0xfc000000,
+};
+
/**
* ieee80211_get_radiotap_len - get radiotap header length
*/
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 51857117ac09..caa20a905531 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -305,10 +305,7 @@ static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
return sk_to_full_sk(skb->sk);
}
-static inline struct inet_sock *inet_sk(const struct sock *sk)
-{
- return (struct inet_sock *)sk;
-}
+#define inet_sk(ptr) container_of_const(ptr, struct inet_sock, sk)
static inline void __inet_sk_copy_descendant(struct sock *sk_to,
const struct sock *sk_from,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 219fd15893b0..f12edca660ba 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -534,6 +534,7 @@ struct ieee80211_fils_discovery {
* This structure keeps information about a BSS (and an association
* to that BSS) that can change during the lifetime of the BSS.
*
+ * @vif: reference to owning VIF
* @addr: (link) address used locally
* @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
@@ -656,6 +657,9 @@ struct ieee80211_fils_discovery {
* write-protected by sdata_lock and local->mtx so holding either is fine
* for read access.
* @color_change_color: the bss color that will be used after the change.
+ * @ht_ldpc: in AP mode, indicates interface has HT LDPC capability.
+ * @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability.
+ * @he_ldpc: in AP mode, indicates interface has HE LDPC capability.
* @vht_su_beamformer: in AP mode, does this BSS support operation as an VHT SU
* beamformer
* @vht_su_beamformee: in AP mode, does this BSS support operation as an VHT SU
@@ -673,8 +677,16 @@ struct ieee80211_fils_discovery {
* @he_full_ul_mumimo: does this BSS support the reception (AP) or transmission
* (non-AP STA) of an HE TB PPDU on an RU that spans the entire PPDU
* bandwidth
+ * @eht_su_beamformer: in AP-mode, does this BSS enable operation as an EHT SU
+ * beamformer
+ * @eht_su_beamformee: in AP-mode, does this BSS enable operation as an EHT SU
+ * beamformee
+ * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU
+ * beamformer
*/
struct ieee80211_bss_conf {
+ struct ieee80211_vif *vif;
+
const u8 *bssid;
unsigned int link_id;
u8 addr[ETH_ALEN] __aligned(2);
@@ -750,6 +762,9 @@ struct ieee80211_bss_conf {
bool color_change_active;
u8 color_change_color;
+ bool ht_ldpc;
+ bool vht_ldpc;
+ bool he_ldpc;
bool vht_su_beamformer;
bool vht_su_beamformee;
bool vht_mu_beamformer;
@@ -758,6 +773,9 @@ struct ieee80211_bss_conf {
bool he_su_beamformee;
bool he_mu_beamformer;
bool he_full_ul_mumimo;
+ bool eht_su_beamformer;
+ bool eht_su_beamformee;
+ bool eht_mu_beamformer;
};
/**
@@ -1372,9 +1390,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* subframes share the same sequence number. Reported subframes can be
* either regular MSDU or singly A-MSDUs. Subframes must not be
* interleaved with other frames.
- * @RX_FLAG_RADIOTAP_VENDOR_DATA: This frame contains vendor-specific
- * radiotap data in the skb->data (before the frame) as described by
- * the &struct ieee80211_vendor_radiotap.
+ * @RX_FLAG_RADIOTAP_TLV_AT_END: This frame contains radiotap TLVs in the
+ * skb->data (before the 802.11 header).
+ * If used, the SKB's mac_header pointer must be set to point
+ * to the 802.11 header after the TLVs, and any padding added after TLV
+ * data to align to 4 must be cleared by the driver putting the TLVs
+ * in the skb.
* @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before.
* This is used for AMSDU subframes which can have the same PN as
* the first subframe.
@@ -1426,7 +1447,7 @@ enum mac80211_rx_flags {
RX_FLAG_ONLY_MONITOR = BIT(17),
RX_FLAG_SKIP_MONITOR = BIT(18),
RX_FLAG_AMSDU_MORE = BIT(19),
- RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(20),
+ RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20),
RX_FLAG_MIC_STRIPPED = BIT(21),
RX_FLAG_ALLOW_SAME_PN = BIT(22),
RX_FLAG_ICV_STRIPPED = BIT(23),
@@ -1567,39 +1588,6 @@ ieee80211_rx_status_to_khz(struct ieee80211_rx_status *rx_status)
}
/**
- * struct ieee80211_vendor_radiotap - vendor radiotap data information
- * @present: presence bitmap for this vendor namespace
- * (this could be extended in the future if any vendor needs more
- * bits, the radiotap spec does allow for that)
- * @align: radiotap vendor namespace alignment. This defines the needed
- * alignment for the @data field below, not for the vendor namespace
- * description itself (which has a fixed 2-byte alignment)
- * Must be a power of two, and be set to at least 1!
- * @oui: radiotap vendor namespace OUI
- * @subns: radiotap vendor sub namespace
- * @len: radiotap vendor sub namespace skip length, if alignment is done
- * then that's added to this, i.e. this is only the length of the
- * @data field.
- * @pad: number of bytes of padding after the @data, this exists so that
- * the skb data alignment can be preserved even if the data has odd
- * length
- * @data: the actual vendor namespace data
- *
- * This struct, including the vendor data, goes into the skb->data before
- * the 802.11 header. It's split up in mac80211 using the align/oui/subns
- * data.
- */
-struct ieee80211_vendor_radiotap {
- u32 present;
- u8 align;
- u8 oui[3];
- u8 subns;
- u8 pad;
- u16 len;
- u8 data[];
-} __packed;
-
-/**
* enum ieee80211_conf_flags - configuration flags
*
* Flags to define PHY configuration options
@@ -3841,6 +3829,12 @@ struct ieee80211_prep_tx_info {
* the station. See @sta_pre_rcu_remove if needed.
* This callback can sleep.
*
+ * @link_add_debugfs: Drivers can use this callback to add debugfs files
+ * when a link is added to a mac80211 vif. This callback should be within
+ * a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
+ * For non-MLO the callback will be called once for the default bss_conf
+ * with the vif's directory rather than a separate subdirectory.
+ *
* @sta_add_debugfs: Drivers can use this callback to add debugfs files
* when a station is added to mac80211's station list. This callback
* should be within a CONFIG_MAC80211_DEBUGFS conditional. This
@@ -4230,6 +4224,9 @@ struct ieee80211_prep_tx_info {
* Note that a sta can also be inserted or removed with valid links,
* i.e. passed to @sta_add/@sta_state with sta->valid_links not zero.
* In fact, cannot change from having valid_links and not having them.
+ * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. This is
+ * not restored at HW reset by mac80211 so drivers need to take care of
+ * that.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4319,6 +4316,10 @@ struct ieee80211_ops {
int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta);
#ifdef CONFIG_MAC80211_DEBUGFS
+ void (*link_add_debugfs)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf,
+ struct dentry *dir);
void (*sta_add_debugfs)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
@@ -4589,6 +4590,9 @@ struct ieee80211_ops {
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
u16 old_links, u16 new_links);
+ int (*set_hw_timestamp)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct cfg80211_set_hw_timestamp *hwts);
};
/**
@@ -5985,6 +5989,20 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
unsigned long delay);
/**
+ * ieee80211_refresh_tx_agg_session_timer - Refresh a tx agg session timer.
+ * @sta: the station for which to start a BA session
+ * @tid: the TID to BA on.
+ *
+ * This function allows low level driver to refresh tx agg session timer
+ * to maintain BA session, the session level will still be managed by the
+ * mac80211.
+ *
+ * Note: must be called in an RCU critical section.
+ */
+void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *sta,
+ u16 tid);
+
+/**
* ieee80211_start_tx_ba_session - Start a tx Block Ack session.
* @sta: the station for which to start a BA session
* @tid: the TID to BA on.
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 3bb579962a14..bb11a6535d80 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -48,6 +48,10 @@ enum TRI_STATE {
#define MAX_PORTS_IN_MANA_DEV 256
+/* Update this count whenever the respective structures are changed */
+#define MANA_STATS_RX_COUNT 5
+#define MANA_STATS_TX_COUNT 11
+
struct mana_stats_rx {
u64 packets;
u64 bytes;
@@ -61,6 +65,14 @@ struct mana_stats_tx {
u64 packets;
u64 bytes;
u64 xdp_xmit;
+ u64 tso_packets;
+ u64 tso_bytes;
+ u64 tso_inner_packets;
+ u64 tso_inner_bytes;
+ u64 short_pkt_fmt;
+ u64 long_pkt_fmt;
+ u64 csum_partial;
+ u64 mana_map_err;
struct u64_stats_sync syncp;
};
@@ -331,6 +343,12 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 tx_cqes;
+ u64 tx_cqe_err;
+ u64 tx_cqe_unknown_type;
+ u64 rx_cqes;
+ u64 rx_coalesced_err;
+ u64 rx_cqe_unknown_type;
};
struct mana_context {
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2f2a6023fb0e..c8d39bba2a0d 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -336,8 +336,6 @@ void neigh_table_init(int index, struct neigh_table *tbl);
int neigh_table_clear(int index, struct neigh_table *tbl);
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey);
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref);
static inline struct neighbour *neigh_create(struct neigh_table *tbl,
@@ -466,7 +464,7 @@ static __always_inline int neigh_event_send_probe(struct neighbour *neigh,
if (READ_ONCE(neigh->used) != now)
WRITE_ONCE(neigh->used, now);
- if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
+ if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
return __neigh_event_send(neigh, skb, immediate_ok);
return 0;
}
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2016839991a4..bb0bd69fb655 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -64,7 +64,6 @@ static inline psched_time_t psched_get_time(void)
}
struct qdisc_watchdog {
- u64 last_expires;
struct hrtimer timer;
struct Qdisc *qdisc;
};
diff --git a/include/net/raw.h b/include/net/raw.h
index 2c004c20ed99..7ad15830cf38 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -22,7 +22,7 @@
extern struct proto raw_prot;
extern struct raw_hashinfo raw_v4_hashinfo;
-bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num,
__be32 raddr, __be32 laddr, int dif, int sdif);
int raw_abort(struct sock *sk, int err);
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
index bc70909625f6..82810cbe3798 100644
--- a/include/net/rawv6.h
+++ b/include/net/rawv6.h
@@ -6,7 +6,7 @@
#include <net/raw.h>
extern struct raw_hashinfo raw_v6_hashinfo;
-bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num,
const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif, int sdif);
diff --git a/include/net/scm.h b/include/net/scm.h
index 1ce365f4c256..585adc1346bd 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -105,16 +105,27 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc
}
}
}
+
+static inline bool scm_has_secdata(struct socket *sock)
+{
+ return test_bit(SOCK_PASSSEC, &sock->flags);
+}
#else
static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm)
{ }
+
+static inline bool scm_has_secdata(struct socket *sock)
+{
+ return false;
+}
#endif /* CONFIG_SECURITY_NETWORK */
static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
struct scm_cookie *scm, int flags)
{
if (!msg->msg_control) {
- if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp)
+ if (test_bit(SOCK_PASSCRED, &sock->flags) || scm->fp ||
+ scm_has_secdata(sock))
msg->msg_flags |= MSG_CTRUNC;
scm_destroy(scm);
return;
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index fa00dc20a0d7..572d73fdcd5e 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -58,5 +58,7 @@ void sctp_sched_ops_register(enum sctp_sched_type sched,
struct sctp_sched_ops *sched_ops);
void sctp_sched_ops_prio_init(void);
void sctp_sched_ops_rr_init(void);
+void sctp_sched_ops_fc_init(void);
+void sctp_sched_ops_wfq_init(void);
#endif /* __sctp_stream_sched_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e1f6e7fc2b11..a0933efd93c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1429,6 +1429,11 @@ struct sctp_stream_out_ext {
struct {
struct list_head rr_list;
};
+ struct {
+ struct list_head fc_list;
+ __u32 fc_length;
+ __u16 fc_weight;
+ };
};
};
@@ -1475,6 +1480,9 @@ struct sctp_stream {
/* The next stream in line */
struct sctp_stream_out_ext *rr_next;
};
+ struct {
+ struct list_head fc_list;
+ };
};
struct sctp_stream_interleave *si;
};
diff --git a/include/net/smc.h b/include/net/smc.h
index 597cb9381182..a002552be29c 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -67,6 +67,7 @@ struct smcd_ops {
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
+ int (*supports_v2)(void);
u8* (*get_system_eid)(void);
u64 (*get_local_gid)(struct smcd_dev *dev);
u16 (*get_chid)(struct smcd_dev *dev);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index bca5b01af247..b7b2e9abfb37 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -3,6 +3,7 @@
#define __NET_VXLAN_H 1
#include <linux/if_vlan.h>
+#include <linux/rhashtable-types.h>
#include <net/udp_tunnel.h>
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
@@ -302,6 +303,10 @@ struct vxlan_dev {
struct vxlan_vni_group __rcu *vnigrp;
struct hlist_head fdb_head[FDB_HASH_SIZE];
+
+ struct rhashtable mdb_tbl;
+ struct hlist_head mdb_list;
+ unsigned int mdb_seq;
};
#define VXLAN_F_LEARN 0x01
@@ -322,6 +327,7 @@ struct vxlan_dev {
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
#define VXLAN_F_TTL_INHERIT 0x10000
#define VXLAN_F_VNIFILTER 0x20000
+#define VXLAN_F_MDB 0x40000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3057e1a4a11c..e96a1151ec75 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -38,6 +38,7 @@ struct xdp_umem {
struct xsk_map {
struct bpf_map map;
spinlock_t lock; /* Synchronize map updates */
+ atomic_t count;
struct xdp_sock __rcu *xsk_map[];
};
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 03d19fc562f8..fd206a6ab5b8 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -158,7 +158,7 @@ TRACE_EVENT(inet_sock_set_state,
),
TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
struct in6_addr *pin6;
__be32 *p32;
@@ -222,7 +222,7 @@ TRACE_EVENT(inet_sk_error_report,
),
TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
struct in6_addr *pin6;
__be32 *p32;
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 901b440238d5..bf06db8d2046 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -67,7 +67,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
),
TP_fast_assign(
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
__be32 *p32;
__entry->skbaddr = skb;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 62ce1f5d1b1d..976b194eb775 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4969,6 +4969,12 @@ union bpf_attr {
* different maps if key/value layout matches across maps.
* Every bpf_timer_set_callback() can have different callback_fn.
*
+ * *flags* can be one of:
+ *
+ * **BPF_F_TIMER_ABS**
+ * Start the timer in absolute expire value instead of the
+ * default relative one.
+ *
* Return
* 0 on success.
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
@@ -5325,11 +5331,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5337,6 +5354,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
@@ -7083,4 +7103,13 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+/*
+ * Flags to control bpf_timer_start() behaviour.
+ * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
+ * relative to current time.
+ */
+enum {
+ BPF_F_TIMER_ABS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index d60c456710b3..c9d624f528c5 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -633,6 +633,11 @@ enum {
MDBA_MDB_EATTR_GROUP_MODE,
MDBA_MDB_EATTR_SOURCE,
MDBA_MDB_EATTR_RTPROT,
+ MDBA_MDB_EATTR_DST,
+ MDBA_MDB_EATTR_DST_PORT,
+ MDBA_MDB_EATTR_VNI,
+ MDBA_MDB_EATTR_IFINDEX,
+ MDBA_MDB_EATTR_SRC_VNI,
__MDBA_MDB_EATTR_MAX
};
#define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1)
@@ -728,6 +733,11 @@ enum {
MDBE_ATTR_SRC_LIST,
MDBE_ATTR_GROUP_MODE,
MDBE_ATTR_RTPROT,
+ MDBE_ATTR_DST,
+ MDBE_ATTR_DST_PORT,
+ MDBE_ATTR_VNI,
+ MDBE_ATTR_IFINDEX,
+ MDBE_ATTR_SRC_VNI,
__MDBE_ATTR_MAX,
};
#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index ff677f3a6cad..9c6f02c26054 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -931,6 +931,7 @@ enum nft_exthdr_attributes {
* @NFT_META_TIME_HOUR: hour of day (in seconds)
* @NFT_META_SDIF: slave device interface index
* @NFT_META_SDIFNAME: slave device interface name
+ * @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -969,6 +970,7 @@ enum nft_meta_keys {
NFT_META_TIME_HOUR,
NFT_META_SDIF,
NFT_META_SDIFNAME,
+ NFT_META_BRI_BROUTE,
__NFT_META_IIFTYPE,
};
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f14621a954e1..9a0ac0363f1f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1299,6 +1299,16 @@
* @NL80211_CMD_MODIFY_LINK_STA: Modify a link of an MLD station
* @NL80211_CMD_REMOVE_LINK_STA: Remove a link of an MLD station
*
+ * @NL80211_CMD_SET_HW_TIMESTAMP: Enable/disable HW timestamping of Timing
+ * measurement and Fine timing measurement frames. If %NL80211_ATTR_MAC
+ * is included, enable/disable HW timestamping only for frames to/from the
+ * specified MAC address. Otherwise enable/disable HW timestamping for
+ * all TM/FTM frames (including ones that were enabled with specific MAC
+ * address). If %NL80211_ATTR_HW_TIMESTAMP_ENABLED is not included, disable
+ * HW timestamping.
+ * The number of peers that HW timestamping can be enabled for concurrently
+ * is indicated by %NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS.
+ *
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
*/
@@ -1550,6 +1560,8 @@ enum nl80211_commands {
NL80211_CMD_MODIFY_LINK_STA,
NL80211_CMD_REMOVE_LINK_STA,
+ NL80211_CMD_SET_HW_TIMESTAMP,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -2775,6 +2787,13 @@ enum nl80211_commands {
* indicates that the sub-channel is punctured. Higher 16 bits are
* reserved.
*
+ * @NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS: Maximum number of peers that HW
+ * timestamping can be enabled for concurrently (u16), a wiphy attribute.
+ * A value of 0xffff indicates setting for all peers (i.e. not specifying
+ * an address with %NL80211_CMD_SET_HW_TIMESTAMP) is supported.
+ * @NL80211_ATTR_HW_TIMESTAMP_ENABLED: Indicates whether HW timestamping should
+ * be enabled or not (flag attribute).
+ *
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
* @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3306,6 +3325,9 @@ enum nl80211_attrs {
NL80211_ATTR_PUNCT_BITMAP,
+ NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS,
+ NL80211_ATTR_HW_TIMESTAMP_ENABLED,
+
/* add attributes here, update the policy in nl80211.c */
__NL80211_ATTR_AFTER_LAST,
@@ -6326,6 +6348,10 @@ enum nl80211_feature_flags {
* @NL80211_EXT_FEATURE_SECURE_NAN: Device supports NAN Pairing which enables
* authentication, data encryption and message integrity.
*
+ * @NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA: Device supports randomized TA
+ * in authentication and deauthentication frames sent to unassociated peer
+ * using @NL80211_CMD_FRAME.
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -6396,6 +6422,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_POWERED_ADDR_CHANGE,
NL80211_EXT_FEATURE_PUNCT,
NL80211_EXT_FEATURE_SECURE_NAN,
+ NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
@@ -6510,8 +6537,14 @@ enum nl80211_timeout_reason {
* @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with
* %NL80211_ATTR_SCAN_FREQ_KHZ. This also means
* %NL80211_ATTR_SCAN_FREQUENCIES will not be included.
- * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for colocated APs reported by
- * 2.4/5 GHz APs
+ * @NL80211_SCAN_FLAG_COLOCATED_6GHZ: scan for collocated APs reported by
+ * 2.4/5 GHz APs. When the flag is set, the scan logic will use the
+ * information from the RNR element found in beacons/probe responses
+ * received on the 2.4/5 GHz channels to actively scan only the 6GHz
+ * channels on which APs are expected to be found. Note that when not set,
+ * the scan logic would scan all 6GHz channels, but since transmission of
+ * probe requests on non PSC channels is limited, it is highly likely that
+ * these channels would passively be scanned.
*/
enum nl80211_scan_flags {
NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ed7d4ecbf53d..b7d91d4cf0db 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -1211,7 +1211,9 @@ enum sctp_sched_type {
SCTP_SS_DEFAULT = SCTP_SS_FCFS,
SCTP_SS_PRIO,
SCTP_SS_RR,
- SCTP_SS_MAX = SCTP_SS_RR
+ SCTP_SS_FC,
+ SCTP_SS_WFQ,
+ SCTP_SS_MAX = SCTP_SS_WFQ
};
/* Probe Interval socket option */
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index b4062bed186a..12c1c9699935 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -61,6 +61,7 @@
#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */
#define VIRTIO_NET_F_HOST_USO 56 /* Host can handle USO in. */
#define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */
+#define VIRTIO_NET_F_GUEST_HDRLEN 59 /* Guest provides the exact hdr_len value. */
#define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */
#define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */
#define VIRTIO_NET_F_STANDBY 62 /* Act as standby for another device
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index e2bac9f89902..ad10bae33b23 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -867,8 +867,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
UNIXCB(skb).fp = fpl;
skb->sk = sk;
- skb->scm_io_uring = 1;
- skb->destructor = unix_destruct_scm;
+ skb->destructor = io_uring_destruct_scm;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 484706959556..1588c793a715 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -721,6 +721,28 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_
return num_elems;
}
+static u64 array_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+ u32 elem_size = array->elem_size;
+ u64 entries = map->max_entries;
+ u64 usage = sizeof(*array);
+
+ if (percpu) {
+ usage += entries * sizeof(void *);
+ usage += entries * elem_size * num_possible_cpus();
+ } else {
+ if (map->map_flags & BPF_F_MMAPABLE) {
+ usage = PAGE_ALIGN(usage);
+ usage += PAGE_ALIGN(entries * elem_size);
+ } else {
+ usage += entries * elem_size;
+ }
+ }
+ return usage;
+}
+
BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
@@ -742,6 +764,7 @@ const struct bpf_map_ops array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
@@ -762,6 +785,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
};
@@ -1156,6 +1180,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = prog_array_map_clear,
.map_seq_show_elem = prog_array_map_seq_show_elem,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
};
@@ -1257,6 +1282,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
.map_release = perf_event_fd_array_release,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
};
@@ -1291,6 +1317,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
};
#endif
@@ -1379,5 +1406,6 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
};
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 48ee750849f2..6350c5d35a9b 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -193,6 +193,17 @@ static int bloom_map_check_btf(const struct bpf_map *map,
return btf_type_is_void(key_type) ? 0 : -EINVAL;
}
+static u64 bloom_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_bloom_filter *bloom;
+ u64 bitset_bytes;
+
+ bloom = container_of(map, struct bpf_bloom_filter, map);
+ bitset_bytes = BITS_TO_BYTES((u64)bloom->bitset_mask + 1);
+ bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
+ return sizeof(*bloom) + bitset_bytes;
+}
+
BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -206,5 +217,6 @@ const struct bpf_map_ops bloom_filter_map_ops = {
.map_update_elem = bloom_map_update_elem,
.map_delete_elem = bloom_map_delete_elem,
.map_check_btf = bloom_map_check_btf,
+ .map_mem_usage = bloom_map_mem_usage,
.map_btf_id = &bpf_bloom_map_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 6cdf6d9ed91d..9ae07aedaf23 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -221,6 +221,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = {
.map_update_elem = bpf_cgrp_storage_update_elem,
.map_delete_elem = bpf_cgrp_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
+ .map_mem_usage = bpf_local_storage_map_mem_usage,
.map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = cgroup_storage_ptr,
};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 05f4c66c9089..43e2619c8167 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -223,6 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_update_elem = bpf_fd_inode_storage_update_elem,
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
+ .map_mem_usage = bpf_local_storage_map_mem_usage,
.map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = inode_storage_ptr,
};
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 35f4138a54dc..d3ba3f2db640 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner)
return map->ops->map_owner_storage_ptr(owner);
}
+static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed_lockless(&selem->snode);
+}
+
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->snode);
}
+static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed_lockless(&selem->map_node);
+}
+
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->map_node);
@@ -75,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
if (value)
copy_map_value(&smap->map, SDATA(selem)->data, value);
+ /* No need to call check_and_init_map_value as memory is zero init */
return selem;
}
@@ -98,7 +109,28 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
kfree_rcu(local_storage, rcu);
}
-static void bpf_selem_free_rcu(struct rcu_head *rcu)
+static void bpf_selem_free_fields_rcu(struct rcu_head *rcu)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
+
+ selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+ /* protected by the rcu_barrier*() */
+ smap = rcu_dereference_protected(SDATA(selem)->smap, true);
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ kfree(selem);
+}
+
+static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu)
+{
+ /* Free directly if Tasks Trace RCU GP also implies RCU GP */
+ if (rcu_trace_implies_rcu_gp())
+ bpf_selem_free_fields_rcu(rcu);
+ else
+ call_rcu(rcu, bpf_selem_free_fields_rcu);
+}
+
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
@@ -119,6 +151,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
+ struct btf_record *rec;
void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
@@ -159,10 +192,26 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
- if (use_trace_rcu)
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
- else
- kfree_rcu(selem, rcu);
+ /* A different RCU callback is chosen whenever we need to free
+ * additional fields in selem data before freeing selem.
+ * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU
+ * callbacks when it has special fields, hence we can only conditionally
+ * dereference smap, as by this time the map might have already been
+ * freed without waiting for our call_rcu callback if it did not have
+ * any special fields.
+ */
+ rec = smap->map.record;
+ if (use_trace_rcu) {
+ if (!IS_ERR_OR_NULL(rec))
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu);
+ else
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
+ } else {
+ if (!IS_ERR_OR_NULL(rec))
+ call_rcu(&selem->rcu, bpf_selem_free_fields_rcu);
+ else
+ kfree_rcu(selem, rcu);
+ }
return free_local_storage;
}
@@ -174,7 +223,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
bool free_local_storage = false;
unsigned long flags;
- if (unlikely(!selem_linked_to_storage(selem)))
+ if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
return;
@@ -208,7 +257,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
struct bpf_local_storage_map_bucket *b;
unsigned long flags;
- if (unlikely(!selem_linked_to_map(selem)))
+ if (unlikely(!selem_linked_to_map_lockless(selem)))
/* selem has already be unlinked from smap */
return;
@@ -420,7 +469,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
err = check_flags(old_sdata, map_flags);
if (err)
return ERR_PTR(err);
- if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
+ if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
copy_map_value_locked(&smap->map, old_sdata->data,
value, false);
return old_sdata;
@@ -636,6 +685,16 @@ bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage)
return free_storage;
}
+u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
+ u64 usage = sizeof(*smap);
+
+ /* The dynamically callocated selems are not counted currently. */
+ usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
+ return usage;
+}
+
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
struct bpf_local_storage_cache *cache)
@@ -713,6 +772,26 @@ void bpf_local_storage_map_free(struct bpf_map *map,
*/
synchronize_rcu();
+ /* Only delay freeing of smap, buckets are not needed anymore */
kvfree(smap->buckets);
+
+ /* When local storage has special fields, callbacks for
+ * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will
+ * keep using the map BTF record, we need to execute an RCU barrier to
+ * wait for them as the record will be freed right after our map_free
+ * callback.
+ */
+ if (!IS_ERR_OR_NULL(smap->map.record)) {
+ rcu_barrier_tasks_trace();
+ /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+ * is true, because while call_rcu invocation is skipped in that
+ * case in bpf_selem_free_fields_trace_rcu (and all local
+ * storage maps pass use_trace_rcu = true), there can be
+ * call_rcu callbacks based on use_trace_rcu = false in the
+ * while ((selem = ...)) loop above or when owner's free path
+ * calls bpf_local_storage_unlink_nolock.
+ */
+ rcu_barrier();
+ }
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index ece9870cab68..38903fb52f98 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -641,6 +641,21 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
return map;
}
+/* Estimate the memory used by a struct_ops map.
+ *
+ * The total is the sum of four terms: the map structure grown by the part of
+ * the value type that exceeds struct bpf_struct_ops_value, one more area of
+ * the value type's size, one pointer per member of the value type, and one
+ * page.
+ */
+static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
+{
+	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
+	const struct btf_type *vt = st_map->st_ops->value_type;
+	/* Map structure plus the value-type bytes beyond the common header. */
+	size_t kmap_sz = sizeof(*st_map) +
+			 vt->size - sizeof(struct bpf_struct_ops_value);
+	/* NOTE(review): "struct bpf_links" looks like a typo for
+	 * "struct bpf_link"; only the pointer size matters here, so the
+	 * result is the same either way - confirm against the declaration
+	 * of the links array.
+	 */
+	size_t links_sz = btf_type_vlen(vt) * sizeof(struct bpf_links *);
+
+	/* NOTE(review): the second vt->size term is presumably a separately
+	 * allocated copy of the value and PAGE_SIZE presumably the image
+	 * page - confirm against the map allocation path.
+	 */
+	return (u64)kmap_sz + vt->size + links_sz + PAGE_SIZE;
+}
+
BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
const struct bpf_map_ops bpf_struct_ops_map_ops = {
.map_alloc_check = bpf_struct_ops_map_alloc_check,
@@ -651,6 +666,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
.map_delete_elem = bpf_struct_ops_map_delete_elem,
.map_update_elem = bpf_struct_ops_map_update_elem,
.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
+ .map_mem_usage = bpf_struct_ops_map_mem_usage,
.map_btf_id = &bpf_struct_ops_map_btf_ids[0],
};
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 1e486055a523..20f942229f3c 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -335,6 +335,7 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_update_elem = bpf_pid_task_storage_update_elem,
.map_delete_elem = bpf_pid_task_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
+ .map_mem_usage = bpf_local_storage_map_mem_usage,
.map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = task_storage_ptr,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 73780748404c..1853beaed4be 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -207,6 +207,11 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
+ BTF_KFUNC_HOOK_CGROUP_SKB,
+ BTF_KFUNC_HOOK_SCHED_ACT,
+ BTF_KFUNC_HOOK_SK_SKB,
+ BTF_KFUNC_HOOK_SOCKET_FILTER,
+ BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_MAX,
};
@@ -3283,9 +3288,9 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
/* Reject extra tags */
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
return -EINVAL;
- if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_REF;
else
return -EINVAL;
@@ -5684,6 +5689,10 @@ again:
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
+ if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
+ return ctx_type;
+ if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
+ return ctx_type;
if (strcmp(ctx_tname, tname)) {
/* bpf_user_pt_regs_t is a typedef, so resolve it to
* underlying struct and check name again
@@ -6155,6 +6164,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
+ *flag = 0;
again:
tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
@@ -6321,6 +6331,15 @@ error:
* of this field or inside of this struct
*/
if (btf_type_is_struct(mtype)) {
+ if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION &&
+ btf_type_vlen(mtype) != 1)
+ /*
+ * walking unions yields untrusted pointers
+ * with exception of __bpf_md_ptr and other
+ * unions with a single member
+ */
+ *flag |= PTR_UNTRUSTED;
+
/* our field must be inside that union or struct */
t = mtype;
@@ -6365,7 +6384,7 @@ error:
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
- *flag = tmp_flag;
+ *flag |= tmp_flag;
return WALK_PTR;
}
}
@@ -7705,6 +7724,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ return BTF_KFUNC_HOOK_CGROUP_SKB;
+ case BPF_PROG_TYPE_SCHED_ACT:
+ return BTF_KFUNC_HOOK_SCHED_ACT;
+ case BPF_PROG_TYPE_SK_SKB:
+ return BTF_KFUNC_HOOK_SK_SKB;
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ return BTF_KFUNC_HOOK_SOCKET_FILTER;
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+ return BTF_KFUNC_HOOK_LWT;
default:
return BTF_KFUNC_HOOK_MAX;
}
@@ -8336,7 +8368,7 @@ out:
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off)
+ int off, const char *suffix)
{
struct btf *btf = reg->btf;
const struct btf_type *walk_type, *safe_type;
@@ -8353,7 +8385,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
tname = btf_name_by_offset(btf, walk_type->name_off);
- ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
+ ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
if (ret < 0)
return false;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index bf2fdb33fb31..53edb8ad2471 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -2223,10 +2223,12 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, ppos));
- *insn++ = BPF_STX_MEM(
- BPF_SIZEOF(u32), treg, si->src_reg,
+ *insn++ = BPF_RAW_INSN(
+ BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
+ treg, si->src_reg,
bpf_ctx_narrow_access_offset(
- 0, sizeof(u32), sizeof(loff_t)));
+ 0, sizeof(u32), sizeof(loff_t)),
+ si->imm);
*insn++ = BPF_LDX_MEM(
BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
@@ -2376,10 +2378,17 @@ static bool cg_sockopt_is_valid_access(int off, int size,
return true;
}
-#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
- T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
- si->dst_reg, si->src_reg, \
- offsetof(struct bpf_sockopt_kern, F))
+#define CG_SOCKOPT_READ_FIELD(F) \
+ BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sockopt_kern, F))
+
+#define CG_SOCKOPT_WRITE_FIELD(F) \
+ BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) | \
+ BPF_MEM | BPF_CLASS(si->code)), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sockopt_kern, F), \
+ si->imm)
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -2391,25 +2400,25 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sockopt, sk):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
+ *insn++ = CG_SOCKOPT_READ_FIELD(sk);
break;
case offsetof(struct bpf_sockopt, level):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(level);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
+ *insn++ = CG_SOCKOPT_READ_FIELD(level);
break;
case offsetof(struct bpf_sockopt, optname):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optname);
break;
case offsetof(struct bpf_sockopt, optlen):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optlen);
break;
case offsetof(struct bpf_sockopt, retval):
BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
@@ -2429,9 +2438,11 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
treg, treg,
offsetof(struct task_struct, bpf_ctx));
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
- treg, si->src_reg,
- offsetof(struct bpf_cg_run_ctx, retval));
+ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
+ BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ treg, si->src_reg,
+ offsetof(struct bpf_cg_run_ctx, retval),
+ si->imm);
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
} else {
@@ -2447,10 +2458,10 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
}
break;
case offsetof(struct bpf_sockopt, optval):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optval);
break;
case offsetof(struct bpf_sockopt, optval_end):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
break;
}
@@ -2529,10 +2540,6 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index d2110c1f6fa6..871809e71b4e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -673,6 +673,15 @@ static int cpu_map_redirect(struct bpf_map *map, u64 index, u64 flags)
__cpu_map_lookup_elem);
}
+/* Report the memory footprint of a cpumap: the bpf_cpu_map structure plus
+ * one entry pointer per slot (max_entries slots). Currently the dynamically
+ * allocated elements are not counted.
+ */
+static u64 cpu_map_mem_usage(const struct bpf_map *map)
+{
+	u64 slots_sz = (u64)map->max_entries * sizeof(struct bpf_cpu_map_entry *);
+
+	return sizeof(struct bpf_cpu_map) + slots_sz;
+}
+
BTF_ID_LIST_SINGLE(cpu_map_btf_ids, struct, bpf_cpu_map)
const struct bpf_map_ops cpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -683,6 +692,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_lookup_elem = cpu_map_lookup_elem,
.map_get_next_key = cpu_map_get_next_key,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = cpu_map_mem_usage,
.map_btf_id = &cpu_map_btf_ids[0],
.map_redirect = cpu_map_redirect,
};
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 52b981512a35..b6587ec40f1b 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -55,7 +55,7 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
/* cpumask must be the first element so struct bpf_cpumask be cast to struct cpumask. */
BUILD_BUG_ON(offsetof(struct bpf_cpumask, cpumask) != 0);
- cpumask = bpf_mem_alloc(&bpf_cpumask_ma, sizeof(*cpumask));
+ cpumask = bpf_mem_cache_alloc(&bpf_cpumask_ma);
if (!cpumask)
return NULL;
@@ -123,7 +123,7 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
if (refcount_dec_and_test(&cpumask->usage)) {
migrate_disable();
- bpf_mem_free(&bpf_cpumask_ma, cpumask);
+ bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
migrate_enable();
}
}
@@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
@@ -468,7 +468,7 @@ static int __init cpumask_kfunc_init(void)
},
};
- ret = bpf_mem_alloc_init(&bpf_cpumask_ma, 0, false);
+ ret = bpf_mem_alloc_init(&bpf_cpumask_ma, sizeof(struct bpf_cpumask), false);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &cpumask_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &cpumask_kfunc_set);
return ret ?: register_btf_id_dtor_kfuncs(cpumask_dtors,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2675fefc6cb6..19b036a228f7 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -819,8 +819,10 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
old_dev = unrcu_pointer(xchg(&dtab->netdev_map[k], NULL));
- if (old_dev)
+ if (old_dev) {
call_rcu(&old_dev->rcu, __dev_map_entry_free);
+ atomic_dec((atomic_t *)&dtab->items);
+ }
return 0;
}
@@ -931,6 +933,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
old_dev = unrcu_pointer(xchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev)));
if (old_dev)
call_rcu(&old_dev->rcu, __dev_map_entry_free);
+ else
+ atomic_inc((atomic_t *)&dtab->items);
return 0;
}
@@ -1016,6 +1020,20 @@ static int dev_hash_map_redirect(struct bpf_map *map, u64 ifindex, u64 flags)
__dev_map_hash_lookup_elem);
}
+/* Report the memory footprint of a devmap or devmap-hash.
+ *
+ * Counts the bpf_dtab structure, the lookup table (hlist heads for the hash
+ * flavour, entry pointers for the array flavour) and one bpf_dtab_netdev per
+ * item currently accounted in dtab->items.
+ */
+static u64 dev_map_mem_usage(const struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u64 table_sz, entries_sz;
+
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)
+		table_sz = (u64)dtab->n_buckets * sizeof(struct hlist_head);
+	else
+		table_sz = (u64)map->max_entries * sizeof(struct bpf_dtab_netdev *);
+
+	/* items is read through an atomic_t cast, matching how it is updated. */
+	entries_sz = atomic_read((atomic_t *)&dtab->items) *
+		     (u64)sizeof(struct bpf_dtab_netdev);
+
+	return sizeof(struct bpf_dtab) + table_sz + entries_sz;
+}
+
BTF_ID_LIST_SINGLE(dev_map_btf_ids, struct, bpf_dtab)
const struct bpf_map_ops dev_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -1026,6 +1044,7 @@ const struct bpf_map_ops dev_map_ops = {
.map_update_elem = dev_map_update_elem,
.map_delete_elem = dev_map_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = dev_map_mem_usage,
.map_btf_id = &dev_map_btf_ids[0],
.map_redirect = dev_map_redirect,
};
@@ -1039,6 +1058,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_update_elem = dev_map_hash_update_elem,
.map_delete_elem = dev_map_hash_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = dev_map_mem_usage,
.map_btf_id = &dev_map_btf_ids[0],
.map_redirect = dev_hash_map_redirect,
};
@@ -1109,9 +1129,11 @@ static int dev_map_notification(struct notifier_block *notifier,
if (!dev || netdev != dev->dev)
continue;
odev = unrcu_pointer(cmpxchg(&dtab->netdev_map[i], RCU_INITIALIZER(dev), NULL));
- if (dev == odev)
+ if (dev == odev) {
call_rcu(&dev->rcu,
__dev_map_entry_free);
+ atomic_dec((atomic_t *)&dtab->items);
+ }
}
}
rcu_read_unlock();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5dfcb5ad0d06..0df4b0c10f59 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -249,7 +249,18 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
- bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ if (htab_is_percpu(htab)) {
+ void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ cond_resched();
+ }
+ } else {
+ bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ cond_resched();
+ }
cond_resched();
}
}
@@ -759,9 +770,17 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
static void check_and_free_fields(struct bpf_htab *htab,
struct htab_elem *elem)
{
- void *map_value = elem->key + round_up(htab->map.key_size, 8);
+ if (htab_is_percpu(htab)) {
+ void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ } else {
+ void *map_value = elem->key + round_up(htab->map.key_size, 8);
- bpf_obj_free_fields(htab->map.record, map_value);
+ bpf_obj_free_fields(htab->map.record, map_value);
+ }
}
/* It is called from the bpf_lru_list when the LRU needs to delete
@@ -858,9 +877,9 @@ find_first_elem:
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
+ check_and_free_fields(htab, l);
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
- check_and_free_fields(htab, l);
bpf_mem_cache_free(&htab->ma, l);
}
@@ -918,14 +937,13 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
{
if (!onallcpus) {
/* copy true value_size bytes */
- memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+ copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
} else {
u32 size = round_up(htab->map.value_size, 8);
int off = 0, cpu;
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
- value + off, size);
+ copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
off += size;
}
}
@@ -940,16 +958,14 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
* (onallcpus=false always when coming from bpf prog).
*/
if (!onallcpus) {
- u32 size = round_up(htab->map.value_size, 8);
int current_cpu = raw_smp_processor_id();
int cpu;
for_each_possible_cpu(cpu) {
if (cpu == current_cpu)
- bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
- size);
- else
- memset(per_cpu_ptr(pptr, cpu), 0, size);
+ copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
+ else /* Since elem is preallocated, we cannot touch special fields */
+ zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
}
} else {
pcpu_copy_value(htab, pptr, value, onallcpus);
@@ -1575,9 +1591,8 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
pptr = htab_elem_get_ptr(l, key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off,
- per_cpu_ptr(pptr, cpu),
- roundup_value_size);
+ copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, value + off);
off += roundup_value_size;
}
} else {
@@ -1772,8 +1787,8 @@ again_nocopy:
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(dst_val + off,
- per_cpu_ptr(pptr, cpu), size);
+ copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val + off);
off += size;
}
} else {
@@ -2046,9 +2061,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
roundup_value_size = round_up(map->value_size, 8);
pptr = htab_elem_get_ptr(elem, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(info->percpu_value_buf + off,
- per_cpu_ptr(pptr, cpu),
- roundup_value_size);
+ copy_map_value_long(map, info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, info->percpu_value_buf + off);
off += roundup_value_size;
}
ctx.value = info->percpu_value_buf;
@@ -2175,6 +2190,44 @@ out:
return num_elems;
}
+/* Estimate the memory footprint of a hash map.
+ *
+ * Always counted: the bpf_htab structure, the bucket array and
+ * HASHTAB_MAP_LOCK_COUNT ints per possible CPU. Element storage is then
+ * added according to whether the map is preallocated, per-CPU and/or LRU.
+ */
+static u64 htab_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ u32 value_size = round_up(htab->map.value_size, 8);
+ bool prealloc = htab_is_prealloc(htab);
+ bool percpu = htab_is_percpu(htab);
+ bool lru = htab_is_lru(htab);
+ u64 num_entries;
+ u64 usage = sizeof(struct bpf_htab);
+
+ usage += sizeof(struct bucket) * htab->n_buckets;
+ /* NOTE(review): presumably the per-CPU lock-state counters - confirm. */
+ usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT;
+ if (prealloc) {
+ /* Preallocated: every slot exists up front, plus one spare element
+ * per possible CPU when htab_has_extra_elems() says so.
+ */
+ num_entries = map->max_entries;
+ if (htab_has_extra_elems(htab))
+ num_entries += num_possible_cpus();
+
+ usage += htab->elem_size * num_entries;
+
+ /* Per-CPU maps add one rounded-up value per CPU per element;
+ * otherwise non-LRU maps add one element pointer per CPU
+ * (presumably the per-CPU extra-element slots - confirm).
+ */
+ if (percpu)
+ usage += value_size * num_possible_cpus() * num_entries;
+ else if (!lru)
+ usage += sizeof(struct htab_elem *) * num_possible_cpus();
+ } else {
+ /* NOTE(review): this macro is not #undef'd within this function, so
+ * it remains defined for the code that follows.
+ */
+#define LLIST_NODE_SZ sizeof(struct llist_node)
+
+ /* Not preallocated: size by the current element count, taken from
+ * whichever counter the map uses.
+ */
+ num_entries = htab->use_percpu_counter ?
+ percpu_counter_sum(&htab->pcount) :
+ atomic_read(&htab->count);
+ /* Each element is charged an extra llist_node (presumably the
+ * allocator's per-object header - confirm).
+ */
+ usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries;
+ if (percpu) {
+ /* Per-CPU: a second llist_node plus a pointer per element, and
+ * one rounded-up value per CPU per element.
+ */
+ usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries;
+ usage += value_size * num_possible_cpus() * num_entries;
+ }
+ }
+ return usage;
+}
+
BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -2191,6 +2244,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab),
.map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
@@ -2212,6 +2266,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru),
.map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
@@ -2292,8 +2347,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
*/
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off,
- per_cpu_ptr(pptr, cpu), size);
+ copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value + off);
off += size;
}
ret = 0;
@@ -2363,6 +2418,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_percpu),
.map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
@@ -2382,6 +2438,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
+ .map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab_lru_percpu),
.map_btf_id = &htab_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
@@ -2519,6 +2576,7 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
.map_gen_lookup = htab_of_map_gen_lookup,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = htab_map_mem_usage,
BATCH_OPS(htab),
.map_btf_id = &htab_map_btf_ids[0],
};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5b278a38ae58..637ac4e92e75 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1264,10 +1264,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
{
struct bpf_hrtimer *t;
int ret = 0;
+ enum hrtimer_mode mode;
if (in_nmi())
return -EOPNOTSUPP;
- if (flags)
+ if (flags > BPF_F_TIMER_ABS)
return -EINVAL;
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
@@ -1275,7 +1276,13 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
ret = -EINVAL;
goto out;
}
- hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
+
+ if (flags & BPF_F_TIMER_ABS)
+ mode = HRTIMER_MODE_ABS_SOFT;
+ else
+ mode = HRTIMER_MODE_REL_SOFT;
+
+ hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
return ret;
@@ -1420,11 +1427,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}
+/* Mark a dynptr read-only by setting DYNPTR_RDONLY_BIT in its size word. */
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+	ptr->size = ptr->size | DYNPTR_RDONLY_BIT;
+}
+
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
+/* Decode the dynptr type stored in the size word. */
+static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
+{
+	/* Clear the read-only flag first so it cannot leak into the type value,
+	 * then shift the type field down.
+	 */
+	return (enum bpf_dynptr_type)((ptr->size & ~(DYNPTR_RDONLY_BIT)) >>
+				      DYNPTR_TYPE_SHIFT);
+}
+
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
@@ -1497,6 +1514,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
if (!src->data || flags)
@@ -1506,13 +1524,25 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
if (err)
return err;
- /* Source and destination may possibly overlap, hence use memmove to
- * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
- * pointing to overlapping PTR_TO_MAP_VALUE regions.
- */
- memmove(dst, src->data + src->offset + offset, len);
+ type = bpf_dynptr_get_type(src);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst, src->data + src->offset + offset, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
+ case BPF_DYNPTR_TYPE_XDP:
+ return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_read_proto = {
@@ -1529,22 +1559,40 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
- if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
+ if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;
- /* Source and destination may possibly overlap, hence use memmove to
- * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
- * pointing to overlapping PTR_TO_MAP_VALUE regions.
- */
- memmove(dst->data + dst->offset + offset, src, len);
+ type = bpf_dynptr_get_type(dst);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ if (flags)
+ return -EINVAL;
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst->data + dst->offset + offset, src, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
+ flags);
+ case BPF_DYNPTR_TYPE_XDP:
+ if (flags)
+ return -EINVAL;
+ return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_write_proto = {
@@ -1560,6 +1608,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
+ enum bpf_dynptr_type type;
int err;
if (!ptr->data)
@@ -1572,7 +1621,20 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
if (bpf_dynptr_is_rdonly(ptr))
return 0;
- return (unsigned long)(ptr->data + ptr->offset + offset);
+ type = bpf_dynptr_get_type(ptr);
+
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return (unsigned long)(ptr->data + ptr->offset + offset);
+ case BPF_DYNPTR_TYPE_SKB:
+ case BPF_DYNPTR_TYPE_XDP:
+ /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
+ return 0;
+ default:
+ WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
+ return 0;
+ }
}
static const struct bpf_func_proto bpf_dynptr_data_proto = {
@@ -1693,6 +1755,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
return &bpf_cgrp_storage_delete_proto;
+ case BPF_FUNC_get_current_cgroup_id:
+ return &bpf_get_current_cgroup_id_proto;
+ case BPF_FUNC_get_current_ancestor_cgroup_id:
+ return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
default:
break;
@@ -2097,10 +2163,28 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
if (level > cgrp->level || level < 0)
return NULL;
+ /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
ancestor = cgrp->ancestors[level];
- cgroup_get(ancestor);
+ if (!cgroup_tryget(ancestor))
+ return NULL;
return ancestor;
}
+
+/**
+ * bpf_cgroup_from_id - Find a cgroup from its ID.
+ * @cgid: cgroup id.
+ *
+ * A cgroup returned by this kfunc which is not subsequently stored in a map
+ * must be released by calling bpf_cgroup_release().
+ *
+ * Return: the cgroup obtained from cgroup_get_from_id(), or NULL when that
+ * lookup yields an error pointer.
+ */
+__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
+{
+	struct cgroup *found = cgroup_get_from_id(cgid);
+
+	/* kfunc callers expect NULL on failure, never an ERR_PTR value. */
+	return IS_ERR(found) ? NULL : found;
+}
#endif /* CONFIG_CGROUPS */
/**
@@ -2122,6 +2206,140 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
return p;
}
+/**
+ * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
+ * @ptr: The dynptr whose data slice to retrieve
+ * @offset: Offset into the dynptr
+ * @buffer: User-provided buffer to copy contents into
+ * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
+ * requested slice. This must be a constant.
+ *
+ * For non-skb and non-xdp type dynptrs, there is no difference between
+ * bpf_dynptr_slice and bpf_dynptr_data.
+ *
+ * If the intention is to write to the data slice, please use
+ * bpf_dynptr_slice_rdwr.
+ *
+ * The user must check that the returned pointer is not null before using it.
+ *
+ * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
+ * does not change the underlying packet data pointers, so a call to
+ * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
+ * the bpf program.
+ *
+ * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
+ * data slice (can be either direct pointer to the data or a pointer to the user
+ * provided buffer, with its contents containing the data, if unable to obtain
+ * direct pointer)
+ */
+__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
+ void *buffer, u32 buffer__szk)
+{
+ enum bpf_dynptr_type type;
+ u32 len = buffer__szk;
+ int err;
+
+ if (!ptr->data)
+ return NULL;
+
+ err = bpf_dynptr_check_off_len(ptr, offset, len);
+ if (err)
+ return NULL;
+
+ type = bpf_dynptr_get_type(ptr);
+
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return ptr->data + ptr->offset + offset;
+ case BPF_DYNPTR_TYPE_SKB:
+ return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
+ case BPF_DYNPTR_TYPE_XDP:
+ {
+ void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
+ if (xdp_ptr)
+ return xdp_ptr;
+
+ bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
+ return buffer;
+ }
+ default:
+ WARN_ONCE(true, "unknown dynptr type %d\n", type);
+ return NULL;
+ }
+}
+
+/**
+ * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
+ * @ptr: The dynptr whose data slice to retrieve
+ * @offset: Offset into the dynptr
+ * @buffer: User-provided buffer to copy contents into
+ * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
+ * requested slice. This must be a constant.
+ *
+ * For non-skb and non-xdp type dynptrs, there is no difference between
+ * bpf_dynptr_slice_rdwr and bpf_dynptr_data.
+ *
+ * The returned pointer is writable and may point to either directly the dynptr
+ * data at the requested offset or to the buffer if unable to obtain a direct
+ * data pointer (for example, the requested slice is in the paged area of an skb
+ * packet). In the case where the returned pointer is to the buffer, the user
+ * is responsible for persisting writes through calling bpf_dynptr_write(). This
+ * usually looks something like this pattern:
+ *
+ * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
+ * if (!eth)
+ * return TC_ACT_SHOT;
+ *
+ * // mutate eth header //
+ *
+ * if (eth == buffer)
+ * bpf_dynptr_write(&dynptr, 0, buffer, sizeof(buffer), 0);
+ *
+ * Please note that, as in the example above, the user must check that the
+ * returned pointer is not null before using it.
+ *
+ * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
+ * does not change the underlying packet data pointers, so a call to
+ * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
+ * the bpf program.
+ *
+ * Return: NULL if the call failed (eg invalid dynptr), pointer to a
+ * data slice (can be either direct pointer to the data or a pointer to the user
+ * provided buffer, with its contents containing the data, if unable to obtain
+ * direct pointer)
+ */
+__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
+ void *buffer, u32 buffer__szk)
+{
+ if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
+ return NULL;
+
+ /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
+ *
+ * For skb-type dynptrs, it is safe to write into the returned pointer
+ * if the bpf program allows skb data writes. There are two possibilities
+ * that may occur when calling bpf_dynptr_slice_rdwr:
+ *
+ * 1) The requested slice is in the head of the skb. In this case, the
+ * returned pointer is directly to skb data, and if the skb is cloned, the
+ * verifier will have uncloned it (see bpf_unclone_prologue()) already.
+ * The pointer can be directly written into.
+ *
+ * 2) Some portion of the requested slice is in the paged buffer area.
+ * In this case, the requested data will be copied out into the buffer
+ * and the returned pointer will be a pointer to the buffer. The skb
+ * will not be pulled. To persist the write, the user will need to call
+ * bpf_dynptr_write(), which will pull the skb and commit the write.
+ *
+ * Similarly for xdp programs, if the requested slice is not across xdp
+ * fragments, then a direct pointer will be returned, otherwise the data
+ * will be copied out into the buffer and the user will need to call
+ * bpf_dynptr_write() to commit changes.
+ */
+ return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
+}
+
__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
@@ -2166,7 +2384,8 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_SET8_END(generic_btf_ids)
@@ -2190,6 +2409,8 @@ BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
+BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index e90d9f63edc5..a993560f200a 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -446,6 +446,12 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
+static u64 cgroup_storage_map_usage(const struct bpf_map *map)
+{
+ /* Currently the dynamically allocated elements are not counted. */
+ return sizeof(struct bpf_cgroup_storage_map);
+}
+
BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct,
bpf_cgroup_storage_map)
const struct bpf_map_ops cgroup_storage_map_ops = {
@@ -457,6 +463,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
.map_delete_elem = cgroup_storage_delete_elem,
.map_check_btf = cgroup_storage_check_btf,
.map_seq_show_elem = cgroup_storage_seq_show_elem,
+ .map_mem_usage = cgroup_storage_map_usage,
.map_btf_id = &cgroup_storage_map_btf_ids[0],
};
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index d833496e9e42..dc23f2ac9cde 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -720,6 +720,16 @@ static int trie_check_btf(const struct bpf_map *map,
-EINVAL : 0;
}
+static u64 trie_mem_usage(const struct bpf_map *map)
+{
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ u64 elem_size;
+
+ elem_size = sizeof(struct lpm_trie_node) + trie->data_size +
+ trie->map.value_size;
+ return elem_size * READ_ONCE(trie->n_entries);
+}
+
BTF_ID_LIST_SINGLE(trie_map_btf_ids, struct, lpm_trie)
const struct bpf_map_ops trie_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -733,5 +743,6 @@ const struct bpf_map_ops trie_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_delete_batch = generic_map_delete_batch,
.map_check_btf = trie_check_btf,
+ .map_mem_usage = trie_mem_usage,
.map_btf_id = &trie_map_btf_ids[0],
};
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 0c85e06f7ea7..d9c9f45e3529 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -563,6 +563,12 @@ void bpf_map_offload_map_free(struct bpf_map *map)
bpf_map_area_free(offmap);
}
+u64 bpf_map_offload_map_mem_usage(const struct bpf_map *map)
+{
+ /* The memory dynamically allocated in netdev dev_ops is not counted */
+ return sizeof(struct bpf_offloaded_map);
+}
+
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
{
struct bpf_offloaded_map *offmap = map_to_offmap(map);
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8a5e060de63b..63ecbbcb349d 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -246,6 +246,14 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
return -EINVAL;
}
+static u64 queue_stack_map_mem_usage(const struct bpf_map *map)
+{
+ u64 usage = sizeof(struct bpf_queue_stack);
+
+ usage += ((u64)map->max_entries + 1) * map->value_size;
+ return usage;
+}
+
BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
const struct bpf_map_ops queue_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -259,6 +267,7 @@ const struct bpf_map_ops queue_map_ops = {
.map_pop_elem = queue_map_pop_elem,
.map_peek_elem = queue_map_peek_elem,
.map_get_next_key = queue_stack_map_get_next_key,
+ .map_mem_usage = queue_stack_map_mem_usage,
.map_btf_id = &queue_map_btf_ids[0],
};
@@ -274,5 +283,6 @@ const struct bpf_map_ops stack_map_ops = {
.map_pop_elem = stack_map_pop_elem,
.map_peek_elem = stack_map_peek_elem,
.map_get_next_key = queue_stack_map_get_next_key,
+ .map_mem_usage = queue_stack_map_mem_usage,
.map_btf_id = &queue_map_btf_ids[0],
};
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 82c61612f382..71cb72f5b733 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -335,6 +335,13 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
return 0;
}
+static u64 reuseport_array_mem_usage(const struct bpf_map *map)
+{
+ struct reuseport_array *array;
+
+ return struct_size(array, ptrs, map->max_entries);
+}
+
BTF_ID_LIST_SINGLE(reuseport_array_map_btf_ids, struct, reuseport_array)
const struct bpf_map_ops reuseport_array_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -344,5 +351,6 @@ const struct bpf_map_ops reuseport_array_ops = {
.map_lookup_elem = reuseport_array_lookup_elem,
.map_get_next_key = reuseport_array_get_next_key,
.map_delete_elem = reuseport_array_delete_elem,
+ .map_mem_usage = reuseport_array_mem_usage,
.map_btf_id = &reuseport_array_map_btf_ids[0],
};
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 8732e0aadf36..0d2a45ff83f1 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -19,6 +19,7 @@
(offsetof(struct bpf_ringbuf, consumer_pos) >> PAGE_SHIFT)
/* consumer page and producer page */
#define RINGBUF_POS_PAGES 2
+#define RINGBUF_NR_META_PAGES (RINGBUF_PGOFF + RINGBUF_POS_PAGES)
#define RINGBUF_MAX_RECORD_SZ (UINT_MAX/4)
@@ -96,7 +97,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
{
const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN | __GFP_ZERO;
- int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES;
+ int nr_meta_pages = RINGBUF_NR_META_PAGES;
int nr_data_pages = data_sz >> PAGE_SHIFT;
int nr_pages = nr_meta_pages + nr_data_pages;
struct page **pages, *page;
@@ -336,6 +337,21 @@ static __poll_t ringbuf_map_poll_user(struct bpf_map *map, struct file *filp,
return 0;
}
+static u64 ringbuf_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_ringbuf *rb;
+ int nr_data_pages;
+ int nr_meta_pages;
+ u64 usage = sizeof(struct bpf_ringbuf_map);
+
+ rb = container_of(map, struct bpf_ringbuf_map, map)->rb;
+ usage += (u64)rb->nr_pages << PAGE_SHIFT;
+ nr_meta_pages = RINGBUF_NR_META_PAGES;
+ nr_data_pages = map->max_entries >> PAGE_SHIFT;
+ usage += (nr_meta_pages + 2 * nr_data_pages) * sizeof(struct page *);
+ return usage;
+}
+
BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
const struct bpf_map_ops ringbuf_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -347,6 +363,7 @@ const struct bpf_map_ops ringbuf_map_ops = {
.map_update_elem = ringbuf_map_update_elem,
.map_delete_elem = ringbuf_map_delete_elem,
.map_get_next_key = ringbuf_map_get_next_key,
+ .map_mem_usage = ringbuf_map_mem_usage,
.map_btf_id = &ringbuf_map_btf_ids[0],
};
@@ -361,6 +378,7 @@ const struct bpf_map_ops user_ringbuf_map_ops = {
.map_update_elem = ringbuf_map_update_elem,
.map_delete_elem = ringbuf_map_delete_elem,
.map_get_next_key = ringbuf_map_get_next_key,
+ .map_mem_usage = ringbuf_map_mem_usage,
.map_btf_id = &user_ringbuf_map_btf_ids[0],
};
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index aecea7451b61..0f1d8dced933 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -654,6 +654,19 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
}
+static u64 stack_map_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
+ u64 value_size = map->value_size;
+ u64 n_buckets = smap->n_buckets;
+ u64 enties = map->max_entries;
+ u64 usage = sizeof(*smap);
+
+ usage += n_buckets * sizeof(struct stack_map_bucket *);
+ usage += enties * (sizeof(struct stack_map_bucket) + value_size);
+ return usage;
+}
+
BTF_ID_LIST_SINGLE(stack_trace_map_btf_ids, struct, bpf_stack_map)
const struct bpf_map_ops stack_trace_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -664,5 +677,6 @@ const struct bpf_map_ops stack_trace_map_ops = {
.map_update_elem = stack_map_update_elem,
.map_delete_elem = stack_map_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = stack_map_mem_usage,
.map_btf_id = &stack_trace_map_btf_ids[0],
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc83cb82f37..f406dfa13792 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -105,6 +105,7 @@ const struct bpf_map_ops bpf_map_offload_ops = {
.map_alloc = bpf_map_offload_map_alloc,
.map_free = bpf_map_offload_map_free,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = bpf_map_offload_map_mem_usage,
};
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -128,6 +129,8 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
}
if (attr->map_ifindex)
ops = &bpf_map_offload_ops;
+ if (!ops->map_mem_usage)
+ return ERR_PTR(-EINVAL);
map = ops->map_alloc(attr);
if (IS_ERR(map))
return map;
@@ -771,17 +774,10 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
}
#ifdef CONFIG_PROC_FS
-/* Provides an approximation of the map's memory footprint.
- * Used only to provide a backward compatibility and display
- * a reasonable "memlock" info.
- */
-static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
+/* Show the memory usage of a bpf map */
+static u64 bpf_map_memory_usage(const struct bpf_map *map)
{
- unsigned long size;
-
- size = round_up(map->key_size + bpf_map_value_size(map), 8);
-
- return round_up(map->max_entries * size, PAGE_SIZE);
+ return map->ops->map_mem_usage(map);
}
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
@@ -803,7 +799,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
"map_extra:\t%#llx\n"
- "memlock:\t%lu\n"
+ "memlock:\t%llu\n"
"map_id:\t%u\n"
"frozen:\t%u\n",
map->map_type,
@@ -812,7 +808,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->max_entries,
map->map_flags,
(unsigned long long)map->map_extra,
- bpf_map_memory_footprint(map),
+ bpf_map_memory_usage(map),
map->id,
READ_ONCE(map->frozen));
if (type) {
@@ -1059,9 +1055,15 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
- map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
ret = -EOPNOTSUPP;
goto free_map_tab;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 272563a0b770..b2116ca78d9a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -268,7 +268,41 @@ struct bpf_call_arg_meta {
u32 ret_btf_id;
u32 subprogno;
struct btf_field *kptr_field;
- u8 uninit_dynptr_regno;
+};
+
+struct bpf_kfunc_call_arg_meta {
+ /* In parameters */
+ struct btf *btf;
+ u32 func_id;
+ u32 kfunc_flags;
+ const struct btf_type *func_proto;
+ const char *func_name;
+ /* Out parameters */
+ u32 ref_obj_id;
+ u8 release_regno;
+ bool r0_rdonly;
+ u32 ret_btf_id;
+ u64 r0_size;
+ u32 subprogno;
+ struct {
+ u64 value;
+ bool found;
+ } arg_constant;
+ struct {
+ struct btf *btf;
+ u32 btf_id;
+ } arg_obj_drop;
+ struct {
+ struct btf_field *field;
+ } arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
+ struct {
+ enum bpf_dynptr_type type;
+ u32 id;
+ } initialized_dynptr;
+ u64 mem_size;
};
struct btf *btf_vmlinux;
@@ -453,7 +487,8 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
- type == PTR_TO_SOCK_COMMON;
+ type == PTR_TO_SOCK_COMMON ||
+ type == PTR_TO_MEM;
}
static bool type_is_ptr_alloc_obj(u32 type)
@@ -675,37 +710,62 @@ static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_sl
return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
-static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ const char *obj_kind, int nr_slots)
{
int off, spi;
if (!tnum_is_const(reg->var_off)) {
- verbose(env, "dynptr has to be at a constant offset\n");
+ verbose(env, "%s has to be at a constant offset\n", obj_kind);
return -EINVAL;
}
off = reg->off + reg->var_off.value;
if (off % BPF_REG_SIZE) {
- verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
spi = __get_spi(off);
- if (spi < 1) {
- verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ if (spi + 1 < nr_slots) {
+ verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
- if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
+ if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
return -ERANGE;
return spi;
}
+static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
+}
+
static const char *kernel_type_name(const struct btf* btf, u32 id)
{
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
+static const char *dynptr_type_str(enum bpf_dynptr_type type)
+{
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ return "local";
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return "ringbuf";
+ case BPF_DYNPTR_TYPE_SKB:
+ return "skb";
+ case BPF_DYNPTR_TYPE_XDP:
+ return "xdp";
+ case BPF_DYNPTR_TYPE_INVALID:
+ return "<invalid>";
+ default:
+ WARN_ONCE(1, "unknown dynptr type %d\n", type);
+ return "<unknown>";
+ }
+}
+
static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
env->scratched_regs |= 1U << regno;
@@ -751,11 +811,31 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_LOCAL;
case DYNPTR_TYPE_RINGBUF:
return BPF_DYNPTR_TYPE_RINGBUF;
+ case DYNPTR_TYPE_SKB:
+ return BPF_DYNPTR_TYPE_SKB;
+ case DYNPTR_TYPE_XDP:
+ return BPF_DYNPTR_TYPE_XDP;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
}
+static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
+{
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ return DYNPTR_TYPE_LOCAL;
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return DYNPTR_TYPE_RINGBUF;
+ case BPF_DYNPTR_TYPE_SKB:
+ return DYNPTR_TYPE_SKB;
+ case BPF_DYNPTR_TYPE_XDP:
+ return DYNPTR_TYPE_XDP;
+ default:
+ return 0;
+ }
+}
+
static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
{
return type == BPF_DYNPTR_TYPE_RINGBUF;
@@ -895,6 +975,14 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
+static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ if (!env->allow_ptr_leaks)
+ __mark_reg_not_init(env, reg);
+ else
+ __mark_reg_unknown(env, reg);
+}
+
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
struct bpf_func_state *state, int spi)
{
@@ -934,12 +1022,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
continue;
- if (dreg->dynptr_id == dynptr_id) {
- if (!env->allow_ptr_leaks)
- __mark_reg_not_init(env, dreg);
- else
- __mark_reg_unknown(env, dreg);
- }
+ if (dreg->dynptr_id == dynptr_id)
+ mark_reg_invalid(env, dreg);
}));
/* Do not release reference state, we are destroying dynptr on stack,
@@ -955,39 +1039,49 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
return 0;
}
-static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- int spi)
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
+ int spi;
+
if (reg->type == CONST_PTR_TO_DYNPTR)
return false;
- /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
- * will do check_mem_access to check and update stack bounds later, so
- * return true for that case.
+ spi = dynptr_get_spi(env, reg);
+
+ /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
+ * error because this just means the stack state hasn't been updated yet.
+ * We will do check_mem_access to check and update stack bounds later.
*/
- if (spi < 0)
- return spi == -ERANGE;
- /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see
- * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to
- * ensure dynptr objects at the slots we are touching are completely
- * destructed before we reinitialize them for a new one. For referenced
- * ones, destroy_if_dynptr_stack_slot returns an error early instead of
- * delaying it until the end where the user will get "Unreleased
+ if (spi < 0 && spi != -ERANGE)
+ return false;
+
+ /* We don't need to check if the stack slots are marked by previous
+ * dynptr initializations because we allow overwriting existing unreferenced
+ * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
+ * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
+ * touching are completely destructed before we reinitialize them for a new
+ * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
+ * instead of delaying it until the end where the user will get "Unreleased
* reference" error.
*/
return true;
}
-static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- int spi)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int i;
+ int i, spi;
- /* This already represents first slot of initialized bpf_dynptr */
+ /* This already represents first slot of initialized bpf_dynptr.
+ *
+ * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
+ * check_func_arg_reg_off's logic, so we don't need to check its
+ * offset and alignment.
+ */
if (reg->type == CONST_PTR_TO_DYNPTR)
return true;
+ spi = dynptr_get_spi(env, reg);
if (spi < 0)
return false;
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
@@ -1143,26 +1237,49 @@ static void print_verifier_state(struct bpf_verifier_env *env,
for (j = 0; j < BPF_REG_SIZE; j++) {
if (state->stack[i].slot_type[j] != STACK_INVALID)
valid = true;
- types_buf[j] = slot_type_char[
- state->stack[i].slot_type[j]];
+ types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
}
types_buf[BPF_REG_SIZE] = 0;
if (!valid)
continue;
if (!print_all && !stack_slot_scratched(env, i))
continue;
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, state->stack[i].spilled_ptr.live);
- if (is_spilled_reg(&state->stack[i])) {
+ switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
+ case STACK_SPILL:
reg = &state->stack[i].spilled_ptr;
t = reg->type;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
verbose(env, "%lld", reg->var_off.value + reg->off);
- } else {
+ break;
+ case STACK_DYNPTR:
+ i += BPF_DYNPTR_NR_SLOTS - 1;
+ reg = &state->stack[i].spilled_ptr;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
+ if (reg->ref_obj_id)
+ verbose(env, "(ref_id=%d)", reg->ref_obj_id);
+ break;
+ case STACK_MISC:
+ case STACK_ZERO:
+ default:
+ reg = &state->stack[i].spilled_ptr;
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
+ types_buf[BPF_REG_SIZE] = 0;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
verbose(env, "=%s", types_buf);
+ break;
}
}
if (state->acquired_refs && state->refs[0].id) {
@@ -1664,6 +1781,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
reg->type == PTR_TO_PACKET_END;
}
+static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
+{
+ return base_type(reg->type) == PTR_TO_MEM &&
+ (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
+}
+
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
enum bpf_reg_type which)
@@ -2475,8 +2598,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
u8 code = insn[i].code;
if (code == (BPF_JMP | BPF_CALL) &&
- insn[i].imm == BPF_FUNC_tail_call &&
- insn[i].src_reg != BPF_PSEUDO_CALL)
+ insn[i].src_reg == 0 &&
+ insn[i].imm == BPF_FUNC_tail_call)
subprog[cur_subprog].has_tail_call = true;
if (BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
@@ -3826,6 +3949,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_MISC)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -3863,6 +3988,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_ZERO)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -4175,7 +4302,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno)
{
const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
- int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
+ int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
const char *reg_name = "";
/* Only unreferenced case accepts untrusted pointers */
@@ -4242,6 +4369,34 @@ bad_type:
return -EINVAL;
}
+/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
+ * can dereference RCU protected pointers and result is PTR_TRUSTED.
+ */
+static bool in_rcu_cs(struct bpf_verifier_env *env)
+{
+ return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
+}
+
+/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
+BTF_SET_START(rcu_protected_types)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(struct, cgroup)
+BTF_SET_END(rcu_protected_types)
+
+static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
+{
+ if (!btf_is_kernel(btf))
+ return false;
+ return btf_id_set_contains(&rcu_protected_types, btf_id);
+}
+
+static bool rcu_safe_kptr(const struct btf_field *field)
+{
+ const struct btf_field_kptr *kptr = &field->kptr;
+
+ return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+}
+
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
int value_regno, int insn_idx,
struct btf_field *kptr_field)
@@ -4276,7 +4431,10 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ kptr_field->kptr.btf_id,
+ rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
+ PTR_MAYBE_NULL | MEM_RCU :
+ PTR_MAYBE_NULL | PTR_UNTRUSTED);
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
@@ -4999,23 +5157,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
return 0;
}
-#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields)
+#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
+#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
-BTF_TYPE_SAFE_NESTED(struct task_struct) {
+/*
+ * Allow-list a few fields as RCU trusted or fully trusted.
+ * This logic doesn't allow mixed tagging and will be removed once GCC supports
+ * btf_type_tag.
+ */
+
+/* RCU trusted: these fields are trusted in RCU CS and never NULL */
+BTF_TYPE_SAFE_RCU(struct task_struct) {
const cpumask_t *cpus_ptr;
+ struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
};
-static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- int off)
+BTF_TYPE_SAFE_RCU(struct css_set) {
+ struct cgroup *dfl_cgrp;
+};
+
+/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
+ __bpf_md_ptr(struct seq_file *, seq);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
+ struct file *file;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct file) {
+ struct inode *f_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct dentry) {
+ /* no negative dentry-s in places where bpf can see it */
+ struct inode *d_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+};
+
+static bool type_is_rcu(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
{
- /* If its parent is not trusted, it can't regain its trusted status. */
- if (!is_trusted_reg(reg))
- return false;
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
+}
- return btf_nested_type_is_trusted(&env->log, reg, off);
+static bool type_is_trusted(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
+{
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
}
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
@@ -5101,41 +5312,56 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (ret < 0)
return ret;
- /* If this is an untrusted pointer, all pointers formed by walking it
- * also inherit the untrusted flag.
- */
- if (type_flag(reg->type) & PTR_UNTRUSTED)
- flag |= PTR_UNTRUSTED;
-
- /* By default any pointer obtained from walking a trusted pointer is no
- * longer trusted, unless the field being accessed has explicitly been
- * marked as inheriting its parent's state of trust.
- *
- * An RCU-protected pointer can also be deemed trusted if we are in an
- * RCU read region. This case is handled below.
- */
- if (nested_ptr_is_trusted(env, reg, off))
- flag |= PTR_TRUSTED;
- else
- flag &= ~PTR_TRUSTED;
+ if (ret != PTR_TO_BTF_ID) {
+ /* result is not PTR_TO_BTF_ID: leave the flags untouched and just mark the register */
- if (flag & MEM_RCU) {
- /* Mark value register as MEM_RCU only if it is protected by
- * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
- * itself can already indicate trustedness inside the rcu
- * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
- * it could be null in some cases.
+ } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
+ /* If this is an untrusted pointer, all pointers formed by walking it
+ * also inherit the untrusted flag.
*/
- if (!env->cur_state->active_rcu_lock ||
- !(is_trusted_reg(reg) || is_rcu_reg(reg)))
- flag &= ~MEM_RCU;
- else
- flag |= PTR_MAYBE_NULL;
- } else if (reg->type & MEM_RCU) {
- /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
- * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
+ flag = PTR_UNTRUSTED;
+
+ } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
+ /* By default any pointer obtained from walking a trusted pointer is no
+ * longer trusted, unless the field being accessed has explicitly been
+ * marked as inheriting its parent's state of trust (either full or RCU).
+ * For example:
+ * 'cgroups' pointer is untrusted if task->cgroups dereference
+ * happened in a sleepable program outside of bpf_rcu_read_lock()
+ * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
+ * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
+ *
+ * A regular RCU-protected pointer with __rcu tag can also be deemed
+ * trusted if we are in an RCU CS. Such a pointer can be NULL.
*/
- flag |= PTR_UNTRUSTED;
+ if (type_is_trusted(env, reg, off)) {
+ flag |= PTR_TRUSTED;
+ } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
+ if (type_is_rcu(env, reg, off)) {
+ /* ignore __rcu tag and mark it MEM_RCU */
+ flag |= MEM_RCU;
+ } else if (flag & MEM_RCU) {
+ /* __rcu tagged pointers can be NULL */
+ flag |= PTR_MAYBE_NULL;
+ } else if (flag & (MEM_PERCPU | MEM_USER)) {
+ /* keep as-is */
+ } else {
+ /* walking unknown pointers yields an untrusted pointer */
+ flag = PTR_UNTRUSTED;
+ }
+ } else {
+ /*
+ * If we are not in an RCU CS, or the MEM_RCU pointer can be
+ * NULL, aggressively mark the result as untrusted. Otherwise
+ * such pointers would be plain PTR_TO_BTF_ID without flags
+ * and would be allowed to be passed into helpers for
+ * compat reasons.
+ */
+ flag = PTR_UNTRUSTED;
+ }
+ } else {
+ /* Old compat. Deprecated */
+ flag &= ~PTR_TRUSTED;
}
if (atype == BPF_READ && value_regno >= 0)
@@ -5754,7 +5980,8 @@ static int check_stack_range_initialized(
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
- if (*stype == STACK_ZERO) {
+ if ((*stype == STACK_ZERO) ||
+ (*stype == STACK_INVALID && env->allow_uninit_stack)) {
if (clobber) {
/* helper can write anything into the stack */
*stype = STACK_MISC;
@@ -6206,11 +6433,11 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
* Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
* type, and declare it as 'const struct bpf_dynptr *' in their prototype.
*/
-int process_dynptr_func(struct bpf_verifier_env *env, int regno,
- enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
+static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
+ enum bpf_arg_type arg_type)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- int spi = 0;
+ int err;
/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
@@ -6219,15 +6446,6 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
return -EFAULT;
}
- /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
- * check_func_arg_reg_off's logic. We only need to check offset
- * and its alignment for PTR_TO_STACK.
- */
- if (reg->type == PTR_TO_STACK) {
- spi = dynptr_get_spi(env, reg);
- if (spi < 0 && spi != -ERANGE)
- return spi;
- }
/* MEM_UNINIT - Points to memory that is an appropriate candidate for
* constructing a mutable bpf_dynptr object.
@@ -6245,30 +6463,30 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
* to.
*/
if (arg_type & MEM_UNINIT) {
- if (!is_dynptr_reg_valid_uninit(env, reg, spi)) {
+ int i;
+
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
return -EINVAL;
}
- /* We only support one dynptr being uninitialized at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- if (meta->uninit_dynptr_regno) {
- verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
- return -EFAULT;
+ /* we write one BPF_DW (8 bytes) at a time */
+ for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
+ err = check_mem_access(env, insn_idx, regno,
+ i, BPF_DW, BPF_WRITE, -1, false);
+ if (err)
+ return err;
}
- meta->uninit_dynptr_regno = regno;
+ err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
} else /* MEM_RDONLY and None case from above */ {
- int err;
-
/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
return -EINVAL;
}
- if (!is_dynptr_reg_valid_init(env, reg, spi)) {
+ if (!is_dynptr_reg_valid_init(env, reg)) {
verbose(env,
"Expected an initialized dynptr as arg #%d\n",
regno);
@@ -6277,30 +6495,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
- const char *err_extra = "";
-
- switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
- case DYNPTR_TYPE_LOCAL:
- err_extra = "local";
- break;
- case DYNPTR_TYPE_RINGBUF:
- err_extra = "ringbuf";
- break;
- default:
- err_extra = "<unknown>";
- break;
- }
verbose(env,
"Expected a dynptr of type %s as arg #%d\n",
- err_extra, regno);
+ dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
return -EINVAL;
}
err = mark_dynptr_read(env, reg);
- if (err)
- return err;
}
- return 0;
+ return err;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
@@ -6522,7 +6725,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
found:
- if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) {
+ if (base_type(reg->type) != PTR_TO_BTF_ID)
+ return 0;
+
+ switch ((int)reg->type) {
+ case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU:
+ {
/* For bpf_sk_release, it needs to match against first member
* 'struct sock_common', hence make an exception for it. This
* allows bpf_sk_release to work for multiple socket types.
@@ -6558,13 +6768,23 @@ found:
return -EACCES;
}
}
- } else if (type_is_alloc(reg->type)) {
+ break;
+ }
+ case PTR_TO_BTF_ID | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
return -EFAULT;
}
+ /* Handled by helper specific checks */
+ break;
+ case PTR_TO_BTF_ID | MEM_PERCPU:
+ case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
+ /* Handled by helper specific checks */
+ break;
+ default:
+ verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
+ return -EFAULT;
}
-
return 0;
}
@@ -6651,7 +6871,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
- case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
@@ -6666,6 +6885,28 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
}
}
+/*
+ * Locate the register that holds the single dynptr argument of helper @fn.
+ *
+ * Walks fn->arg_type[] and, for the entry that arg_type_is_dynptr() accepts,
+ * returns the corresponding slot of @regs (argument i lives in
+ * BPF_REG_1 + i). A verifier internal error is logged and NULL is returned
+ * when the prototype has no dynptr argument or more than one.
+ */
+static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
+						const struct bpf_func_proto *fn,
+						struct bpf_reg_state *regs)
+{
+	struct bpf_reg_state *found = NULL;
+	int argno;
+
+	for (argno = 0; argno < MAX_BPF_FUNC_REG_ARGS; argno++) {
+		if (!arg_type_is_dynptr(fn->arg_type[argno]))
+			continue;
+
+		/* a second match means the prototype is ambiguous */
+		if (found) {
+			verbose(env, "verifier internal error: multiple dynptr args\n");
+			return NULL;
+		}
+		found = &regs[BPF_REG_1 + argno];
+	}
+
+	if (found)
+		return found;
+
+	verbose(env, "verifier internal error: no dynptr arg found\n");
+	return NULL;
+}
+
static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
@@ -6692,9 +6933,28 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
return state->stack[spi].spilled_ptr.ref_obj_id;
}
+/*
+ * Return the dynptr type referred to by @reg.
+ *
+ * For a CONST_PTR_TO_DYNPTR register the type is read from the register
+ * itself; otherwise it is read from the spilled_ptr of the stack slot that
+ * __get_spi(reg->off) selects. BPF_DYNPTR_TYPE_INVALID is returned (and an
+ * internal error logged) when that slot index is negative.
+ */
+static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
+					    struct bpf_reg_state *reg)
+{
+	struct bpf_func_state *frame = func(env, reg);
+	int slot;
+
+	if (reg->type == CONST_PTR_TO_DYNPTR)
+		return reg->dynptr.type;
+
+	slot = __get_spi(reg->off);
+	if (slot >= 0)
+		return frame->stack[slot].spilled_ptr.dynptr.type;
+
+	verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
+	return BPF_DYNPTR_TYPE_INVALID;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
- const struct bpf_func_proto *fn)
+ const struct bpf_func_proto *fn,
+ int insn_idx)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
@@ -6907,7 +7167,7 @@ skip_type_check:
err = check_mem_size_reg(env, reg, regno, true, meta);
break;
case ARG_PTR_TO_DYNPTR:
- err = process_dynptr_func(env, regno, arg_type, meta);
+ err = process_dynptr_func(env, regno, insn_idx, arg_type);
if (err)
return err;
break;
@@ -7126,22 +7386,26 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break;
case BPF_MAP_TYPE_SK_STORAGE:
if (func_id != BPF_FUNC_sk_storage_get &&
- func_id != BPF_FUNC_sk_storage_delete)
+ func_id != BPF_FUNC_sk_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_INODE_STORAGE:
if (func_id != BPF_FUNC_inode_storage_get &&
- func_id != BPF_FUNC_inode_storage_delete)
+ func_id != BPF_FUNC_inode_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_TASK_STORAGE:
if (func_id != BPF_FUNC_task_storage_get &&
- func_id != BPF_FUNC_task_storage_delete)
+ func_id != BPF_FUNC_task_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_CGRP_STORAGE:
if (func_id != BPF_FUNC_cgrp_storage_get &&
- func_id != BPF_FUNC_cgrp_storage_delete)
+ func_id != BPF_FUNC_cgrp_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_BLOOM_FILTER:
@@ -7355,6 +7619,9 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
* are now invalid, so turn them into unknown SCALAR_VALUE.
+ *
+ * This also applies to dynptr slices belonging to skb and xdp dynptrs,
+ * since these slices point to packet data.
*/
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
@@ -7362,8 +7629,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
struct bpf_reg_state *reg;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg_is_pkt_pointer_any(reg))
- __mark_reg_unknown(env, reg);
+ if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
+ mark_reg_invalid(env, reg);
}));
}
@@ -7408,12 +7675,8 @@ static int release_reference(struct bpf_verifier_env *env,
return err;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg->ref_obj_id == ref_obj_id) {
- if (!env->allow_ptr_leaks)
- __mark_reg_not_init(env, reg);
- else
- __mark_reg_unknown(env, reg);
- }
+ if (reg->ref_obj_id == ref_obj_id)
+ mark_reg_invalid(env, reg);
}));
return 0;
@@ -7426,7 +7689,7 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
if (type_is_non_owning_ref(reg->type))
- __mark_reg_unknown(env, reg);
+ mark_reg_invalid(env, reg);
}));
}
@@ -8197,7 +8460,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- err = check_func_arg(env, i, &meta, fn);
+ err = check_func_arg(env, i, &meta, fn, insn_idx);
if (err)
return err;
}
@@ -8222,30 +8485,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
- /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
- * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
- * is safe to do directly.
- */
- if (meta.uninit_dynptr_regno) {
- if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
- verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
- return -EFAULT;
- }
- /* we write BPF_DW bits (8 bytes) at a time */
- for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
- err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
- i, BPF_DW, BPF_WRITE, -1, false);
- if (err)
- return err;
- }
-
- err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
- fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
- insn_idx);
- if (err)
- return err;
- }
-
if (meta.release_regno) {
err = -EINVAL;
/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
@@ -8330,43 +8569,62 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
break;
case BPF_FUNC_dynptr_data:
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- if (arg_type_is_dynptr(fn->arg_type[i])) {
- struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
- int id, ref_obj_id;
+ {
+ struct bpf_reg_state *reg;
+ int id, ref_obj_id;
- if (meta.dynptr_id) {
- verbose(env, "verifier internal error: meta.dynptr_id already set\n");
- return -EFAULT;
- }
+ reg = get_dynptr_arg_reg(env, fn, regs);
+ if (!reg)
+ return -EFAULT;
- if (meta.ref_obj_id) {
- verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
- return -EFAULT;
- }
- id = dynptr_id(env, reg);
- if (id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr id\n");
- return id;
- }
+ if (meta.dynptr_id) {
+ verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+ return -EFAULT;
+ }
+ if (meta.ref_obj_id) {
+ verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ return -EFAULT;
+ }
- ref_obj_id = dynptr_ref_obj_id(env, reg);
- if (ref_obj_id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
- return ref_obj_id;
- }
+ id = dynptr_id(env, reg);
+ if (id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ return id;
+ }
- meta.dynptr_id = id;
- meta.ref_obj_id = ref_obj_id;
- break;
- }
+ ref_obj_id = dynptr_ref_obj_id(env, reg);
+ if (ref_obj_id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+ return ref_obj_id;
}
- if (i == MAX_BPF_FUNC_REG_ARGS) {
- verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+
+ meta.dynptr_id = id;
+ meta.ref_obj_id = ref_obj_id;
+
+ break;
+ }
+ case BPF_FUNC_dynptr_write:
+ {
+ enum bpf_dynptr_type dynptr_type;
+ struct bpf_reg_state *reg;
+
+ reg = get_dynptr_arg_reg(env, fn, regs);
+ if (!reg)
return -EFAULT;
- }
+
+ dynptr_type = dynptr_get_type(env, reg);
+ if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
+ return -EFAULT;
+
+ if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
+ /* this will trigger clear_all_pkt_pointers(), which will
+ * invalidate all dynptr slices associated with the skb
+ */
+ changes_data = true;
+
break;
+ }
case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_user_ringbuf_callback_state);
@@ -8595,36 +8853,6 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
-struct bpf_kfunc_call_arg_meta {
- /* In parameters */
- struct btf *btf;
- u32 func_id;
- u32 kfunc_flags;
- const struct btf_type *func_proto;
- const char *func_name;
- /* Out parameters */
- u32 ref_obj_id;
- u8 release_regno;
- bool r0_rdonly;
- u32 ret_btf_id;
- u64 r0_size;
- u32 subprogno;
- struct {
- u64 value;
- bool found;
- } arg_constant;
- struct {
- struct btf *btf;
- u32 btf_id;
- } arg_obj_drop;
- struct {
- struct btf_field *field;
- } arg_list_head;
- struct {
- struct btf_field *field;
- } arg_rbtree_root;
-};
-
static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_ACQUIRE;
@@ -8696,6 +8924,19 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
return __kfunc_param_match_suffix(btf, arg, "__sz");
}
+/*
+ * Check whether kfunc parameter @arg is a "__szk" size argument.
+ *
+ * Both the BTF type of @arg (with modifiers skipped) and the register @reg
+ * passed for it must be scalar; only then is the parameter matched against
+ * the "__szk" suffix.
+ */
+static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
+					const struct btf_param *arg,
+					const struct bpf_reg_state *reg)
+{
+	const struct btf_type *arg_type = btf_type_skip_modifiers(btf, arg->type, NULL);
+
+	if (btf_type_is_scalar(arg_type) && reg->type == SCALAR_VALUE)
+		return __kfunc_param_match_suffix(btf, arg, "__szk");
+
+	return false;
+}
+
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
return __kfunc_param_match_suffix(btf, arg, "__k");
@@ -8711,6 +8952,11 @@ static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param
return __kfunc_param_match_suffix(btf, arg, "__alloc");
}
+/* Check kfunc parameter @arg against the "__uninit" suffix. */
+static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
+{
+	const char *suffix = "__uninit";
+
+	return __kfunc_param_match_suffix(btf, arg, suffix);
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -8877,6 +9123,10 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add,
KF_bpf_rbtree_first,
+ KF_bpf_dynptr_from_skb,
+ KF_bpf_dynptr_from_xdp,
+ KF_bpf_dynptr_slice,
+ KF_bpf_dynptr_slice_rdwr,
};
BTF_SET_START(special_kfunc_set)
@@ -8891,6 +9141,10 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_dynptr_from_skb)
+BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_slice)
+BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8907,6 +9161,10 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_dynptr_from_skb)
+BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_slice)
+BTF_ID(func, bpf_dynptr_slice_rdwr)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -8986,7 +9244,10 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CALLBACK;
- if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
+
+ if (argno + 1 < nargs &&
+ (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
+ is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
arg_mem_size = true;
/* This is the catch all argument type of register types supported by
@@ -9206,7 +9467,6 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
ptr = reg->map_ptr;
break;
case PTR_TO_BTF_ID | MEM_ALLOC:
- case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
ptr = reg->btf;
break;
default:
@@ -9455,7 +9715,8 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
&meta->arg_rbtree_root.field);
}
-static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
+static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
+ int insn_idx)
{
const char *func_name = meta->func_name, *ref_tname;
const struct btf *btf = meta->btf;
@@ -9538,7 +9799,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
- if (is_kfunc_trusted_args(meta) &&
+ if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
(register_is_null(reg) || type_may_be_null(reg->type))) {
verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
return -EACCES;
@@ -9646,16 +9907,43 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_DYNPTR:
+ {
+ enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
+
if (reg->type != PTR_TO_STACK &&
reg->type != CONST_PTR_TO_DYNPTR) {
verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
return -EINVAL;
}
- ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ dynptr_arg_type |= MEM_RDONLY;
+
+ if (is_kfunc_arg_uninit(btf, &args[i]))
+ dynptr_arg_type |= MEM_UNINIT;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
+ dynptr_arg_type |= DYNPTR_TYPE_SKB;
+ else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
+ dynptr_arg_type |= DYNPTR_TYPE_XDP;
+
+ ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
if (ret < 0)
return ret;
+
+ if (!(dynptr_arg_type & MEM_UNINIT)) {
+ int id = dynptr_id(env, reg);
+
+ if (id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ return id;
+ }
+ meta->initialized_dynptr.id = id;
+ meta->initialized_dynptr.type = dynptr_get_type(env, reg);
+ }
+
break;
+ }
case KF_ARG_PTR_TO_LIST_HEAD:
if (reg->type != PTR_TO_MAP_VALUE &&
reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
@@ -9749,14 +10037,33 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_MEM_SIZE:
- ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1);
+ {
+ struct bpf_reg_state *size_reg = &regs[regno + 1];
+ const struct btf_param *size_arg = &args[i + 1];
+
+ ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
if (ret < 0) {
verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
return ret;
}
- /* Skip next '__sz' argument */
+
+ if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
+ if (meta->arg_constant.found) {
+ verbose(env, "verifier internal error: only one constant argument permitted\n");
+ return -EFAULT;
+ }
+ if (!tnum_is_const(size_reg->var_off)) {
+ verbose(env, "R%d must be a known constant\n", regno + 1);
+ return -EINVAL;
+ }
+ meta->arg_constant.found = true;
+ meta->arg_constant.value = size_reg->var_off.value;
+ }
+
+ /* Skip next '__sz' or '__szk' argument */
i++;
break;
+ }
case KF_ARG_PTR_TO_CALLBACK:
meta->subprogno = reg->subprogno;
break;
@@ -9828,10 +10135,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
- if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
- verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
- return -EACCES;
- }
if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state;
@@ -9860,7 +10163,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
/* Check the arguments */
- err = check_kfunc_args(env, &meta);
+ err = check_kfunc_args(env, &meta, insn_idx);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted
@@ -9991,6 +10294,42 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta.arg_constant.value;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
+ meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+ enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+
+ if (!meta.arg_constant.found) {
+ verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
+ return -EFAULT;
+ }
+
+ regs[BPF_REG_0].mem_size = meta.arg_constant.value;
+
+ /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
+ regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
+ regs[BPF_REG_0].type |= MEM_RDONLY;
+ } else {
+ /* this will set env->seen_direct_write to true */
+ if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
+ verbose(env, "the prog does not allow writes to packet data\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!meta.initialized_dynptr.id) {
+ verbose(env, "verifier internal error: no dynptr id\n");
+ return -EFAULT;
+ }
+ regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
+
+ /* we don't need to set BPF_REG_0's ref obj id
+ * because packet slices are not refcounted (see
+ * dynptr_type_refcounted)
+ */
} else {
verbose(env, "kernel function %s unhandled dynamic return type\n",
meta.func_name);
@@ -9998,6 +10337,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
} else if (!__btf_type_is_struct(ptr_type)) {
if (!meta.r0_size) {
+ __u32 sz;
+
+ if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
+ meta.r0_size = sz;
+ meta.r0_rdonly = true;
+ }
+ }
+ if (!meta.r0_size) {
ptr_type_name = btf_name_by_offset(desc_btf,
ptr_type->name_off);
verbose(env,
@@ -13152,44 +13499,43 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
*/
static int visit_insn(int t, struct bpf_verifier_env *env)
{
- struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
int ret;
- if (bpf_pseudo_func(insns + t))
+ if (bpf_pseudo_func(insn))
return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
- if (BPF_CLASS(insns[t].code) != BPF_JMP &&
- BPF_CLASS(insns[t].code) != BPF_JMP32)
+ if (BPF_CLASS(insn->code) != BPF_JMP &&
+ BPF_CLASS(insn->code) != BPF_JMP32)
return push_insn(t, t + 1, FALLTHROUGH, env, false);
- switch (BPF_OP(insns[t].code)) {
+ switch (BPF_OP(insn->code)) {
case BPF_EXIT:
return DONE_EXPLORING;
case BPF_CALL:
- if (insns[t].imm == BPF_FUNC_timer_set_callback)
+ if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
/* Mark this call insn as a prune point to trigger
* is_state_visited() check before call itself is
* processed by __check_func_call(). Otherwise new
* async state will be pushed for further exploration.
*/
mark_prune_point(env, t);
- return visit_func_call_insn(t, insns, env,
- insns[t].src_reg == BPF_PSEUDO_CALL);
+ return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
- if (BPF_SRC(insns[t].code) != BPF_K)
+ if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
/* unconditional jump with single edge */
- ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
+ ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
true);
if (ret)
return ret;
- mark_prune_point(env, t + insns[t].off + 1);
- mark_jmp_point(env, t + insns[t].off + 1);
+ mark_prune_point(env, t + insn->off + 1);
+ mark_jmp_point(env, t + insn->off + 1);
return ret;
@@ -13201,7 +13547,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
if (ret)
return ret;
- return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
+ return push_insn(t, t + insn->off + 1, BRANCH, env, true);
}
}
@@ -13877,13 +14223,17 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_BUF:
+ case PTR_TO_TP_BUFFER:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off) &&
- check_ids(rold->id, rcur->id, idmap);
+ check_ids(rold->id, rcur->id, idmap) &&
+ check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
/* We must have at least as much range as the old ptr
@@ -13936,6 +14286,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
+ if (env->allow_uninit_stack &&
+ old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
+ continue;
+
/* explored stack has more populated slots than current stack
* and these slots were used
*/
@@ -14311,7 +14665,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* This threshold shouldn't be too high either, since states
* at the end of the loop are likely to be useful in pruning.
*/
- if (env->jmps_processed - env->prev_jmps_processed < 20 &&
+ if (!env->test_state_freq &&
+ env->jmps_processed - env->prev_jmps_processed < 20 &&
env->insn_processed - env->prev_insn_processed < 100)
add_new_state = false;
goto miss;
@@ -14500,6 +14855,44 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
!reg_type_mismatch_ok(prev));
}
+/*
+ * Record the pointer type used by the memory access insn at env->insn_idx
+ * and check it against the type recorded on previously explored paths.
+ *
+ * The first visit (recorded type is NOT_INIT) simply stores @type. On a
+ * later visit whose type mismatches the recorded one, the program is
+ * rejected with -EINVAL, unless @allow_trust_missmatch is set and both
+ * types are PTR_TO_BTF_ID based: in that case the recorded type is
+ * downgraded to PTR_TO_BTF_ID | PTR_UNTRUSTED.
+ *
+ * Returns 0 on success, -EINVAL on an irreconcilable mismatch.
+ */
+static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
+			     bool allow_trust_missmatch)
+{
+	enum bpf_reg_type *seen = &env->insn_aux_data[env->insn_idx].ptr_type;
+
+	if (*seen == NOT_INIT) {
+		/* First valid visit of
+		 * dst_reg = *(u32 *)(src_reg + off)
+		 * remember the type to validate intersecting paths.
+		 */
+		*seen = type;
+		return 0;
+	}
+
+	if (!reg_type_mismatch(type, *seen))
+		return 0;
+
+	/* The same insn
+	 * dst_reg = *(u32*) (src_reg + off)
+	 * is reached with different pointer types, e.g.
+	 * src_reg == ctx in one branch and
+	 * src_reg == stack|map in some other branch.
+	 */
+	if (allow_trust_missmatch &&
+	    base_type(type) == PTR_TO_BTF_ID &&
+	    base_type(*seen) == PTR_TO_BTF_ID) {
+		/*
+		 * Have to support a use case when one path through
+		 * the program yields TRUSTED pointer while another
+		 * is UNTRUSTED. Fallback to UNTRUSTED to generate
+		 * BPF_PROBE_MEM.
+		 */
+		*seen = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+		return 0;
+	}
+
+	/* Any other mismatch is rejected. */
+	verbose(env, "same insn cannot be used with different pointers\n");
+	return -EINVAL;
+}
+
static int do_check(struct bpf_verifier_env *env)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -14609,7 +15002,7 @@ static int do_check(struct bpf_verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type *prev_src_type, src_reg_type;
+ enum bpf_reg_type src_reg_type;
/* check for reserved fields is already done */
@@ -14633,29 +15026,11 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
-
- if (*prev_src_type == NOT_INIT) {
- /* saw a valid insn
- * dst_reg = *(u32 *)(src_reg + off)
- * save type to validate intersecting paths
- */
- *prev_src_type = src_reg_type;
-
- } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
- /* ABuser program is trying to use the same insn
- * dst_reg = *(u32*) (src_reg + off)
- * with different pointer types:
- * src_reg == ctx in one branch and
- * src_reg == stack|map in some other branch.
- * Reject it.
- */
- verbose(env, "same insn cannot be used with different pointers\n");
- return -EINVAL;
- }
-
+ err = save_aux_ptr_type(env, src_reg_type, true);
+ if (err)
+ return err;
} else if (class == BPF_STX) {
- enum bpf_reg_type *prev_dst_type, dst_reg_type;
+ enum bpf_reg_type dst_reg_type;
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
@@ -14688,16 +15063,12 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
-
- if (*prev_dst_type == NOT_INIT) {
- *prev_dst_type = dst_reg_type;
- } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
- verbose(env, "same insn cannot be used with different pointers\n");
- return -EINVAL;
- }
-
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
} else if (class == BPF_ST) {
+ enum bpf_reg_type dst_reg_type;
+
if (BPF_MODE(insn->code) != BPF_MEM ||
insn->src_reg != BPF_REG_0) {
verbose(env, "BPF_ST uses reserved fields\n");
@@ -14708,12 +15079,7 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
- insn->dst_reg,
- reg_type_str(env, reg_state(env, insn->dst_reg)->type));
- return -EACCES;
- }
+ dst_reg_type = regs[insn->dst_reg].type;
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, env->insn_idx, insn->dst_reg,
@@ -14722,6 +15088,9 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
} else if (class == BPF_JMP || class == BPF_JMP32) {
u8 opcode = BPF_OP(insn->code);
@@ -14756,6 +15125,8 @@ static int do_check(struct bpf_verifier_env *env)
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
return err;
+
+ mark_reg_scratched(env, BPF_REG_0);
} else if (opcode == BPF_JA) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
@@ -15830,14 +16201,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
bpf_convert_ctx_access_t convert_ctx_access;
- bool ctx_access;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
type = BPF_READ;
- ctx_access = true;
} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
@@ -15847,7 +16216,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- ctx_access = BPF_CLASS(insn->code) == BPF_STX;
} else {
continue;
}
@@ -15870,9 +16238,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
}
- if (!ctx_access)
- continue;
-
switch ((int)env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
@@ -16321,6 +16686,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+ bool seen_direct_write = env->seen_direct_write;
+ bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
+
+ if (is_rdonly)
+ insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
+
+ /* restore env->seen_direct_write to its original value, since
+ * may_access_direct_pkt_data mutates it
+ */
+ env->seen_direct_write = seen_direct_write;
}
return 0;
}
@@ -17712,8 +18088,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
- env->rcu_tag_supported = btf_vmlinux &&
- btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e8da032bb6fc..bcf91bc7bf71 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1453,10 +1453,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
case BPF_FUNC_cgrp_storage_get:
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
diff --git a/lib/packing.c b/lib/packing.c
index a96169237ae6..3f656167c17e 100644
--- a/lib/packing.c
+++ b/lib/packing.c
@@ -198,5 +198,4 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen,
}
EXPORT_SYMBOL(packing);
-MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Generic bitfield packing and unpacking");
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 296d0145932f..5920544e93e8 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -365,7 +365,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCSHWTSTAMP:
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), dev_net(real_dev)))
break;
fallthrough;
case SIOCGMIIPHY:
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 5de06ab8ed75..e70ae2c113f9 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -125,7 +125,7 @@ as_indicate_complete:
break;
case as_addparty:
case as_dropparty:
- sk->sk_err_soft = -msg->reply;
+ WRITE_ONCE(sk->sk_err_soft, -msg->reply);
/* < 0 failure, otherwise ep_ref */
clear_bit(ATM_VF_WAITING, &vcc->flags);
break;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f81b24320a36..d350f31c7a3d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -744,6 +744,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
+ /* p != NULL, but p->cnt could be 0 */
}
__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
@@ -791,7 +792,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_SET8_END(test_sk_check_kfunc_ids)
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index e5e48c6e35d7..b45c00c01dea 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
if (n) {
struct net_bridge_fdb_entry *f;
- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_release(n);
return;
}
@@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
if (n) {
struct net_bridge_fdb_entry *f;
- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_release(n);
return;
}
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index b82906fc999a..df47c876230e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -468,6 +468,9 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_del_bulk = br_fdb_delete_bulk,
.ndo_fdb_dump = br_fdb_dump,
.ndo_fdb_get = br_fdb_get,
+ .ndo_mdb_add = br_mdb_add,
+ .ndo_mdb_del = br_mdb_del,
+ .ndo_mdb_dump = br_mdb_dump,
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 25c48d81a597..7305f5f8215c 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -380,82 +380,37 @@ out:
return err;
}
-static int br_mdb_valid_dump_req(const struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb)
{
+ struct net_bridge *br = netdev_priv(dev);
struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ int err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid header for mdb dump request");
- return -EINVAL;
- }
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_GETMDB, sizeof(*bpm),
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
bpm = nlmsg_data(nlh);
- if (bpm->ifindex) {
- NL_SET_ERR_MSG_MOD(extack, "Filtering by device index is not supported for mdb dump request");
- return -EINVAL;
- }
- if (nlmsg_attrlen(nlh, sizeof(*bpm))) {
- NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net_device *dev;
- struct net *net = sock_net(skb->sk);
- struct nlmsghdr *nlh = NULL;
- int idx = 0, s_idx;
-
- if (cb->strict_check) {
- int err = br_mdb_valid_dump_req(cb->nlh, cb->extack);
-
- if (err < 0)
- return err;
- }
-
- s_idx = cb->args[0];
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->ifindex = dev->ifindex;
rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- if (netif_is_bridge_master(dev)) {
- struct net_bridge *br = netdev_priv(dev);
- struct br_port_msg *bpm;
-
- if (idx < s_idx)
- goto skip;
-
- nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_GETMDB,
- sizeof(*bpm), NLM_F_MULTI);
- if (nlh == NULL)
- break;
-
- bpm = nlmsg_data(nlh);
- memset(bpm, 0, sizeof(*bpm));
- bpm->ifindex = dev->ifindex;
- if (br_mdb_fill_info(skb, cb, dev) < 0)
- goto out;
- if (br_rports_fill_info(skb, &br->multicast_ctx) < 0)
- goto out;
-
- cb->args[1] = 0;
- nlmsg_end(skb, nlh);
- skip:
- idx++;
- }
- }
+ err = br_mdb_fill_info(skb, cb, dev);
+ if (err)
+ goto out;
+ err = br_rports_fill_info(skb, &br->multicast_ctx);
+ if (err)
+ goto out;
out:
- if (nlh)
- nlmsg_end(skb, nlh);
rcu_read_unlock();
- cb->args[0] = idx;
- return skb->len;
+ nlmsg_end(skb, nlh);
+ return err;
}
static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
@@ -683,60 +638,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
};
-static int validate_mdb_entry(const struct nlattr *attr,
- struct netlink_ext_ack *extack)
-{
- struct br_mdb_entry *entry = nla_data(attr);
-
- if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
- return -EINVAL;
- }
-
- if (entry->ifindex == 0) {
- NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed");
- return -EINVAL;
- }
-
- if (entry->addr.proto == htons(ETH_P_IP)) {
- if (!ipv4_is_multicast(entry->addr.u.ip4)) {
- NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast");
- return -EINVAL;
- }
- if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
- NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast");
- return -EINVAL;
- }
-#if IS_ENABLED(CONFIG_IPV6)
- } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
- if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
- NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes");
- return -EINVAL;
- }
-#endif
- } else if (entry->addr.proto == 0) {
- /* L2 mdb */
- if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
- NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast");
- return -EINVAL;
- }
- } else {
- NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol");
- return -EINVAL;
- }
-
- if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
- NL_SET_ERR_MSG_MOD(extack, "Unknown entry state");
- return -EINVAL;
- }
- if (entry->vid >= VLAN_VID_MASK) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id");
- return -EINVAL;
- }
-
- return 0;
-}
-
static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
struct netlink_ext_ack *extack)
{
@@ -1299,49 +1200,16 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
return 0;
}
-static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = {
- [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 },
- [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
- validate_mdb_entry,
- sizeof(struct br_mdb_entry)),
- [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
-};
-
-static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
- struct br_mdb_config *cfg,
+static int br_mdb_config_init(struct br_mdb_config *cfg, struct net_device *dev,
+ struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack)
{
- struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
- struct br_port_msg *bpm;
- struct net_device *dev;
- int err;
-
- err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, mdba_policy, extack);
- if (err)
- return err;
+ struct net *net = dev_net(dev);
memset(cfg, 0, sizeof(*cfg));
cfg->filter_mode = MCAST_EXCLUDE;
cfg->rt_protocol = RTPROT_STATIC;
- cfg->nlflags = nlh->nlmsg_flags;
-
- bpm = nlmsg_data(nlh);
- if (!bpm->ifindex) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex");
- return -EINVAL;
- }
-
- dev = __dev_get_by_index(net, bpm->ifindex);
- if (!dev) {
- NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist");
- return -ENODEV;
- }
-
- if (!netif_is_bridge_master(dev)) {
- NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge");
- return -EOPNOTSUPP;
- }
+ cfg->nlflags = nlmsg_flags;
cfg->br = netdev_priv(dev);
@@ -1355,11 +1223,6 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
return -EINVAL;
}
- if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
- NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
- return -EINVAL;
- }
-
cfg->entry = nla_data(tb[MDBA_SET_ENTRY]);
if (cfg->entry->ifindex != cfg->br->dev->ifindex) {
@@ -1383,6 +1246,12 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
}
}
+ if (cfg->entry->addr.proto == htons(ETH_P_IP) &&
+ ipv4_is_zeronet(cfg->entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address 0.0.0.0 is not allowed");
+ return -EINVAL;
+ }
+
if (tb[MDBA_SET_ENTRY_ATTRS])
return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg,
extack);
@@ -1397,16 +1266,15 @@ static void br_mdb_config_fini(struct br_mdb_config *cfg)
br_mdb_config_src_list_fini(cfg);
}
-static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+ struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
struct br_mdb_config cfg;
int err;
- err = br_mdb_config_init(net, nlh, &cfg, extack);
+ err = br_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack);
if (err)
return err;
@@ -1500,16 +1368,15 @@ unlock:
return err;
}
-static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
struct br_mdb_config cfg;
int err;
- err = br_mdb_config_init(net, nlh, &cfg, extack);
+ err = br_mdb_config_init(&cfg, dev, tb, 0, extack);
if (err)
return err;
@@ -1534,17 +1401,3 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
br_mdb_config_fini(&cfg);
return err;
}
-
-void br_mdb_init(void)
-{
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0);
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0);
-}
-
-void br_mdb_uninit(void)
-{
- rtnl_unregister(PF_BRIDGE, RTM_GETMDB);
- rtnl_unregister(PF_BRIDGE, RTM_NEWMDB);
- rtnl_unregister(PF_BRIDGE, RTM_DELMDB);
-}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 638a4d5359db..3e3065bc0465 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
- if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) {
+ if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) &&
+ READ_ONCE(neigh->hh.hh_len)) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
ret = br_handle_frame_finish(net, sk, skb);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 6b07f30675bb..550039dfc31a 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -40,62 +40,6 @@
#include <linux/sysctl.h>
#endif
-/* We only check the length. A bridge shouldn't do any hop-by-hop stuff
- * anyway
- */
-static int br_nf_check_hbh_len(struct sk_buff *skb)
-{
- unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
- u32 pkt_len;
- const unsigned char *nh = skb_network_header(skb);
- int off = raw - nh;
- int len = (raw[1] + 1) << 3;
-
- if ((raw + len) - skb->data > skb_headlen(skb))
- goto bad;
-
- off += 2;
- len -= 2;
-
- while (len > 0) {
- int optlen = nh[off + 1] + 2;
-
- switch (nh[off]) {
- case IPV6_TLV_PAD1:
- optlen = 1;
- break;
-
- case IPV6_TLV_PADN:
- break;
-
- case IPV6_TLV_JUMBO:
- if (nh[off + 1] != 4 || (off & 3) != 2)
- goto bad;
- pkt_len = ntohl(*(__be32 *)(nh + off + 2));
- if (pkt_len <= IPV6_MAXPLEN ||
- ipv6_hdr(skb)->payload_len)
- goto bad;
- if (pkt_len > skb->len - sizeof(struct ipv6hdr))
- goto bad;
- if (pskb_trim_rcsum(skb,
- pkt_len + sizeof(struct ipv6hdr)))
- goto bad;
- nh = skb_network_header(skb);
- break;
- default:
- if (optlen > len)
- goto bad;
- break;
- }
- off += optlen;
- len -= optlen;
- }
- if (len == 0)
- return 0;
-bad:
- return -1;
-}
-
int br_validate_ipv6(struct net *net, struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
@@ -115,22 +59,19 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb)
goto inhdr_error;
pkt_len = ntohs(hdr->payload_len);
+ if (hdr->nexthdr == NEXTHDR_HOP && nf_ip6_check_hbh_len(skb, &pkt_len))
+ goto drop;
- if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
- if (pkt_len + ip6h_len > skb->len) {
- __IP6_INC_STATS(net, idev,
- IPSTATS_MIB_INTRUNCATEDPKTS);
- goto drop;
- }
- if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
- __IP6_INC_STATS(net, idev,
- IPSTATS_MIB_INDISCARDS);
- goto drop;
- }
- hdr = ipv6_hdr(skb);
+ if (pkt_len + ip6h_len > skb->len) {
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INTRUNCATEDPKTS);
+ goto drop;
}
- if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb))
+ if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
+ __IP6_INC_STATS(net, idev,
+ IPSTATS_MIB_INDISCARDS);
goto drop;
+ }
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
/* No IP options in IPv6 header; however it should be
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 9173e52b89e2..fefb1c0e248b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1886,7 +1886,6 @@ int __init br_netlink_init(void)
{
int err;
- br_mdb_init();
br_vlan_rtnl_init();
rtnl_af_register(&br_af_ops);
@@ -1898,13 +1897,11 @@ int __init br_netlink_init(void)
out_af:
rtnl_af_unregister(&br_af_ops);
- br_mdb_uninit();
return err;
}
void br_netlink_fini(void)
{
- br_mdb_uninit();
br_vlan_rtnl_uninit();
rtnl_af_unregister(&br_af_ops);
rtnl_link_unregister(&br_link_ops);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cef5f6ea850c..7264fd40f82f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -981,8 +981,12 @@ void br_multicast_get_stats(const struct net_bridge *br,
u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx);
void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max);
u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx);
-void br_mdb_init(void);
-void br_mdb_uninit(void);
+int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+ struct netlink_ext_ack *extack);
+int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
+int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
@@ -1374,12 +1378,22 @@ static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
return false;
}
-static inline void br_mdb_init(void)
+static inline int br_mdb_add(struct net_device *dev, struct nlattr *tb[],
+ u16 nlmsg_flags, struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
+ return -EOPNOTSUPP;
}
-static inline void br_mdb_uninit(void)
+static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb)
{
+ return 0;
}
static inline int br_mdb_hash_init(struct net_bridge *br)
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index c3ecd77e25cb..bd4d1b4d745f 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -8,6 +8,9 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_meta.h>
#include <linux/if_bridge.h>
+#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+
+#include "../br_private.h"
static const struct net_device *
nft_meta_get_bridge(const struct net_device *dev)
@@ -102,6 +105,50 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.reduce = nft_meta_get_reduce,
};
+static void nft_meta_bridge_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_meta *meta = nft_expr_priv(expr);
+ u32 *sreg = &regs->data[meta->sreg];
+ struct sk_buff *skb = pkt->skb;
+ u8 value8;
+
+ switch (meta->key) {
+ case NFT_META_BRI_BROUTE:
+ value8 = nft_reg_load8(sreg);
+ BR_INPUT_SKB_CB(skb)->br_netfilter_broute = !!value8;
+ break;
+ default:
+ nft_meta_set_eval(expr, regs, pkt);
+ }
+}
+
+static int nft_meta_bridge_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
+ int err;
+
+ priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+ switch (priv->key) {
+ case NFT_META_BRI_BROUTE:
+ len = sizeof(u8);
+ break;
+ default:
+ return nft_meta_set_init(ctx, expr, tb);
+ }
+
+ priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
const struct nft_expr *expr)
{
@@ -120,15 +167,33 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
return false;
}
+static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_META_BRI_BROUTE:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ default:
+ return nft_meta_set_validate(ctx, expr, data);
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.type = &nft_meta_bridge_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
- .eval = nft_meta_set_eval,
- .init = nft_meta_set_init,
+ .eval = nft_meta_bridge_set_eval,
+ .init = nft_meta_bridge_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
.reduce = nft_meta_bridge_set_reduce,
- .validate = nft_meta_set_validate,
+ .validate = nft_meta_bridge_set_validate,
};
static const struct nft_expr_ops *
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index bb378c33f542..7a36353dbc22 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -324,6 +324,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
+ .map_mem_usage = bpf_local_storage_map_mem_usage,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/net/core/dev.c b/net/core/dev.c
index 253584777101..c7853192563d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3075,7 +3075,7 @@ void __netif_schedule(struct Qdisc *q)
EXPORT_SYMBOL(__netif_schedule);
struct dev_kfree_skb_cb {
- enum skb_free_reason reason;
+ enum skb_drop_reason reason;
};
static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
@@ -3108,7 +3108,7 @@ void netif_tx_wake_queue(struct netdev_queue *dev_queue)
}
EXPORT_SYMBOL(netif_tx_wake_queue);
-void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
unsigned long flags;
@@ -3128,18 +3128,16 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
-EXPORT_SYMBOL(__dev_kfree_skb_irq);
+EXPORT_SYMBOL(dev_kfree_skb_irq_reason);
-void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
+void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
if (in_hardirq() || irqs_disabled())
- __dev_kfree_skb_irq(skb, reason);
- else if (unlikely(reason == SKB_REASON_DROPPED))
- kfree_skb(skb);
+ dev_kfree_skb_irq_reason(skb, reason);
else
- consume_skb(skb);
+ kfree_skb_reason(skb, reason);
}
-EXPORT_SYMBOL(__dev_kfree_skb_any);
+EXPORT_SYMBOL(dev_kfree_skb_any_reason);
/**
@@ -5020,11 +5018,11 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
clist = clist->next;
WARN_ON(refcount_read(&skb->users));
- if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
+ if (likely(get_kfree_skb_cb(skb)->reason == SKB_CONSUMED))
trace_consume_skb(skb, net_tx_action);
else
trace_kfree_skb(skb, net_tx_action,
- SKB_DROP_REASON_NOT_SPECIFIED);
+ get_kfree_skb_cb(skb)->reason);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1d6f165923bf..d052fac28d02 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags)
+{
+ return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
+}
+
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
void *, to, u32, len)
{
@@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+{
+ return ____bpf_skb_load_bytes(skb, offset, to, len);
+}
+
BPF_CALL_4(bpf_flow_dissector_load_bytes,
const struct bpf_flow_dissector *, ctx, u32, offset,
void *, to, u32, len)
@@ -3828,7 +3839,7 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
{
return xdp_get_buff_len(xdp);
}
@@ -3883,8 +3894,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
-static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
- void *buf, unsigned long len, bool flush)
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
{
unsigned long ptr_len, ptr_off = 0;
skb_frag_t *next_frag, *end_frag;
@@ -3930,7 +3941,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
}
}
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 size = xdp->data_end - xdp->data;
@@ -3988,6 +3999,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
+}
+
BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
@@ -4015,6 +4031,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
+}
+
static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -5850,7 +5871,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
else
neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
@@ -5971,7 +5992,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* not needed here.
*/
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
memcpy(params->dmac, neigh->ha, ETH_ALEN);
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
@@ -8144,12 +8165,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_delete_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sk_msg_proto;
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
-#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
@@ -9264,11 +9279,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
#endif
/* <store>: skb->tstamp = tstamp */
- *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
- offsetof(struct sk_buff, tstamp));
+ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM,
+ skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
return insn;
}
+#define BPF_EMIT_STORE(size, si, off) \
+ BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \
+ (si)->dst_reg, (si)->src_reg, (off), (si)->imm)
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -9298,9 +9317,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, priority):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, priority, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, priority, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, priority, 4,
@@ -9331,9 +9350,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, mark):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, mark, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, mark, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, mark, 4,
@@ -9352,11 +9371,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, queue_mapping):
if (type == BPF_WRITE) {
- *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- queue_mapping,
- 2, target_size));
+ u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size);
+
+ if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
+ *insn++ = BPF_JMP_A(0); /* noop */
+ break;
+ }
+
+ if (BPF_CLASS(si->code) == BPF_STX)
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
} else {
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff,
@@ -9392,8 +9416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -9408,8 +9431,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, tc_classid);
*target_size = 2;
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
si->src_reg, off);
@@ -9442,9 +9464,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, tc_index, 2,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_H, si,
+ bpf_target_off(struct sk_buff, tc_index, 2,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, tc_index, 2,
@@ -9645,8 +9667,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_bound_dev_if));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_bound_dev_if));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
@@ -9656,8 +9678,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_mark));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_mark));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_mark));
@@ -9667,8 +9689,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_priority));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_priority));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_priority));
@@ -9933,10 +9955,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(S, TF)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
- *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
+ *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \
+ tmp_reg, si->src_reg, \
bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
- + OFF); \
+ + OFF, \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
offsetof(S, TF)); \
} while (0)
@@ -10171,9 +10195,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
- reg, si->src_reg, \
- offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \
+ BPF_MEM | BPF_CLASS(si->code), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD), \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
@@ -10205,8 +10231,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
off -= offsetof(struct bpf_sock_ops, replylong[0]);
off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- off);
+ *insn++ = BPF_EMIT_STORE(BPF_W, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
off);
@@ -10563,8 +10588,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct sk_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -11621,3 +11645,82 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
return func;
}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+
+ return 0;
+}
+
+__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
+
+ return 0;
+}
+__diag_pop();
+
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ int err;
+
+ err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
+ if (err)
+ return err;
+
+ bpf_dynptr_set_rdonly(ptr__uninit);
+
+ return 0;
+}
+
+BTF_SET8_START(bpf_kfunc_check_set_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_SET8_END(bpf_kfunc_check_set_skb)
+
+BTF_SET8_START(bpf_kfunc_check_set_xdp)
+BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_SET8_END(bpf_kfunc_check_set_xdp)
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb,
+};
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_xdp,
+};
+
+static int __init bpf_kfunc_init(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+}
+late_initcall(bpf_kfunc_init);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6798f6d2423b..90d399b3f980 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -627,37 +627,6 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
}
EXPORT_SYMBOL(neigh_lookup);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey)
-{
- struct neighbour *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
-
- NEIGH_CACHE_STAT_INC(tbl, lookups);
-
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!memcmp(n->primary_key, pkey, key_len) &&
- net_eq(dev_net(n->dev), net)) {
- if (!refcount_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
- }
-
- rcu_read_unlock_bh();
- return n;
-}
-EXPORT_SYMBOL(neigh_lookup_nodev);
-
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, u32 flags,
@@ -1124,13 +1093,13 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->nud_state = NUD_STALE;
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
@@ -1140,14 +1109,14 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
- neigh->nud_state = NUD_REACHABLE;
+ WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
- neigh->nud_state = NUD_PROBE;
+ WRITE_ONCE(neigh->nud_state, NUD_PROBE);
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
@@ -1161,7 +1130,7 @@ static void neigh_timer_handler(struct timer_list *t)
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
notify = 1;
neigh_invalidate(neigh);
goto out;
@@ -1210,7 +1179,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
neigh->updated = now;
if (!immediate_ok) {
next = now + 1;
@@ -1222,7 +1191,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
}
neigh_add_timer(neigh, next);
} else {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
@@ -1232,7 +1201,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh_del_timer(neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
@@ -1344,7 +1313,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
new = old & ~NUD_PERMANENT;
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
goto out;
}
@@ -1353,7 +1322,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1432,7 +1401,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
notify = 1;
}
@@ -1519,7 +1488,7 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -3229,7 +3198,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
}
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;
- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
n = rcu_dereference_bh(n->next);
@@ -3271,7 +3240,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;
- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
n = rcu_dereference_bh(n->next);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5d8eb57867a9..b7b1661d0d56 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -54,6 +54,9 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/devlink.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/addrconf.h>
+#endif
#include "dev.h"
@@ -6063,6 +6066,217 @@ static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct br_port_msg *bpm;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request");
+ return -EINVAL;
+ }
+
+ bpm = nlmsg_data(nlh);
+ if (bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request");
+ return -EINVAL;
+ }
+ if (nlmsg_attrlen(nlh, sizeof(*bpm))) {
+ NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct rtnl_mdb_dump_ctx {
+ long idx;
+};
+
+static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int idx, s_idx;
+ int err;
+
+ NL_ASSERT_DUMP_CTX_FITS(struct rtnl_mdb_dump_ctx);
+
+ if (cb->strict_check) {
+ err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack);
+ if (err)
+ return err;
+ }
+
+ s_idx = ctx->idx;
+ idx = 0;
+
+ for_each_netdev(net, dev) {
+ if (idx < s_idx)
+ goto skip;
+ if (!dev->netdev_ops->ndo_mdb_dump)
+ goto skip;
+
+ err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb);
+ if (err == -EMSGSIZE)
+ goto out;
+ /* Moving on to next device, reset markers and sequence
+ * counters since they are all maintained per-device.
+ */
+ memset(cb->ctx, 0, sizeof(cb->ctx));
+ cb->prev_seq = 0;
+ cb->seq = 0;
+skip:
+ idx++;
+ }
+
+out:
+ ctx->idx = idx;
+ return skb->len;
+}
+
+static int rtnl_validate_mdb_entry(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(attr);
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
+ return -EINVAL;
+ }
+
+ if (entry->ifindex == 0) {
+ NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed");
+ return -EINVAL;
+ }
+
+ if (entry->addr.proto == htons(ETH_P_IP)) {
+ if (!ipv4_is_multicast(entry->addr.u.ip4) &&
+ !ipv4_is_zeronet(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0");
+ return -EINVAL;
+ }
+ if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
+ NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast");
+ return -EINVAL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (entry->addr.proto == htons(ETH_P_IPV6)) {
+ if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
+ NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes");
+ return -EINVAL;
+ }
+#endif
+ } else if (entry->addr.proto == 0) {
+ /* L2 mdb */
+ if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
+ NL_SET_ERR_MSG(extack, "L2 entry group is not multicast");
+ return -EINVAL;
+ }
+ } else {
+ NL_SET_ERR_MSG(extack, "Unknown entry protocol");
+ return -EINVAL;
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG(extack, "Unknown entry state");
+ return -EINVAL;
+ }
+ if (entry->vid >= VLAN_VID_MASK) {
+ NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = {
+ [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 },
+ [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ rtnl_validate_mdb_entry,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
+static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (err)
+ return err;
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+
+ if (!dev->netdev_ops->ndo_mdb_add) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
+ return -EOPNOTSUPP;
+ }
+
+ return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack);
+}
+
+static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ struct br_port_msg *bpm;
+ struct net_device *dev;
+ int err;
+
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (err)
+ return err;
+
+ bpm = nlmsg_data(nlh);
+ if (!bpm->ifindex) {
+ NL_SET_ERR_MSG(extack, "Invalid ifindex");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, bpm->ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device doesn't exist");
+ return -ENODEV;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) {
+ NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute");
+ return -EINVAL;
+ }
+
+ if (!dev->netdev_ops->ndo_mdb_del) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
+ return -EOPNOTSUPP;
+ }
+
+ return dev->netdev_ops->ndo_mdb_del(dev, tb, extack);
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -6297,4 +6511,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
0);
rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0);
+
+ rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1a31815104d6..050a875d09c5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -420,10 +420,9 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb = __build_skb(data, frag_size);
- if (skb && frag_size) {
+ if (likely(skb && frag_size)) {
skb->head_frag = 1;
- if (page_is_pfmemalloc(virt_to_head_page(data)))
- skb->pfmemalloc = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
}
return skb;
}
@@ -445,8 +444,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,
if (frag_size) {
skb->head_frag = 1;
- if (page_is_pfmemalloc(virt_to_head_page(data)))
- skb->pfmemalloc = 1;
+ skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
}
return skb;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index a68a7290a3b2..9b854e236d23 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -797,6 +797,14 @@ static void sock_map_fini_seq_private(void *priv_data)
bpf_map_put_with_uref(info->map);
}
+static u64 sock_map_mem_usage(const struct bpf_map *map)
+{
+ u64 usage = sizeof(struct bpf_stab);
+
+ usage += (u64)map->max_entries * sizeof(struct sock *);
+ return usage;
+}
+
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_ops = &sock_map_seq_ops,
.init_seq_private = sock_map_init_seq_private,
@@ -816,6 +824,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = sock_map_mem_usage,
.map_btf_id = &sock_map_btf_ids[0],
.iter_seq_info = &sock_map_iter_seq_info,
};
@@ -1397,6 +1406,16 @@ static void sock_hash_fini_seq_private(void *priv_data)
bpf_map_put_with_uref(info->map);
}
+static u64 sock_hash_mem_usage(const struct bpf_map *map)
+{
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ u64 usage = sizeof(*htab);
+
+ usage += htab->buckets_num * sizeof(struct bpf_shtab_bucket);
+ usage += atomic_read(&htab->count) * (u64)htab->elem_size;
+ return usage;
+}
+
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_ops = &sock_hash_seq_ops,
.init_seq_private = sock_hash_init_seq_private,
@@ -1416,6 +1435,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = sock_hash_mem_usage,
.map_btf_id = &sock_hash_map_btf_ids[0],
.iter_seq_info = &sock_hash_iter_seq_info,
};
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b780827f5e0a..3ab68415d121 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -177,7 +177,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
* for the case, if this connection will not able to recover.
*/
if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
- sk->sk_err_soft = EMSGSIZE;
+ WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);
mtu = dst_mtu(dst);
@@ -339,8 +339,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
sk_error_report(sk);
dccp_done(sk);
- } else
- sk->sk_err_soft = err;
+ } else {
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
goto out;
}
@@ -364,8 +365,9 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->sk_err = err;
sk_error_report(sk);
- } else /* Only an error on timeout */
- sk->sk_err_soft = err;
+ } else { /* Only an error on timeout */
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
out:
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b9d7c3dd1cb3..47fb10834223 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -174,17 +174,18 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
*/
sk_error_report(sk);
dccp_done(sk);
- } else
- sk->sk_err_soft = err;
+ } else {
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
goto out;
}
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
sk_error_report(sk);
- } else
- sk->sk_err_soft = err;
-
+ } else {
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
out:
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 27a3b37acd2e..b3255e87cc7e 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -19,7 +19,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
static void dccp_write_err(struct sock *sk)
{
- sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
+ sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT;
sk_error_report(sk);
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8db6747f892f..940062e08f57 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1322,7 +1322,7 @@ int inet_sk_rebuild_header(struct sock *sk)
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
- sk->sk_err_soft = -err;
+ WRITE_ONCE(sk->sk_err_soft, -err);
}
return err;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4f7237661afb..9456f5bb35e5 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
- if (!(neigh->nud_state & NUD_VALID))
+ if (!(READ_ONCE(neigh->nud_state) & NUD_VALID))
pr_debug("trying to ucast probe in NUD_INVALID\n");
neigh_ha_snapshot(dst_ha, neigh, dev);
dst_hw = dst_ha;
@@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- if (!(neigh->nud_state & NUD_NOARP)) {
+ if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
r->arp_flags = arp_state_to_flags(neigh);
@@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
- if ((neigh->nud_state & NUD_VALID) && !force) {
+ if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) {
neigh_release(neigh);
return 0;
}
- if (neigh->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_ADMIN, 0);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3bb890a40ed7..574ff450c4d2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -563,7 +563,7 @@ static int fib_detect_death(struct fib_info *fi, int order,
n = NULL;
if (n) {
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);
neigh_release(n);
} else {
return 0;
@@ -2202,7 +2202,7 @@ static bool fib_good_nh(const struct fib_nh *nh)
else
n = NULL;
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);
rcu_read_unlock_bh();
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index c920aa9a62a9..48ff5f13e797 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2638,10 +2638,10 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
/*
* check if a multicast source filter allows delivery for a given <src,dst,intf>
*/
-int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
+int ip_mc_sf_allow(const struct sock *sk, __be32 loc_addr, __be32 rmt_addr,
int dif, int sdif)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
struct ip_mc_socklist *pmc;
struct ip_sf_socklist *psl;
int i;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4e4e308c3230..cb04dbad9ea4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -129,7 +129,8 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ip_local_out);
-static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
+static inline int ip_select_ttl(const struct inet_sock *inet,
+ const struct dst_entry *dst)
{
int ttl = inet->uc_ttl;
@@ -146,7 +147,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
__be32 saddr, __be32 daddr, struct ip_options_rcu *opt,
u8 tos)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = skb_rtable(skb);
struct net *net = sock_net(sk);
struct iphdr *iph;
@@ -990,7 +991,7 @@ static int __ip_append_data(struct sock *sk,
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
paged = !!cork->gso_size;
- if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index d8ef05347fd9..e28a99f1996b 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1128,7 +1128,7 @@ static bool ipv6_good_nh(const struct fib6_nh *nh)
n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);
rcu_read_unlock_bh();
@@ -1145,7 +1145,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
(__force u32)nh->fib_nh_gw4);
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);
rcu_read_unlock_bh();
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 94df935ee0c5..3cf68695b40d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -116,10 +116,10 @@ void raw_unhash_sk(struct sock *sk)
}
EXPORT_SYMBOL_GPL(raw_unhash_sk);
-bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num,
__be32 raddr, __be32 laddr, int dif, int sdif)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
!(inet->inet_daddr && inet->inet_daddr != raddr) &&
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index 999321834b94..bca49a844f01 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -34,7 +34,7 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r)
* use helper to figure it out.
*/
-static bool raw_lookup(struct net *net, struct sock *sk,
+static bool raw_lookup(struct net *net, const struct sock *sk,
const struct inet_diag_req_v2 *req)
{
struct inet_diag_req_raw *r = (void *)req;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index de6e3515ab4f..232009d216c4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
if (!n)
n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
if (fib_lookup(net, fl4, &res, 0) == 0) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 288693981b00..01569de651b6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -589,7 +589,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ if (READ_ONCE(sk->sk_err) ||
+ !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR;
return mask;
@@ -3094,7 +3095,7 @@ int tcp_disconnect(struct sock *sk, int flags)
if (old_state == TCP_LISTEN) {
inet_csk_listen_stop(sk);
} else if (unlikely(tp->repair)) {
- sk->sk_err = ECONNABORTED;
+ WRITE_ONCE(sk->sk_err, ECONNABORTED);
} else if (tcp_need_reset(old_state) ||
(tp->snd_nxt != tp->write_seq &&
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -3102,9 +3103,9 @@ int tcp_disconnect(struct sock *sk, int flags)
* states
*/
tcp_send_active_reset(sk, gfp_any());
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
@@ -4692,7 +4693,7 @@ int tcp_abort(struct sock *sk, int err)
bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_err = err;
+ WRITE_ONCE(sk->sk_err, err);
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
sk_error_report(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc072d2cfcd8..754ddbe0577f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3874,7 +3874,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
- sk->sk_err_soft = 0;
+ WRITE_ONCE(sk->sk_err_soft, 0);
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_jiffies32;
if (!prior_packets)
@@ -4322,15 +4322,15 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
+ WRITE_ONCE(sk->sk_err, ECONNREFUSED);
break;
case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
+ WRITE_ONCE(sk->sk_err, EPIPE);
break;
case TCP_CLOSE:
return;
default:
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
}
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ea370afa70ed..89daa6b953ff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -361,7 +361,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
* for the case, if this connection will not able to recover.
*/
if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
- sk->sk_err_soft = EMSGSIZE;
+ WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);
mtu = dst_mtu(dst);
@@ -596,13 +596,13 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
if (!sock_owned_by_user(sk)) {
- sk->sk_err = err;
+ WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
tcp_done(sk);
} else {
- sk->sk_err_soft = err;
+ WRITE_ONCE(sk->sk_err_soft, err);
}
goto out;
}
@@ -625,10 +625,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
- sk->sk_err = err;
+ WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else { /* Only an error on timeout */
- sk->sk_err_soft = err;
+ WRITE_ONCE(sk->sk_err_soft, err);
}
out:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ba839e441450..b1e743b39a83 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3699,7 +3699,7 @@ static void tcp_connect_init(struct sock *sk)
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
- sk->sk_err = 0;
+ WRITE_ONCE(sk->sk_err, 0);
sock_reset_flag(sk, SOCK_DONE);
tp->snd_wnd = 0;
tcp_init_wl(tp, 0);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cb79127f45c3..b839c2f91292 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -67,7 +67,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk)
{
- sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
+ WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
sk_error_report(sk);
tcp_write_queue_purge(sk);
@@ -110,7 +110,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
shift++;
/* If some dubious ICMP arrived, penalize even more. */
- if (sk->sk_err_soft)
+ if (READ_ONCE(sk->sk_err_soft))
shift++;
if (tcp_check_oom(sk, shift)) {
@@ -146,7 +146,7 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
- if (sk->sk_err_soft && !alive)
+ if (READ_ONCE(sk->sk_err_soft) && !alive)
retries = 0;
/* However, if socket sent something recently, select some safe
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c605d171eb2d..aa32afd871ee 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -578,12 +578,12 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
#endif
-static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
+static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif, int sdif, unsigned short hnum)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net) ||
udp_sk(sk)->udp_port_hash != hnum ||
@@ -1531,10 +1531,21 @@ static void busylock_release(spinlock_t *busy)
spin_unlock(busy);
}
+/*
+ * Make sure the socket has enough forward-allocated receive memory for
+ * @size bytes.  Returns 0 when sk_forward_alloc already covers @size or
+ * when __sk_mem_schedule() succeeds for the missing part, -ENOBUFS
+ * otherwise.
+ */
+static int udp_rmem_schedule(struct sock *sk, int size)
+{
+	int shortfall = size - sk->sk_forward_alloc;
+
+	/* Nothing to schedule when the current allocation is sufficient. */
+	if (shortfall <= 0)
+		return 0;
+
+	return __sk_mem_schedule(sk, shortfall, SK_MEM_RECV) ? 0 : -ENOBUFS;
+}
+
int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff_head *list = &sk->sk_receive_queue;
- int rmem, delta, amt, err = -ENOMEM;
+ int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
int size;
@@ -1567,16 +1578,10 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
goto uncharge_drop;
spin_lock(&list->lock);
- if (size >= sk->sk_forward_alloc) {
- amt = sk_mem_pages(size);
- delta = amt << PAGE_SHIFT;
- if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
- err = -ENOBUFS;
- spin_unlock(&list->lock);
- goto uncharge_drop;
- }
-
- sk->sk_forward_alloc += delta;
+ err = udp_rmem_schedule(sk, size);
+ if (err) {
+ spin_unlock(&list->lock);
+ goto uncharge_drop;
}
sk->sk_forward_alloc -= size;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 38689bedfce7..e1b679a590c9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -845,7 +845,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
if (IS_ERR(dst)) {
sk->sk_route_caps = 0;
- sk->sk_err_soft = -PTR_ERR(dst);
+ WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst));
return PTR_ERR(dst);
}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5a9f4d722f35..0c50dcd35fe8 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -120,7 +120,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
dst = inet6_csk_route_socket(sk, &fl6);
if (IS_ERR(dst)) {
- sk->sk_err_soft = -PTR_ERR(dst);
+ WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst));
sk->sk_route_caps = 0;
kfree_skb(skb);
return PTR_ERR(dst);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c314fdde0097..e5ed39a3c65f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1153,7 +1153,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
rcu_read_lock_bh();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
- err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
+ err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
if (err) {
@@ -1500,7 +1500,7 @@ static int __ip6_append_data(struct sock *sk,
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
- if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1c02160cf7a4..714cdc9e2b8e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -627,12 +627,12 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
return 0;
}
-bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
+bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
const struct in6_addr *src_addr)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6_mc_socklist *mc;
- struct ip6_sf_socklist *psl;
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_mc_socklist *mc;
+ const struct ip6_sf_socklist *psl;
bool rv = true;
rcu_read_lock();
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c4be62c99f73..18634ebd20a4 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -745,7 +745,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
- if (!(neigh->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
ND_PRINTK(1, dbg,
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
@@ -1090,7 +1090,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
u8 old_flags = neigh->flags;
struct net *net = dev_net(dev);
- if (neigh->nud_state & NUD_FAILED)
+ if (READ_ONCE(neigh->nud_state) & NUD_FAILED)
goto out;
/*
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 808983bc2ec9..c4835dbdfcff 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -237,7 +237,7 @@ static int ping_v6_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
} else {
int bucket = ((struct ping_iter_state *) seq->private)->bucket;
- struct inet_sock *inet = inet_sk(v);
+ struct inet_sock *inet = inet_sk((struct sock *)v);
__u16 srcp = ntohs(inet->inet_sport);
__u16 destp = ntohs(inet->inet_dport);
ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bac9ba747bde..6ac2f2690c44 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -64,7 +64,7 @@
struct raw_hashinfo raw_v6_hashinfo;
EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
-bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num,
+bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num,
const struct in6_addr *loc_addr,
const struct in6_addr *rmt_addr, int dif, int sdif)
{
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0fdb03df2287..e829bd880384 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -638,7 +638,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
- if (neigh->nud_state & NUD_VALID)
+ if (READ_ONCE(neigh->nud_state) & NUD_VALID)
goto out;
write_lock(&neigh->lock);
@@ -687,16 +687,16 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
&fib6_nh->fib_nh_gw6);
if (neigh) {
- read_lock(&neigh->lock);
- if (neigh->nud_state & NUD_VALID)
+ u8 nud_state = READ_ONCE(neigh->nud_state);
+
+ if (nud_state & NUD_VALID)
ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
- else if (!(neigh->nud_state & NUD_FAILED))
+ else if (!(nud_state & NUD_FAILED))
ret = RT6_NUD_SUCCEED;
else
ret = RT6_NUD_FAIL_PROBE;
#endif
- read_unlock(&neigh->lock);
} else {
ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1bf93b61aa06..35cf523c9efd 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -493,12 +493,13 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
if (!sock_owned_by_user(sk)) {
- sk->sk_err = err;
+ WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk);
- } else
- sk->sk_err_soft = err;
+ } else {
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
goto out;
case TCP_LISTEN:
break;
@@ -512,11 +513,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (!sock_owned_by_user(sk) && np->recverr) {
- sk->sk_err = err;
+ WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
- } else
- sk->sk_err_soft = err;
-
+ } else {
+ WRITE_ONCE(sk->sk_err_soft, err);
+ }
out:
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9fb2f33ee3a7..d350e57c4792 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -805,12 +805,12 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
+static bool __udp_v6_is_mcast_sock(struct net *net, const struct sock *sk,
__be16 loc_port, const struct in6_addr *loc_addr,
__be16 rmt_port, const struct in6_addr *rmt_addr,
int dif, int sdif, unsigned short hnum)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
return false;
@@ -1708,7 +1708,7 @@ int udp6_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
} else {
int bucket = ((struct udp_iter_state *)seq->private)->bucket;
- struct inet_sock *inet = inet_sk(v);
+ const struct inet_sock *inet = inet_sk((const struct sock *)v);
__u16 srcp = ntohs(inet->inet_sport);
__u16 destp = ntohs(inet->inet_dport);
__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index f9514bacbd4a..3b651e7f5a73 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -554,6 +554,23 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
ieee80211_send_addba_with_timeout(sta, tid_tx);
}
+/*
+ * Stamp the TX aggregation session of @pubsta for @tid with the current
+ * jiffies value.  Does nothing (after a one-time warning) for an
+ * out-of-range @tid, and nothing at all when no session exists for it.
+ */
+void ieee80211_refresh_tx_agg_session_timer(struct ieee80211_sta *pubsta,
+					     u16 tid)
+{
+	struct tid_ampdu_tx *session;
+	struct sta_info *sta;
+
+	if (WARN_ON_ONCE(tid >= IEEE80211_NUM_TIDS))
+		return;
+
+	sta = container_of(pubsta, struct sta_info, sta);
+	session = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+	if (session)
+		session->last_tx = jiffies;
+}
+EXPORT_SYMBOL(ieee80211_refresh_tx_agg_session_timer);
+
/*
* After accepting the AddBA Response we activated a timer,
* resetting it after each frame that we send.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d3d861911ed6..bccc9627dca5 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1252,7 +1252,15 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
prev_beacon_int = link_conf->beacon_int;
link_conf->beacon_int = params->beacon_interval;
+ if (params->ht_cap)
+ link_conf->ht_ldpc =
+ params->ht_cap->cap_info &
+ cpu_to_le16(IEEE80211_HT_CAP_LDPC_CODING);
+
if (params->vht_cap) {
+ link_conf->vht_ldpc =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC);
link_conf->vht_su_beamformer =
params->vht_cap->vht_cap_info &
cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
@@ -1282,6 +1290,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
}
if (params->he_cap) {
+ link_conf->he_ldpc =
+ params->he_cap->phy_cap_info[1] &
+ IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
link_conf->he_su_beamformer =
params->he_cap->phy_cap_info[3] &
IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
@@ -1299,6 +1310,22 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (params->eht_cap) {
link_conf->eht_puncturing = params->punct_bitmap;
changed |= BSS_CHANGED_EHT_PUNCTURING;
+
+ link_conf->eht_su_beamformer =
+ params->eht_cap->fixed.phy_cap_info[0] &
+ IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER;
+ link_conf->eht_su_beamformee =
+ params->eht_cap->fixed.phy_cap_info[0] &
+ IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE;
+ link_conf->eht_mu_beamformer =
+ params->eht_cap->fixed.phy_cap_info[7] &
+ (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ |
+ IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ |
+ IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ);
+ } else {
+ link_conf->eht_su_beamformer = false;
+ link_conf->eht_su_beamformee = false;
+ link_conf->eht_mu_beamformer = false;
}
if (sdata->vif.type == NL80211_IFTYPE_AP &&
@@ -1788,7 +1815,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
(void *)params->he_6ghz_capa,
link_sta);
- if (params->eht_capa)
+ if (params->he_capa && params->eht_capa)
ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband,
(u8 *)params->he_capa,
params->he_capa_len,
@@ -4905,6 +4932,22 @@ ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
+/*
+ * cfg80211 set_hw_timestamp handler: forward @hwts to the driver's
+ * set_hw_timestamp op.  Returns -EOPNOTSUPP when the driver has no such
+ * op, -EIO when check_sdata_in_driver() fails, otherwise the driver's
+ * return value.
+ */
+static int ieee80211_set_hw_timestamp(struct wiphy *wiphy,
+				      struct net_device *dev,
+				      struct cfg80211_set_hw_timestamp *hwts)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	int ret;
+
+	/* The op check comes first so check_sdata_in_driver() only runs
+	 * when there is something to call.
+	 */
+	if (!local->ops->set_hw_timestamp)
+		ret = -EOPNOTSUPP;
+	else if (!check_sdata_in_driver(sdata))
+		ret = -EIO;
+	else
+		ret = local->ops->set_hw_timestamp(&local->hw, &sdata->vif,
+						   hwts);
+
+	return ret;
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -5015,4 +5058,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.add_link_station = ieee80211_add_link_station,
.mod_link_station = ieee80211_mod_link_station,
.del_link_station = ieee80211_del_link_station,
+ .set_hw_timestamp = ieee80211_set_hw_timestamp,
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 0bac9af3ca96..b0cef37eb394 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -23,16 +23,16 @@
#include "driver-ops.h"
static ssize_t ieee80211_if_read(
- struct ieee80211_sub_if_data *sdata,
+ void *data,
char __user *userbuf,
size_t count, loff_t *ppos,
- ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int))
+ ssize_t (*format)(const void *, char *, int))
{
char buf[200];
ssize_t ret = -EINVAL;
read_lock(&dev_base_lock);
- ret = (*format)(sdata, buf, sizeof(buf));
+ ret = (*format)(data, buf, sizeof(buf));
read_unlock(&dev_base_lock);
if (ret >= 0)
@@ -42,10 +42,10 @@ static ssize_t ieee80211_if_read(
}
static ssize_t ieee80211_if_write(
- struct ieee80211_sub_if_data *sdata,
+ void *data,
const char __user *userbuf,
size_t count, loff_t *ppos,
- ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int))
+ ssize_t (*write)(void *, const char *, int))
{
char buf[64];
ssize_t ret;
@@ -58,64 +58,64 @@ static ssize_t ieee80211_if_write(
buf[count] = '\0';
rtnl_lock();
- ret = (*write)(sdata, buf, count);
+ ret = (*write)(data, buf, count);
rtnl_unlock();
return ret;
}
-#define IEEE80211_IF_FMT(name, field, format_string) \
+#define IEEE80211_IF_FMT(name, type, field, format_string) \
static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, char *buf, \
+ const type *data, char *buf, \
int buflen) \
{ \
- return scnprintf(buf, buflen, format_string, sdata->field); \
+ return scnprintf(buf, buflen, format_string, data->field); \
}
-#define IEEE80211_IF_FMT_DEC(name, field) \
- IEEE80211_IF_FMT(name, field, "%d\n")
-#define IEEE80211_IF_FMT_HEX(name, field) \
- IEEE80211_IF_FMT(name, field, "%#x\n")
-#define IEEE80211_IF_FMT_LHEX(name, field) \
- IEEE80211_IF_FMT(name, field, "%#lx\n")
+#define IEEE80211_IF_FMT_DEC(name, type, field) \
+ IEEE80211_IF_FMT(name, type, field, "%d\n")
+#define IEEE80211_IF_FMT_HEX(name, type, field) \
+ IEEE80211_IF_FMT(name, type, field, "%#x\n")
+#define IEEE80211_IF_FMT_LHEX(name, type, field) \
+ IEEE80211_IF_FMT(name, type, field, "%#lx\n")
-#define IEEE80211_IF_FMT_HEXARRAY(name, field) \
+#define IEEE80211_IF_FMT_HEXARRAY(name, type, field) \
static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, \
+ const type *data, \
char *buf, int buflen) \
{ \
char *p = buf; \
int i; \
- for (i = 0; i < sizeof(sdata->field); i++) { \
+ for (i = 0; i < sizeof(data->field); i++) { \
p += scnprintf(p, buflen + buf - p, "%.2x ", \
- sdata->field[i]); \
+ data->field[i]); \
} \
p += scnprintf(p, buflen + buf - p, "\n"); \
return p - buf; \
}
-#define IEEE80211_IF_FMT_ATOMIC(name, field) \
+#define IEEE80211_IF_FMT_ATOMIC(name, type, field) \
static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, \
+ const type *data, \
char *buf, int buflen) \
{ \
- return scnprintf(buf, buflen, "%d\n", atomic_read(&sdata->field));\
+ return scnprintf(buf, buflen, "%d\n", atomic_read(&data->field));\
}
-#define IEEE80211_IF_FMT_MAC(name, field) \
+#define IEEE80211_IF_FMT_MAC(name, type, field) \
static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, char *buf, \
+ const type *data, char *buf, \
int buflen) \
{ \
- return scnprintf(buf, buflen, "%pM\n", sdata->field); \
+ return scnprintf(buf, buflen, "%pM\n", data->field); \
}
-#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \
+#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, type, field) \
static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, \
+ const type *data, \
char *buf, int buflen) \
{ \
return scnprintf(buf, buflen, "%d\n", \
- jiffies_to_msecs(sdata->field)); \
+ jiffies_to_msecs(data->field)); \
}
#define _IEEE80211_IF_FILE_OPS(name, _read, _write) \
@@ -126,43 +126,67 @@ static const struct file_operations name##_ops = { \
.llseek = generic_file_llseek, \
}
-#define _IEEE80211_IF_FILE_R_FN(name) \
+#define _IEEE80211_IF_FILE_R_FN(name, type) \
static ssize_t ieee80211_if_read_##name(struct file *file, \
char __user *userbuf, \
size_t count, loff_t *ppos) \
{ \
+ ssize_t (*fn)(const void *, char *, int) = (void *) \
+ ((ssize_t (*)(const type, char *, int)) \
+ ieee80211_if_fmt_##name); \
return ieee80211_if_read(file->private_data, \
- userbuf, count, ppos, \
- ieee80211_if_fmt_##name); \
+ userbuf, count, ppos, fn); \
}
-#define _IEEE80211_IF_FILE_W_FN(name) \
+#define _IEEE80211_IF_FILE_W_FN(name, type) \
static ssize_t ieee80211_if_write_##name(struct file *file, \
const char __user *userbuf, \
size_t count, loff_t *ppos) \
{ \
+ ssize_t (*fn)(void *, const char *, int) = (void *) \
+ ((ssize_t (*)(type, const char *, int)) \
+ ieee80211_if_parse_##name); \
return ieee80211_if_write(file->private_data, userbuf, count, \
- ppos, ieee80211_if_parse_##name); \
+ ppos, fn); \
}
#define IEEE80211_IF_FILE_R(name) \
- _IEEE80211_IF_FILE_R_FN(name) \
+ _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \
_IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL)
#define IEEE80211_IF_FILE_W(name) \
- _IEEE80211_IF_FILE_W_FN(name) \
+ _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \
_IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name)
#define IEEE80211_IF_FILE_RW(name) \
- _IEEE80211_IF_FILE_R_FN(name) \
- _IEEE80211_IF_FILE_W_FN(name) \
+ _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \
+ _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \
_IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \
ieee80211_if_write_##name)
#define IEEE80211_IF_FILE(name, field, format) \
- IEEE80211_IF_FMT_##format(name, field) \
+ IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \
IEEE80211_IF_FILE_R(name)
+/* Same but with a link_ prefix in the ops variable name and different type */
+#define IEEE80211_IF_LINK_FILE_R(name) \
+ _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \
+ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL)
+
+/*
+ * Write-only per-link file.  _IEEE80211_IF_FILE_W_FN() takes the handler's
+ * first-argument type as its second parameter; link files operate on a
+ * struct ieee80211_link_data pointer, matching the _R and _RW variants.
+ */
+#define IEEE80211_IF_LINK_FILE_W(name)					\
+	_IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *)	\
+	_IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name)
+
+#define IEEE80211_IF_LINK_FILE_RW(name) \
+ _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \
+ _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \
+ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \
+ ieee80211_if_write_##name)
+
+#define IEEE80211_IF_LINK_FILE(name, field, format) \
+ IEEE80211_IF_FMT_##format(name, struct ieee80211_link_data, field) \
+ IEEE80211_IF_LINK_FILE_R(name)
+
/* common attributes */
IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[NL80211_BAND_2GHZ],
HEX);
@@ -207,9 +231,9 @@ IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz);
IEEE80211_IF_FILE(flags, flags, HEX);
IEEE80211_IF_FILE(state, state, LHEX);
-IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC);
-IEEE80211_IF_FILE(ap_power_level, deflink.ap_power_level, DEC);
-IEEE80211_IF_FILE(user_power_level, deflink.user_power_level, DEC);
+IEEE80211_IF_LINK_FILE(txpower, conf->txpower, DEC);
+IEEE80211_IF_LINK_FILE(ap_power_level, ap_power_level, DEC);
+IEEE80211_IF_LINK_FILE(user_power_level, user_power_level, DEC);
static ssize_t
ieee80211_if_fmt_hw_queues(const struct ieee80211_sub_if_data *sdata,
@@ -236,9 +260,10 @@ IEEE80211_IF_FILE(bssid, deflink.u.mgd.bssid, MAC);
IEEE80211_IF_FILE(aid, vif.cfg.aid, DEC);
IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
-static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
+static int ieee80211_set_smps(struct ieee80211_link_data *link,
enum ieee80211_smps_mode smps_mode)
{
+ struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
int err;
@@ -256,7 +281,7 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
return -EOPNOTSUPP;
sdata_lock(sdata);
- err = __ieee80211_request_smps_mgd(sdata, &sdata->deflink, smps_mode);
+ err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode);
sdata_unlock(sdata);
return err;
@@ -269,24 +294,24 @@ static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = {
[IEEE80211_SMPS_DYNAMIC] = "dynamic",
};
-static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_sub_if_data *sdata,
+static ssize_t ieee80211_if_fmt_smps(const struct ieee80211_link_data *link,
char *buf, int buflen)
{
- if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
return snprintf(buf, buflen, "request: %s\nused: %s\n",
- smps_modes[sdata->deflink.u.mgd.req_smps],
- smps_modes[sdata->deflink.smps_mode]);
+ smps_modes[link->u.mgd.req_smps],
+ smps_modes[link->smps_mode]);
return -EINVAL;
}
-static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
+static ssize_t ieee80211_if_parse_smps(struct ieee80211_link_data *link,
const char *buf, int buflen)
{
enum ieee80211_smps_mode mode;
for (mode = 0; mode < IEEE80211_SMPS_NUM_MODES; mode++) {
if (strncmp(buf, smps_modes[mode], buflen) == 0) {
- int err = ieee80211_set_smps(sdata, mode);
+ int err = ieee80211_set_smps(link, mode);
if (!err)
return buflen;
return err;
@@ -295,7 +320,7 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
}
-IEEE80211_IF_FILE_RW(smps);
+IEEE80211_IF_LINK_FILE_RW(smps);
static ssize_t ieee80211_if_parse_tkip_mic_test(
struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
@@ -595,6 +620,8 @@ static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sda
}
IEEE80211_IF_FILE_RW(active_links);
+IEEE80211_IF_LINK_FILE(addr, conf->addr, MAC);
+
#ifdef CONFIG_MAC80211_MESH
IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC);
@@ -685,7 +712,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD(bssid);
DEBUGFS_ADD(aid);
DEBUGFS_ADD(beacon_timeout);
- DEBUGFS_ADD_MODE(smps, 0600);
DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
DEBUGFS_ADD_MODE(beacon_loss, 0200);
DEBUGFS_ADD_MODE(uapsd_queues, 0600);
@@ -698,7 +724,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
static void add_ap_files(struct ieee80211_sub_if_data *sdata)
{
DEBUGFS_ADD(num_mcast_sta);
- DEBUGFS_ADD_MODE(smps, 0600);
DEBUGFS_ADD(num_sta_ps);
DEBUGFS_ADD(dtim_count);
DEBUGFS_ADD(num_buffered_multicast);
@@ -789,9 +814,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD(flags);
DEBUGFS_ADD(state);
- DEBUGFS_ADD(txpower);
- DEBUGFS_ADD(user_power_level);
- DEBUGFS_ADD(ap_power_level);
if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
add_common_files(sdata);
@@ -821,6 +843,31 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
}
}
+#undef DEBUGFS_ADD_MODE
+#undef DEBUGFS_ADD
+
+#define DEBUGFS_ADD_MODE(dentry, name, mode) \
+ debugfs_create_file(#name, mode, dentry, \
+ link, &link_##name##_ops)
+
+#define DEBUGFS_ADD(dentry, name) DEBUGFS_ADD_MODE(dentry, name, 0400)
+
+/*
+ * Create the per-link debugfs files under @dentry: the three power
+ * files for every interface type, plus the read/write "smps" file for
+ * station interfaces.  The DEBUGFS_ADD*() macros pick up the local
+ * variable named "link" as the file's private data.
+ */
+static void add_link_files(struct ieee80211_link_data *link,
+			   struct dentry *dentry)
+{
+	DEBUGFS_ADD(dentry, txpower);
+	DEBUGFS_ADD(dentry, user_power_level);
+	DEBUGFS_ADD(dentry, ap_power_level);
+
+	if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
+		DEBUGFS_ADD_MODE(dentry, smps, 0600);
+}
+
void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
{
char buf[10+IFNAMSIZ];
@@ -831,6 +878,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
sdata->vif.debugfs_dir);
add_files(sdata);
+
+ if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO))
+ add_link_files(&sdata->deflink, sdata->vif.debugfs_dir);
}
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
@@ -856,3 +906,66 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
sprintf(buf, "netdev:%s", sdata->name);
debugfs_rename(dir->d_parent, dir, dir->d_parent, buf);
}
+
+/*
+ * Create the "link-<id>" debugfs directory for @link below the owning
+ * interface's debugfs directory and populate it with the link files.
+ * Warns and returns without doing anything when the interface has no
+ * debugfs directory or the wiphy does not advertise MLO support.
+ */
+void ieee80211_link_debugfs_add(struct ieee80211_link_data *link)
+{
+	struct ieee80211_sub_if_data *sdata = link->sdata;
+	char name[10];
+
+	if (WARN_ON(!sdata->vif.debugfs_dir))
+		return;
+
+	/* For now, this should not be called for non-MLO capable drivers */
+	if (WARN_ON(!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)))
+		return;
+
+	snprintf(name, sizeof(name), "link-%d", link->link_id);
+	link->debugfs_dir = debugfs_create_dir(name, sdata->vif.debugfs_dir);
+
+	DEBUGFS_ADD(link->debugfs_dir, addr);
+	add_link_files(link, link->debugfs_dir);
+}
+
+/*
+ * Drop @link's debugfs directory and clear link->debugfs_dir.  The
+ * directory is removed only when both it and the interface directory
+ * exist and differ; a link whose directory is the interface directory
+ * is expected to be the deflink (warned about otherwise) and is only
+ * unhooked, not removed.
+ */
+void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link)
+{
+	struct dentry *vif_dir = link->sdata->vif.debugfs_dir;
+	struct dentry *link_dir = link->debugfs_dir;
+
+	if (vif_dir && link_dir) {
+		if (link_dir == vif_dir)
+			WARN_ON(link != &link->sdata->deflink);
+		else
+			debugfs_remove_recursive(link_dir);
+	}
+
+	link->debugfs_dir = NULL;
+}
+
+/*
+ * Let the driver add its own entries to @link's debugfs directory via
+ * drv_link_add_debugfs().  Warns and bails out when the link has no
+ * debugfs directory.
+ */
+void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	if (WARN_ON(!link->debugfs_dir))
+		return;
+
+	sdata = link->sdata;
+	drv_link_add_debugfs(sdata->local, sdata, link->conf,
+			     link->debugfs_dir);
+}
+
+/*
+ * Remove the driver's entries from @link's debugfs directory by tearing
+ * the whole directory down and recreating it through
+ * ieee80211_link_debugfs_add().  A NULL @link or a link without a
+ * directory is ignored; a link whose directory is the interface
+ * directory triggers a warning and is left untouched.
+ */
+void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link)
+{
+	struct dentry *link_dir = link ? link->debugfs_dir : NULL;
+
+	if (!link_dir)
+		return;
+
+	if (WARN_ON(link_dir == link->sdata->vif.debugfs_dir))
+		return;
+
+	/* Recreate the directory excluding the driver data */
+	debugfs_remove_recursive(link_dir);
+	link->debugfs_dir = NULL;
+
+	ieee80211_link_debugfs_add(link);
+}
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index a7e9d8d518f9..99e688dcabd6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -10,6 +10,12 @@
void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
+
+void ieee80211_link_debugfs_add(struct ieee80211_link_data *link);
+void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
+
+void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
+void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
static inline void ieee80211_debugfs_add_netdev(
struct ieee80211_sub_if_data *sdata)
@@ -20,6 +26,16 @@ static inline void ieee80211_debugfs_remove_netdev(
static inline void ieee80211_debugfs_rename_netdev(
struct ieee80211_sub_if_data *sdata)
{}
+
+static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link)
+{}
+static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link)
+{}
+
+static inline void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link)
+{}
+static inline void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link)
+{}
#endif
#endif /* __IEEE80211_DEBUGFS_NETDEV_H */
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index cfb09e4aed4d..30cd0c905a24 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -8,6 +8,7 @@
#include "trace.h"
#include "driver-ops.h"
#include "debugfs_sta.h"
+#include "debugfs_netdev.h"
int drv_start(struct ieee80211_local *local)
{
@@ -477,6 +478,10 @@ int drv_change_vif_links(struct ieee80211_local *local,
u16 old_links, u16 new_links,
struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS])
{
+ struct ieee80211_link_data *link;
+ unsigned long links_to_add;
+ unsigned long links_to_rem;
+ unsigned int link_id;
int ret = -EOPNOTSUPP;
might_sleep();
@@ -487,13 +492,31 @@ int drv_change_vif_links(struct ieee80211_local *local,
if (old_links == new_links)
return 0;
+ links_to_add = ~old_links & new_links;
+ links_to_rem = old_links & ~new_links;
+
+ for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) {
+ link = rcu_access_pointer(sdata->link[link_id]);
+
+ ieee80211_link_debugfs_drv_remove(link);
+ }
+
trace_drv_change_vif_links(local, sdata, old_links, new_links);
if (local->ops->change_vif_links)
ret = local->ops->change_vif_links(&local->hw, &sdata->vif,
old_links, new_links, old);
trace_drv_return_int(local, ret);
- return ret;
+ if (ret)
+ return ret;
+
+ for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) {
+ link = rcu_access_pointer(sdata->link[link_id]);
+
+ ieee80211_link_debugfs_drv_add(link);
+ }
+
+ return 0;
}
int drv_change_sta_links(struct ieee80211_local *local,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 5d13a3dfd366..a68d606e6987 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -465,6 +465,22 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
}
#ifdef CONFIG_MAC80211_DEBUGFS
+static inline void drv_link_add_debugfs(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *link_conf,
+ struct dentry *dir)
+{
+ might_sleep();
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (local->ops->link_add_debugfs)
+ local->ops->link_add_debugfs(&local->hw, &sdata->vif,
+ link_conf, dir);
+}
+
static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta *sta,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ecc232eb1ee8..3d4edc25a69e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -999,6 +999,10 @@ struct ieee80211_link_data {
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct ieee80211_bss_conf *conf;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+ struct dentry *debugfs_dir;
+#endif
};
struct ieee80211_sub_if_data {
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 8c8869cc1fb4..e82db88a47f8 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -10,6 +10,7 @@
#include "ieee80211_i.h"
#include "driver-ops.h"
#include "key.h"
+#include "debugfs_netdev.h"
void ieee80211_link_setup(struct ieee80211_link_data *link)
{
@@ -34,6 +35,7 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
link->link_id = link_id;
link->conf = link_conf;
link_conf->link_id = link_id;
+ link_conf->vif = &sdata->vif;
INIT_WORK(&link->csa_finalize_work,
ieee80211_csa_finalize_work);
@@ -60,6 +62,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
default:
WARN_ON(1);
}
+
+ ieee80211_link_debugfs_add(link);
}
}
@@ -93,6 +97,7 @@ static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata,
if (WARN_ON(!link))
continue;
ieee80211_remove_link_keys(link, &keys);
+ ieee80211_link_debugfs_remove(link);
ieee80211_link_stop(link);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 60792dfabc9d..e13a0354c397 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2744,7 +2744,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link,
return changed;
}
-static u32 ieee80211_link_set_associated(struct ieee80211_link_data *link,
+static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link,
struct cfg80211_bss *cbss)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
@@ -3227,7 +3227,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
bool already = false;
- if (WARN_ON(sdata->vif.valid_links))
+ if (WARN_ON_ONCE(sdata->vif.valid_links))
return;
if (!ieee80211_sdata_running(sdata))
@@ -5893,7 +5893,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
goto free;
}
- if (sta && elems->opmode_notif)
+ if (elems->opmode_notif)
ieee80211_vht_handle_opmode(sdata, link_sta,
*elems->opmode_notif,
rx_status->band);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f7fdfe710951..0255c5745e1c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -43,6 +43,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
unsigned int present_fcs_len,
unsigned int rtap_space)
{
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_hdr *hdr;
unsigned int hdrlen;
__le16 fc;
@@ -51,6 +52,14 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
__pskb_trim(skb, skb->len - present_fcs_len);
pskb_pull(skb, rtap_space);
+ /* After pulling radiotap header, clear all flags that indicate
+ * info in skb->data.
+ */
+ status->flag &= ~(RX_FLAG_RADIOTAP_TLV_AT_END |
+ RX_FLAG_RADIOTAP_LSIG |
+ RX_FLAG_RADIOTAP_HE_MU |
+ RX_FLAG_RADIOTAP_HE);
+
hdr = (void *)skb->data;
fc = hdr->frame_control;
@@ -117,9 +126,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
/* allocate extra bitmaps */
if (status->chains)
len += 4 * hweight8(status->chains);
- /* vendor presence bitmap */
- if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)
- len += 4;
if (ieee80211_have_rx_timestamp(status)) {
len = ALIGN(len, 8);
@@ -181,34 +187,28 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
len += 2 * hweight8(status->chains);
}
- if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
- struct ieee80211_vendor_radiotap *rtap;
- int vendor_data_offset = 0;
+ if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) {
+ int tlv_offset = 0;
/*
* The position to look at depends on the existence (or non-
* existence) of other elements, so take that into account...
*/
if (status->flag & RX_FLAG_RADIOTAP_HE)
- vendor_data_offset +=
+ tlv_offset +=
sizeof(struct ieee80211_radiotap_he);
if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
- vendor_data_offset +=
+ tlv_offset +=
sizeof(struct ieee80211_radiotap_he_mu);
if (status->flag & RX_FLAG_RADIOTAP_LSIG)
- vendor_data_offset +=
+ tlv_offset +=
sizeof(struct ieee80211_radiotap_lsig);
- rtap = (void *)&skb->data[vendor_data_offset];
+ /* ensure 4 byte alignment for TLV */
+ len = ALIGN(len, 4);
- /* alignment for fixed 6-byte vendor data header */
- len = ALIGN(len, 2);
- /* vendor data header */
- len += 6;
- if (WARN_ON(rtap->align == 0))
- rtap->align = 1;
- len = ALIGN(len, rtap->align);
- len += rtap->len + rtap->pad;
+ /* TLVs until the mac header */
+ len += skb_mac_header(skb) - &skb->data[tlv_offset];
}
return len;
@@ -304,9 +304,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
u32 it_present_val;
u16 rx_flags = 0;
u16 channel_flags = 0;
+ u32 tlvs_len = 0;
int mpdulen, chain;
unsigned long chains = status->chains;
- struct ieee80211_vendor_radiotap rtap = {};
struct ieee80211_radiotap_he he = {};
struct ieee80211_radiotap_he_mu he_mu = {};
struct ieee80211_radiotap_lsig lsig = {};
@@ -327,18 +327,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
skb_pull(skb, sizeof(lsig));
}
- if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
- rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
- /* rtap.len and rtap.pad are undone immediately */
- skb_pull(skb, sizeof(rtap) + rtap.len + rtap.pad);
+ if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END) {
+ /* data is pointer at tlv all other info was pulled off */
+ tlvs_len = skb_mac_header(skb) - skb->data;
}
mpdulen = skb->len;
if (!(has_fcs && ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)))
mpdulen += FCS_LEN;
- rthdr = skb_push(skb, rtap_len);
- memset(rthdr, 0, rtap_len - rtap.len - rtap.pad);
+ rthdr = skb_push(skb, rtap_len - tlvs_len);
+ memset(rthdr, 0, rtap_len - tlvs_len);
it_present = &rthdr->it_present;
/* radiotap header, set always present flags */
@@ -360,13 +359,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
}
- if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
- it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) |
- BIT(IEEE80211_RADIOTAP_EXT);
- put_unaligned_le32(it_present_val, it_present);
- it_present++;
- it_present_val = rtap.present;
- }
+ if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END)
+ it_present_val |= BIT(IEEE80211_RADIOTAP_TLV);
put_unaligned_le32(it_present_val, it_present);
@@ -697,22 +691,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos++ = status->chain_signal[chain];
*pos++ = chain;
}
-
- if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
- /* ensure 2 byte alignment for the vendor field as required */
- if ((pos - (u8 *)rthdr) & 1)
- *pos++ = 0;
- *pos++ = rtap.oui[0];
- *pos++ = rtap.oui[1];
- *pos++ = rtap.oui[2];
- *pos++ = rtap.subns;
- put_unaligned_le16(rtap.len, pos);
- pos += 2;
- /* align the actual payload as requested */
- while ((pos - (u8 *)rthdr) & (rtap.align - 1))
- *pos++ = 0;
- /* data (and possible padding) already follows */
- }
}
static struct sk_buff *
@@ -788,6 +766,13 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
bool only_monitor = false;
unsigned int min_head_len;
+ if (WARN_ON_ONCE(status->flag & RX_FLAG_RADIOTAP_TLV_AT_END &&
+ !skb_mac_header_was_set(origskb))) {
+ /* with this skb no way to know where frame payload starts */
+ dev_kfree_skb(origskb);
+ return NULL;
+ }
+
if (status->flag & RX_FLAG_RADIOTAP_HE)
rtap_space += sizeof(struct ieee80211_radiotap_he);
@@ -797,12 +782,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
if (status->flag & RX_FLAG_RADIOTAP_LSIG)
rtap_space += sizeof(struct ieee80211_radiotap_lsig);
- if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
- struct ieee80211_vendor_radiotap *rtap =
- (void *)(origskb->data + rtap_space);
-
- rtap_space += sizeof(*rtap) + rtap->len + rtap->pad;
- }
+ if (status->flag & RX_FLAG_RADIOTAP_TLV_AT_END)
+ rtap_space += skb_mac_header(origskb) - &origskb->data[rtap_space];
min_head_len = rtap_space;
@@ -2582,7 +2563,7 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
bool noencrypt = !(status->flag & RX_FLAG_DECRYPTED);
- cfg80211_rx_control_port(dev, skb, noencrypt);
+ cfg80211_rx_control_port(dev, skb, noencrypt, rx->link_id);
dev_kfree_skb(skb);
} else {
struct ethhdr *ehdr = (void *)skb_mac_header(skb);
@@ -3916,8 +3897,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
if (!local->cooked_mntrs)
goto out_free_skb;
- /* vendor data is long removed here */
- status->flag &= ~RX_FLAG_RADIOTAP_VENDOR_DATA;
/* room for the radiotap header based on driver features */
needed_headroom = ieee80211_rx_radiotap_hdrlen(local, status, skb);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index dc3cdee51e66..32fa8aca7005 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -1246,11 +1246,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
return ret;
}
-/*
- * Only call this function when a scan can't be queued -- under RTNL.
- */
void ieee80211_scan_cancel(struct ieee80211_local *local)
{
+ /* ensure a new scan cannot be queued */
+ lockdep_assert_wiphy(local->hw.wiphy);
+
/*
* We are canceling software scan, or deferred scan that was not
* yet really started (see __ieee80211_start_scan ).
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7699fb410670..628d60f3db2a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5115,6 +5115,16 @@ static int ieee80211_beacon_protect(struct sk_buff *skb,
tx.key = rcu_dereference(link->default_beacon_key);
if (!tx.key)
return 0;
+
+ if (unlikely(tx.key->flags & KEY_FLAG_TAINTED)) {
+ tx.key = NULL;
+ return -EINVAL;
+ }
+
+ if (!(tx.key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT_TX) &&
+ tx.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
+ IEEE80211_SKB_CB(skb)->control.hw_key = &tx.key->conf;
+
tx.local = local;
tx.sdata = sdata;
__skb_queue_head_init(&tx.skbs);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 3150f3f0c872..bb4bd0b6a4f7 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -704,7 +704,6 @@ subsys_initcall(mctp_init);
module_exit(mctp_exit);
MODULE_DESCRIPTION("MCTP core");
-MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
MODULE_ALIAS_NETPROTO(PF_MCTP);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5c8dea49626c..1c42bebca39e 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -2035,7 +2035,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
return -EMSGSIZE;
- sk_err = ssk->sk_err;
+ sk_err = READ_ONCE(ssk->sk_err);
if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
return -EMSGSIZE;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 60b23b2716c4..6738181b2977 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2480,15 +2480,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
/* Mirror the tcp_reset() error propagation */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
+ WRITE_ONCE(sk->sk_err, ECONNREFUSED);
break;
case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
+ WRITE_ONCE(sk->sk_err, EPIPE);
break;
case TCP_CLOSE:
return;
default:
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -3791,7 +3791,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;
return mask;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 8a9656248b0f..5cef4d3d21ac 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1046,7 +1046,7 @@ static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval,
static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a)
{
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
memset(a, 0, sizeof(*a));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a0041360ee9d..dadaf85db720 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1350,7 +1350,7 @@ fallback:
subflow->reset_reason = MPTCP_RST_EMPTCP;
reset:
- ssk->sk_err = EBADMSG;
+ WRITE_ONCE(ssk->sk_err, EBADMSG);
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
@@ -1434,7 +1434,7 @@ void __mptcp_error_report(struct sock *sk)
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
- sk->sk_err = -err;
+ WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
smp_wmb();
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c6a6a6099b4e..db1ea361f2da 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1294,7 +1294,7 @@ dying:
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
-/* Returns true if a connection correspondings to the tuple (required
+/* Returns true if a connection corresponds to the tuple (required
for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c
index 52b776bdf526..068e9489e1c2 100644
--- a/net/netfilter/nf_conntrack_ovs.c
+++ b/net/netfilter/nf_conntrack_ovs.c
@@ -6,6 +6,7 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/ipv6_frag.h>
#include <net/ip.h>
+#include <linux/netfilter_ipv6.h>
/* 'skb' should already be pulled to nh_ofs. */
int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
@@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family)
len = skb_ip_totlen(skb);
break;
case NFPROTO_IPV6:
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
+ len = ntohs(ipv6_hdr(skb)->payload_len);
+ if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) {
+ int err = nf_ip6_check_hbh_len(skb, &len);
+
+ if (err)
+ return err;
+ }
+ len += sizeof(struct ipv6hdr);
break;
default:
len = skb->len;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index e29e4ccb5c5a..ce829d434f13 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
- &range->min_proto,
- &range->max_proto) &&
+ &range->min_proto,
+ &range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
return;
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 2182d361e273..acef4155f0da 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
}
return ret;
}
+
+/* Only get and check the lengths, not do any hop-by-hop stuff. */
+int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen)
+{
+ int len, off = sizeof(struct ipv6hdr);
+ unsigned char *nh;
+
+ if (!pskb_may_pull(skb, off + 8))
+ return -ENOMEM;
+ nh = (unsigned char *)(ipv6_hdr(skb) + 1);
+ len = (nh[1] + 1) << 3;
+
+ if (!pskb_may_pull(skb, off + len))
+ return -ENOMEM;
+ nh = skb_network_header(skb);
+
+ off += 2;
+ len -= 2;
+ while (len > 0) {
+ int optlen;
+
+ if (nh[off] == IPV6_TLV_PAD1) {
+ off++;
+ len--;
+ continue;
+ }
+ if (len < 2)
+ return -EBADMSG;
+ optlen = nh[off + 1] + 2;
+ if (optlen > len)
+ return -EBADMSG;
+
+ if (nh[off] == IPV6_TLV_JUMBO) {
+ u32 pkt_len;
+
+ if (nh[off + 1] != 4 || (off & 3) != 2)
+ return -EBADMSG;
+ pkt_len = ntohl(*(__be32 *)(nh + off + 2));
+ if (pkt_len <= IPV6_MAXPLEN ||
+ ipv6_hdr(skb)->payload_len)
+ return -EBADMSG;
+ if (pkt_len > skb->len - sizeof(struct ipv6hdr))
+ return -EBADMSG;
+ *plen = pkt_len;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+
+ return len ? -EBADMSG : 0;
+}
+EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c64277659753..877f1da1a8ac 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2097,8 +2097,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
nl_table[unit].bind = cfg->bind;
nl_table[unit].unbind = cfg->unbind;
nl_table[unit].flags = cfg->flags;
- if (cfg->compare)
- nl_table[unit].compare = cfg->compare;
}
nl_table[unit].registered = 1;
} else {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 5f454c8de6a4..90a3198a9b7f 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -64,7 +64,6 @@ struct netlink_table {
struct module *module;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
- bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d4e76e2ae153..7b9367b233d3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -307,7 +307,8 @@ static void packet_cached_dev_reset(struct packet_sock *po)
static bool packet_use_direct_xmit(const struct packet_sock *po)
{
- return po->xmit == packet_direct_xmit;
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ return READ_ONCE(po->xmit) == packet_direct_xmit;
}
static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -339,14 +340,14 @@ static void __register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
- if (!po->running) {
+ if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
if (po->fanout)
__fanout_link(sk, po);
else
dev_add_pack(&po->prot_hook);
sock_hold(sk);
- po->running = 1;
+ packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1);
}
}
@@ -368,7 +369,7 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
lockdep_assert_held_once(&po->bind_lock);
- po->running = 0;
+ packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0);
if (po->fanout)
__fanout_unlink(sk, po);
@@ -388,7 +389,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync)
{
struct packet_sock *po = pkt_sk(sk);
- if (po->running)
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
__unregister_prot_hook(sk, sync);
}
@@ -473,7 +474,7 @@ static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
struct timespec64 ts;
__u32 ts_status;
- if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+ if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp))))
return 0;
h.raw = frame;
@@ -1306,22 +1307,23 @@ static int __packet_rcv_has_room(const struct packet_sock *po,
static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
- int pressure, ret;
+ bool pressure;
+ int ret;
ret = __packet_rcv_has_room(po, skb);
pressure = ret != ROOM_NORMAL;
- if (READ_ONCE(po->pressure) != pressure)
- WRITE_ONCE(po->pressure, pressure);
+ if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure)
+ packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure);
return ret;
}
static void packet_rcv_try_clear_pressure(struct packet_sock *po)
{
- if (READ_ONCE(po->pressure) &&
+ if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) &&
__packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
- WRITE_ONCE(po->pressure, 0);
+ packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false);
}
static void packet_sock_destruct(struct sock *sk)
@@ -1408,7 +1410,8 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
i = j = min_t(int, po->rollover->sock, num - 1);
do {
po_next = pkt_sk(rcu_dereference(f->arr[i]));
- if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
+ if (po_next != po_skip &&
+ !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) &&
packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
po->rollover->sock = i;
@@ -1781,7 +1784,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
err = -EINVAL;
spin_lock(&po->bind_lock);
- if (po->running &&
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
@@ -2183,7 +2186,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_hatype = dev->type;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2308,7 +2311,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
netoff += sizeof(struct virtio_net_hdr);
do_vnet = true;
}
@@ -2402,7 +2405,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
* closer to the time of capture.
*/
ts_status = tpacket_get_timestamp(skb, &ts,
- po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+ READ_ONCE(po->tp_tstamp) |
+ SOF_TIMESTAMPING_SOFTWARE);
if (!ts_status)
ktime_get_real_ts64(&ts);
@@ -2460,7 +2464,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2670,7 +2674,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
return -EMSGSIZE;
}
- if (unlikely(po->tp_tx_has_off)) {
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) {
int off_min, off_max;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2778,7 +2782,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
+ if ((size_max > dev->mtu + reserve + VLAN_HLEN) &&
+ !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
size_max = dev->mtu + reserve + VLAN_HLEN;
reinit_completion(&po->skb_completion);
@@ -2807,7 +2812,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
status = TP_STATUS_SEND_REQUEST;
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
vnet_hdr = data;
data += sizeof(*vnet_hdr);
tp_len -= sizeof(*vnet_hdr);
@@ -2835,13 +2840,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
addr, hlen, copylen, &sockc);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
- !po->has_vnet_hdr &&
+ !packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR) &&
!packet_extra_vlan_len_allowed(dev, skb))
tp_len = -EMSGSIZE;
if (unlikely(tp_len < 0)) {
tpacket_error:
- if (po->tp_loss) {
+ if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) {
__packet_set_status(po, ph,
TP_STATUS_AVAILABLE);
packet_increment_head(&po->tx_ring);
@@ -2854,7 +2859,7 @@ tpacket_error:
}
}
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
tp_len = -EINVAL;
goto tpacket_error;
@@ -2867,7 +2872,8 @@ tpacket_error:
packet_inc_pending(&po->tx_ring);
status = TP_STATUS_SEND_REQUEST;
- err = po->xmit(skb);
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ err = READ_ONCE(po->xmit)(skb);
if (unlikely(err != 0)) {
if (err > 0)
err = net_xmit_errno(err);
@@ -2988,7 +2994,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
- if (po->has_vnet_hdr) {
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR)) {
err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
if (err)
goto out_unlock;
@@ -3070,7 +3076,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- err = po->xmit(skb);
+ /* Paired with WRITE_ONCE() in packet_setsockopt() */
+ err = READ_ONCE(po->xmit)(skb);
if (unlikely(err != 0)) {
if (err > 0)
err = net_xmit_errno(err);
@@ -3217,7 +3224,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
if (need_rehook) {
dev_hold(dev);
- if (po->running) {
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
rcu_read_unlock();
/* prevents packet_notifier() from calling
* register_prot_hook()
@@ -3230,7 +3237,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
dev->ifindex);
}
- BUG_ON(po->running);
+ BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING));
WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
@@ -3447,7 +3454,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
packet_rcv_try_clear_pressure(pkt_sk(sk));
- if (pkt_sk(sk)->has_vnet_hdr) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_HAS_VNET_HDR)) {
err = packet_rcv_vnet(msg, skb, &len);
if (err)
goto out_free;
@@ -3511,7 +3518,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
- if (pkt_sk(sk)->auxdata) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
struct tpacket_auxdata aux;
aux.tp_status = TP_STATUS_USER;
@@ -3882,7 +3889,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
- po->tp_loss = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val);
ret = 0;
}
release_sock(sk);
@@ -3897,9 +3904,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->auxdata = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
return 0;
}
case PACKET_ORIGDEV:
@@ -3911,9 +3916,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->origdev = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
return 0;
}
case PACKET_VNET_HDR:
@@ -3931,7 +3934,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
ret = -EBUSY;
} else {
- po->has_vnet_hdr = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_HAS_VNET_HDR, val);
ret = 0;
}
release_sock(sk);
@@ -3946,7 +3949,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_tstamp = val;
+ WRITE_ONCE(po->tp_tstamp, val);
return 0;
}
case PACKET_FANOUT:
@@ -3993,7 +3996,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
lock_sock(sk);
if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
- po->tp_tx_has_off = !!val;
+ packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val);
release_sock(sk);
return 0;
@@ -4007,7 +4010,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
- po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
+ /* Paired with all lockless reads of po->xmit */
+ WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit);
return 0;
}
default:
@@ -4058,13 +4062,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
break;
case PACKET_AUXDATA:
- val = po->auxdata;
+ val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
break;
case PACKET_ORIGDEV:
- val = po->origdev;
+ val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
break;
case PACKET_VNET_HDR:
- val = po->has_vnet_hdr;
+ val = packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR);
break;
case PACKET_VERSION:
val = po->tp_version;
@@ -4094,10 +4098,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
val = po->tp_reserve;
break;
case PACKET_LOSS:
- val = po->tp_loss;
+ val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS);
break;
case PACKET_TIMESTAMP:
- val = po->tp_tstamp;
+ val = READ_ONCE(po->tp_tstamp);
break;
case PACKET_FANOUT:
val = (po->fanout ?
@@ -4119,7 +4123,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
- val = po->tp_tx_has_off;
+ val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF);
break;
case PACKET_QDISC_BYPASS:
val = packet_use_direct_xmit(po);
@@ -4157,7 +4161,7 @@ static int packet_notifier(struct notifier_block *this,
case NETDEV_DOWN:
if (dev->ifindex == po->ifindex) {
spin_lock(&po->bind_lock);
- if (po->running) {
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
__unregister_prot_hook(sk, false);
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
@@ -4468,7 +4472,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
/* Detach socket from network */
spin_lock(&po->bind_lock);
- was_running = po->running;
+ was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING);
num = po->num;
if (was_running) {
WRITE_ONCE(po->num, 0);
@@ -4679,7 +4683,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
s->sk_type,
ntohs(READ_ONCE(po->num)),
READ_ONCE(po->ifindex),
- po->running,
+ packet_sock_flag(po, PACKET_SOCK_RUNNING),
atomic_read(&s->sk_rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
sock_i_ino(s));
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 07812ae5ca07..de4ced5cf3e8 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -18,18 +18,18 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_version = po->tp_version;
pinfo.pdi_reserve = po->tp_reserve;
pinfo.pdi_copy_thresh = po->copy_thresh;
- pinfo.pdi_tstamp = po->tp_tstamp;
+ pinfo.pdi_tstamp = READ_ONCE(po->tp_tstamp);
pinfo.pdi_flags = 0;
- if (po->running)
+ if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
pinfo.pdi_flags |= PDI_RUNNING;
- if (po->auxdata)
+ if (packet_sock_flag(po, PACKET_SOCK_AUXDATA))
pinfo.pdi_flags |= PDI_AUXDATA;
- if (po->origdev)
+ if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
pinfo.pdi_flags |= PDI_ORIGDEV;
- if (po->has_vnet_hdr)
+ if (packet_sock_flag(po, PACKET_SOCK_HAS_VNET_HDR))
pinfo.pdi_flags |= PDI_VNETHDR;
- if (po->tp_loss)
+ if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS))
pinfo.pdi_flags |= PDI_LOSS;
return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 48af35b1aed2..680703dbce5e 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -116,13 +116,7 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
- unsigned int running; /* bind_lock must be held */
- unsigned int auxdata:1, /* writer must hold sock lock */
- origdev:1,
- has_vnet_hdr:1,
- tp_loss:1,
- tp_tx_has_off:1;
- int pressure;
+ unsigned long flags;
int ifindex; /* bound device */
__be16 num;
struct packet_rollover *rollover;
@@ -144,4 +138,30 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
return (struct packet_sock *)sk;
}
+enum packet_sock_flags {
+ PACKET_SOCK_ORIGDEV,
+ PACKET_SOCK_AUXDATA,
+ PACKET_SOCK_TX_HAS_OFF,
+ PACKET_SOCK_TP_LOSS,
+ PACKET_SOCK_HAS_VNET_HDR,
+ PACKET_SOCK_RUNNING,
+ PACKET_SOCK_PRESSURE,
+};
+
+static inline void packet_sock_flag_set(struct packet_sock *po,
+ enum packet_sock_flags flag,
+ bool val)
+{
+ if (val)
+ set_bit(flag, &po->flags);
+ else
+ clear_bit(flag, &po->flags);
+}
+
+static inline bool packet_sock_flag(const struct packet_sock *po,
+ enum packet_sock_flags flag)
+{
+ return test_bit(flag, &po->flags);
+}
+
#endif
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aba789c30a2e..fdb8f429333d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -639,14 +639,16 @@ void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
return;
if (hrtimer_is_queued(&wd->timer)) {
+ u64 softexpires;
+
+ softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
/* If timer is already set in [expires, expires + delta_ns],
* do not reprogram it.
*/
- if (wd->last_expires - expires <= delta_ns)
+ if (softexpires - expires <= delta_ns)
return;
}
- wd->last_expires = expires;
hrtimer_start_range_ns(&wd->timer,
ns_to_ktime(expires),
delta_ns,
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index e845e4588535..0448398408d8 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -13,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
offload.o stream_sched.o stream_sched_prio.o \
- stream_sched_rr.o stream_interleave.o
+ stream_sched_rr.o stream_sched_fc.o \
+ stream_interleave.o
sctp_diag-y := diag.o
diff --git a/net/sctp/input.c b/net/sctp/input.c
index bf70371301ff..127bf28a6033 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -585,7 +585,7 @@ static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
sk->sk_err = err;
sk_error_report(sk);
} else { /* Only an error on timeout */
- sk->sk_err_soft = err;
+ WRITE_ONCE(sk->sk_err_soft, err);
}
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 62b436a2c8fe..43f2731bf590 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -155,7 +155,7 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
sk->sk_err = err;
sk_error_report(sk);
} else {
- sk->sk_err_soft = err;
+ WRITE_ONCE(sk->sk_err_soft, err);
}
}
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 330067002deb..e843760e9aaa 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -124,6 +124,8 @@ void sctp_sched_ops_init(void)
sctp_sched_ops_fcfs_init();
sctp_sched_ops_prio_init();
sctp_sched_ops_rr_init();
+ sctp_sched_ops_fc_init();
+ sctp_sched_ops_wfq_init();
}
static void sctp_sched_free_sched(struct sctp_stream *stream)
diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c
new file mode 100644
index 000000000000..4bd18a497a6d
--- /dev/null
+++ b/net/sctp/stream_sched_fc.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2022
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Fair Capacity and Weighted Fair Queueing handling
+ * RFC 8260 section 3.5 and 3.6
+ */
+static void sctp_sched_fc_unsched_all(struct sctp_stream *stream);
+
+static int sctp_sched_wfq_set(struct sctp_stream *stream, __u16 sid,
+ __u16 weight, gfp_t gfp)
+{
+ struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext;
+
+ if (!weight)
+ return -EINVAL;
+
+ soute->fc_weight = weight;
+ return 0;
+}
+
+static int sctp_sched_wfq_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext;
+
+ *value = soute->fc_weight;
+ return 0;
+}
+
+static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid,
+ __u16 weight, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_fc_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ return 0;
+}
+
+static int sctp_sched_fc_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->fc_list);
+
+ return 0;
+}
+
+static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext;
+
+ INIT_LIST_HEAD(&soute->fc_list);
+ soute->fc_length = 0;
+ soute->fc_weight = 1;
+
+ return 0;
+}
+
+static void sctp_sched_fc_free_sid(struct sctp_stream *stream, __u16 sid)
+{
+}
+
+static void sctp_sched_fc_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ struct sctp_stream_out_ext *pos;
+
+ if (!list_empty(&soute->fc_list))
+ return;
+
+ list_for_each_entry(pos, &stream->fc_list, fc_list)
+ if ((__u64)pos->fc_length * soute->fc_weight >=
+ (__u64)soute->fc_length * pos->fc_weight)
+ break;
+ list_add_tail(&soute->fc_list, &pos->fc_list);
+}
+
+static void sctp_sched_fc_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext);
+}
+
+static struct sctp_chunk *sctp_sched_fc_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ return NULL;
+
+ /* Find which chunk is next */
+ if (stream->out_curr)
+ soute = stream->out_curr->ext;
+ else
+ soute = list_entry(stream->fc_list.next, struct sctp_stream_out_ext, fc_list);
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+ sctp_sched_dequeue_common(q, ch);
+ return ch;
+}
+
+static void sctp_sched_fc_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_out_ext *soute, *pos;
+ __u16 sid, i;
+
+ sid = sctp_chunk_stream_no(ch);
+ soute = SCTP_SO(stream, sid)->ext;
+ /* reduce all fc_lengths by U32_MAX / 4 if the current fc_length overflows. */
+ if (soute->fc_length > U32_MAX - ch->skb->len) {
+ for (i = 0; i < stream->outcnt; i++) {
+ pos = SCTP_SO(stream, i)->ext;
+ if (!pos)
+ continue;
+ if (pos->fc_length <= (U32_MAX >> 2)) {
+ pos->fc_length = 0;
+ continue;
+ }
+ pos->fc_length -= (U32_MAX >> 2);
+ }
+ }
+ soute->fc_length += ch->skb->len;
+
+ if (list_empty(&soute->outq)) {
+ list_del_init(&soute->fc_list);
+ return;
+ }
+
+ pos = soute;
+ list_for_each_entry_continue(pos, &stream->fc_list, fc_list)
+ if ((__u64)pos->fc_length * soute->fc_weight >=
+ (__u64)soute->fc_length * pos->fc_weight)
+ break;
+ list_move_tail(&soute->fc_list, &pos->fc_list);
+}
+
+static void sctp_sched_fc_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid = sctp_chunk_stream_no(ch);
+
+ if (SCTP_SO(stream, sid)->ext)
+ sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext);
+ }
+}
+
+static void sctp_sched_fc_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_out_ext *soute, *tmp;
+
+ list_for_each_entry_safe(soute, tmp, &stream->fc_list, fc_list)
+ list_del_init(&soute->fc_list);
+}
+
+static struct sctp_sched_ops sctp_sched_fc = {
+ .set = sctp_sched_fc_set,
+ .get = sctp_sched_fc_get,
+ .init = sctp_sched_fc_init,
+ .init_sid = sctp_sched_fc_init_sid,
+ .free_sid = sctp_sched_fc_free_sid,
+ .enqueue = sctp_sched_fc_enqueue,
+ .dequeue = sctp_sched_fc_dequeue,
+ .dequeue_done = sctp_sched_fc_dequeue_done,
+ .sched_all = sctp_sched_fc_sched_all,
+ .unsched_all = sctp_sched_fc_unsched_all,
+};
+
+void sctp_sched_ops_fc_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc);
+}
+
+static struct sctp_sched_ops sctp_sched_wfq = {
+ .set = sctp_sched_wfq_set,
+ .get = sctp_sched_wfq_get,
+ .init = sctp_sched_fc_init,
+ .init_sid = sctp_sched_fc_init_sid,
+ .free_sid = sctp_sched_fc_free_sid,
+ .enqueue = sctp_sched_fc_enqueue,
+ .dequeue = sctp_sched_fc_dequeue,
+ .dequeue_done = sctp_sched_fc_dequeue_done,
+ .sched_all = sctp_sched_fc_sched_all,
+ .unsched_all = sctp_sched_fc_unsched_all,
+};
+
+void sctp_sched_ops_wfq_init(void)
+{
+ sctp_sched_ops_register(SCTP_SS_WFQ, &sctp_sched_wfq);
+}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 08b457c2d294..1645fba0d2d3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -106,7 +106,10 @@ struct smc_link {
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
- atomic_t wr_tx_refcnt; /* tx refs to link */
+ struct {
+ struct percpu_ref wr_tx_refs;
+ } ____cacheline_aligned_in_smp;
+ struct completion tx_ref_comp;
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -122,7 +125,10 @@ struct smc_link {
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
- atomic_t wr_reg_refcnt; /* reg refs to link */
+ struct {
+ struct percpu_ref wr_reg_refs;
+ } ____cacheline_aligned_in_smp;
+ struct completion reg_ref_comp;
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 3b0b7710c6b0..fbee2493091f 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -429,7 +429,7 @@ static void smcd_register_dev(struct ism_dev *ism)
u8 *system_eid = NULL;
system_eid = smcd->ops->get_system_eid();
- if (system_eid[24] != '0' || system_eid[28] != '0') {
+ if (smcd->ops->supports_v2()) {
smc_ism_v2_capable = true;
memcpy(smc_ism_v2_system_eid, system_eid,
SMC_MAX_EID_LEN);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index b0678a417e09..0021065a600a 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -377,12 +377,11 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
- atomic_inc(&link->wr_reg_refcnt);
+ percpu_ref_get(&link->wr_reg_refs);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
- if (atomic_dec_and_test(&link->wr_reg_refcnt))
- wake_up_all(&link->wr_reg_wait);
+ percpu_ref_put(&link->wr_reg_refs);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -647,8 +646,10 @@ void smc_wr_free_link(struct smc_link *lnk)
smc_wr_wakeup_tx_wait(lnk);
smc_wr_tx_wait_no_pending_sends(lnk);
- wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
- wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
+ percpu_ref_kill(&lnk->wr_reg_refs);
+ wait_for_completion(&lnk->reg_ref_comp);
+ percpu_ref_kill(&lnk->wr_tx_refs);
+ wait_for_completion(&lnk->tx_ref_comp);
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -847,6 +848,20 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev)
tasklet_setup(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn);
}
+static void smcr_wr_tx_refs_free(struct percpu_ref *ref)
+{
+ struct smc_link *lnk = container_of(ref, struct smc_link, wr_tx_refs);
+
+ complete(&lnk->tx_ref_comp);
+}
+
+static void smcr_wr_reg_refs_free(struct percpu_ref *ref)
+{
+ struct smc_link *lnk = container_of(ref, struct smc_link, wr_reg_refs);
+
+ complete(&lnk->reg_ref_comp);
+}
+
int smc_wr_create_link(struct smc_link *lnk)
{
struct ib_device *ibdev = lnk->smcibdev->ibdev;
@@ -890,9 +905,15 @@ int smc_wr_create_link(struct smc_link *lnk)
smc_wr_init_sge(lnk);
bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
init_waitqueue_head(&lnk->wr_tx_wait);
- atomic_set(&lnk->wr_tx_refcnt, 0);
+ rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL);
+ if (rc)
+ goto dma_unmap;
+ init_completion(&lnk->tx_ref_comp);
init_waitqueue_head(&lnk->wr_reg_wait);
- atomic_set(&lnk->wr_reg_refcnt, 0);
+ rc = percpu_ref_init(&lnk->wr_reg_refs, smcr_wr_reg_refs_free, 0, GFP_KERNEL);
+ if (rc)
+ goto dma_unmap;
+ init_completion(&lnk->reg_ref_comp);
init_waitqueue_head(&lnk->wr_rx_empty_wait);
return rc;
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 45e9b894d3f8..f3008dda222a 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -63,14 +63,13 @@ static inline bool smc_wr_tx_link_hold(struct smc_link *link)
{
if (!smc_link_sendable(link))
return false;
- atomic_inc(&link->wr_tx_refcnt);
+ percpu_ref_get(&link->wr_tx_refs);
return true;
}
static inline void smc_wr_tx_link_put(struct smc_link *link)
{
- if (atomic_dec_and_test(&link->wr_tx_refcnt))
- wake_up_all(&link->wr_tx_wait);
+ percpu_ref_put(&link->wr_tx_refs);
}
static inline void smc_wr_drain_cq(struct smc_link *lnk)
diff --git a/net/socket.c b/net/socket.c
index 9c92c0e6c4da..73e493da4589 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2292,9 +2292,9 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
int __user *optlen)
{
+ int max_optlen __maybe_unused;
int err, fput_needed;
struct socket *sock;
- int max_optlen;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b0f18ecce44..fb31e8a4409e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -557,7 +557,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
* when peer was not connected to us.
*/
if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
- other->sk_err = ECONNRESET;
+ WRITE_ONCE(other->sk_err, ECONNRESET);
sk_error_report(other);
}
}
@@ -630,7 +630,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
/* No more writes */
skpair->sk_shutdown = SHUTDOWN_MASK;
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
- skpair->sk_err = ECONNRESET;
+ WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
@@ -3165,7 +3165,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
mask = 0;
/* exceptional events? */
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
@@ -3208,7 +3208,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ if (READ_ONCE(sk->sk_err) ||
+ !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index dc2763540393..2405f0f9af31 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -305,7 +305,7 @@ void unix_gc(void)
* release.path eventually putting registered files.
*/
skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->scm_io_uring) {
+ if (skb->destructor == io_uring_destruct_scm) {
__skb_unlink(skb, &hitlist);
skb_queue_tail(&skb->sk->sk_receive_queue, skb);
}
diff --git a/net/unix/scm.c b/net/unix/scm.c
index aa27a02478dc..f9152881d77f 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -152,3 +152,9 @@ void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
EXPORT_SYMBOL(unix_destruct_scm);
+
+void io_uring_destruct_scm(struct sk_buff *skb)
+{
+ unix_destruct_scm(skb);
+}
+EXPORT_SYMBOL(io_uring_destruct_scm);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 81d3f40d6235..ac059cefbeb3 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -673,6 +673,39 @@ static bool cfg80211_allowed_address(struct wireless_dev *wdev, const u8 *addr)
return ether_addr_equal(addr, wdev_address(wdev));
}
+static bool cfg80211_allowed_random_address(struct wireless_dev *wdev,
+ const struct ieee80211_mgmt *mgmt)
+{
+ if (ieee80211_is_auth(mgmt->frame_control) ||
+ ieee80211_is_deauth(mgmt->frame_control)) {
+ /* Allow random TA to be used with authentication and
+ * deauthentication frames if the driver has indicated support.
+ */
+ if (wiphy_ext_feature_isset(
+ wdev->wiphy,
+ NL80211_EXT_FEATURE_AUTH_AND_DEAUTH_RANDOM_TA))
+ return true;
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
+ /* Allow random TA to be used with Public Action frames if the
+ * driver has indicated support.
+ */
+ if (!wdev->connected &&
+ wiphy_ext_feature_isset(
+ wdev->wiphy,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA))
+ return true;
+
+ if (wdev->connected &&
+ wiphy_ext_feature_isset(
+ wdev->wiphy,
+ NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED))
+ return true;
+ }
+
+ return false;
+}
+
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
struct cfg80211_mgmt_tx_params *params, u64 *cookie)
@@ -774,25 +807,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
return err;
}
- if (!cfg80211_allowed_address(wdev, mgmt->sa)) {
- /* Allow random TA to be used with Public Action frames if the
- * driver has indicated support for this. Otherwise, only allow
- * the local address to be used.
- */
- if (!ieee80211_is_action(mgmt->frame_control) ||
- mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
- return -EINVAL;
- if (!wdev->connected &&
- !wiphy_ext_feature_isset(
- &rdev->wiphy,
- NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA))
- return -EINVAL;
- if (wdev->connected &&
- !wiphy_ext_feature_isset(
- &rdev->wiphy,
- NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED))
- return -EINVAL;
- }
+ if (!cfg80211_allowed_address(wdev, mgmt->sa) &&
+ !cfg80211_allowed_random_address(wdev, mgmt))
+ return -EINVAL;
/* Transmit the management frame as requested by user space */
return rdev_mgmt_tx(rdev, wdev, params, cookie);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4f63059efd81..bfa15defc04e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -812,6 +812,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
[NL80211_ATTR_PUNCT_BITMAP] =
NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range),
+
+ [NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 },
+ [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -2970,6 +2973,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO)
nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT);
+ if (rdev->wiphy.hw_timestamp_max_peers &&
+ nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS,
+ rdev->wiphy.hw_timestamp_max_peers))
+ goto nla_put_failure;
+
/* done */
state->split_start = 0;
break;
@@ -9025,7 +9033,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
struct nlattr *attr;
struct wiphy *wiphy;
int err, tmp, n_ssids = 0, n_channels, i;
- size_t ie_len;
+ size_t ie_len, size;
wiphy = &rdev->wiphy;
@@ -9070,10 +9078,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
if (ie_len > wiphy->max_scan_ie_len)
return -EINVAL;
- request = kzalloc(sizeof(*request)
- + sizeof(*request->ssids) * n_ssids
- + sizeof(*request->channels) * n_channels
- + ie_len, GFP_KERNEL);
+ size = struct_size(request, channels, n_channels);
+ size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
+ size = size_add(size, ie_len);
+ request = kzalloc(size, GFP_KERNEL);
if (!request)
return -ENOMEM;
@@ -9406,7 +9414,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
struct nlattr *attr;
int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
enum nl80211_band band;
- size_t ie_len;
+ size_t ie_len, size;
struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
s32 default_match_rssi = NL80211_SCAN_RSSI_THOLD_OFF;
@@ -9515,12 +9523,14 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]))
return ERR_PTR(-EINVAL);
- request = kzalloc(sizeof(*request)
- + sizeof(*request->ssids) * n_ssids
- + sizeof(*request->match_sets) * n_match_sets
- + sizeof(*request->scan_plans) * n_plans
- + sizeof(*request->channels) * n_channels
- + ie_len, GFP_KERNEL);
+ size = struct_size(request, channels, n_channels);
+ size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
+ size = size_add(size, array_size(sizeof(*request->match_sets),
+ n_match_sets));
+ size = size_add(size, array_size(sizeof(*request->scan_plans),
+ n_plans));
+ size = size_add(size, ie_len);
+ request = kzalloc(size, GFP_KERNEL);
if (!request)
return ERR_PTR(-ENOMEM);
@@ -16166,6 +16176,29 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info)
return ret;
}
+static int nl80211_set_hw_timestamp(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_set_hw_timestamp hwts = {};
+
+ if (!rdev->wiphy.hw_timestamp_max_peers)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_MAC] &&
+ rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS)
+ return -EOPNOTSUPP;
+
+ if (info->attrs[NL80211_ATTR_MAC])
+ hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+ hwts.enable =
+ nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]);
+
+ return rdev_set_hw_timestamp(rdev, dev, &hwts);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -17340,6 +17373,12 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_MLO_VALID_LINK_ID),
},
+ {
+ .cmd = NL80211_CMD_SET_HW_TIMESTAMP,
+ .doit = nl80211_set_hw_timestamp,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -18721,7 +18760,9 @@ EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext);
static int __nl80211_rx_control_port(struct net_device *dev,
struct sk_buff *skb,
- bool unencrypted, gfp_t gfp)
+ bool unencrypted,
+ int link_id,
+ gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -18753,6 +18794,8 @@ static int __nl80211_rx_control_port(struct net_device *dev,
NL80211_ATTR_PAD) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
+ (link_id >= 0 &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) ||
(unencrypted && nla_put_flag(msg,
NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
goto nla_put_failure;
@@ -18771,13 +18814,14 @@ static int __nl80211_rx_control_port(struct net_device *dev,
return -ENOBUFS;
}
-bool cfg80211_rx_control_port(struct net_device *dev,
- struct sk_buff *skb, bool unencrypted)
+bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb,
+ bool unencrypted, int link_id)
{
int ret;
- trace_cfg80211_rx_control_port(dev, skb, unencrypted);
- ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC);
+ trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id);
+ ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id,
+ GFP_ATOMIC);
trace_cfg80211_return_bool(ret == 0);
return ret == 0;
}
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 13b209a8db28..2e497cf26ef2 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1494,4 +1494,21 @@ rdev_del_link_station(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int
+rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_set_hw_timestamp *hwts)
+{
+ struct wiphy *wiphy = &rdev->wiphy;
+ int ret;
+
+ if (!rdev->ops->set_hw_timestamp)
+ return -EOPNOTSUPP;
+
+ trace_rdev_set_hw_timestamp(wiphy, dev, hwts);
+ ret = rdev->ops->set_hw_timestamp(wiphy, dev, hwts);
+ trace_rdev_return_int(wiphy, ret);
+
+ return ret;
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 790bc31cf82e..a1382255fab3 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1810,8 +1810,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
}
int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
- enum nl80211_band band,
- enum cfg80211_bss_frame_type ftype)
+ enum nl80211_band band)
{
const struct element *tmp;
@@ -1830,9 +1829,7 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
if (!he_6ghz_oper)
return -1;
- if (ftype != CFG80211_BSS_FTYPE_BEACON ||
- he_6ghz_oper->control & IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON)
- return he_6ghz_oper->primary;
+ return he_6ghz_oper->primary;
}
} else if (band == NL80211_BAND_S1GHZ) {
tmp = cfg80211_find_elem(WLAN_EID_S1G_OPERATION, ie, ielen);
@@ -1870,15 +1867,14 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number);
static struct ieee80211_channel *
cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
struct ieee80211_channel *channel,
- enum nl80211_bss_scan_width scan_width,
- enum cfg80211_bss_frame_type ftype)
+ enum nl80211_bss_scan_width scan_width)
{
u32 freq;
int channel_number;
struct ieee80211_channel *alt_channel;
channel_number = cfg80211_get_ies_channel_number(ie, ielen,
- channel->band, ftype);
+ channel->band);
if (channel_number < 0) {
/* No channel information in frame payload */
@@ -1888,22 +1884,21 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
freq = ieee80211_channel_to_freq_khz(channel_number, channel->band);
/*
- * In 6GHz, duplicated beacon indication is relevant for
- * beacons only.
+ * Frame info (beacon/probe resp) is the same as the received channel,
+ * no need for further processing.
*/
- if (channel->band == NL80211_BAND_6GHZ &&
- (freq == channel->center_freq ||
- abs(freq - channel->center_freq) > 80))
+ if (freq == ieee80211_channel_to_khz(channel))
return channel;
alt_channel = ieee80211_get_channel_khz(wiphy, freq);
if (!alt_channel) {
- if (channel->band == NL80211_BAND_2GHZ) {
+ if (channel->band == NL80211_BAND_2GHZ ||
+ channel->band == NL80211_BAND_6GHZ) {
/*
* Better not allow unexpected channels when that could
* be going beyond the 1-11 range (e.g., discovering
* BSS on channel 12 when radio is configured for
- * channel 11.
+ * channel 11) or beyond the 6 GHz channel range.
*/
return NULL;
}
@@ -1957,7 +1952,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
return NULL;
channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan,
- data->scan_width, ftype);
+ data->scan_width);
if (!channel)
return NULL;
@@ -2391,7 +2386,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
size_t ielen, min_hdr_len = offsetof(struct ieee80211_mgmt,
u.probe_resp.variable);
int bss_type;
- enum cfg80211_bss_frame_type ftype;
BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
offsetof(struct ieee80211_mgmt, u.beacon.variable));
@@ -2428,16 +2422,8 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
variable = ext->u.s1g_beacon.variable;
}
- if (ieee80211_is_beacon(mgmt->frame_control))
- ftype = CFG80211_BSS_FTYPE_BEACON;
- else if (ieee80211_is_probe_resp(mgmt->frame_control))
- ftype = CFG80211_BSS_FTYPE_PRESP;
- else
- ftype = CFG80211_BSS_FTYPE_UNKNOWN;
-
channel = cfg80211_get_bss_channel(wiphy, variable,
- ielen, data->chan, data->scan_width,
- ftype);
+ ielen, data->chan, data->scan_width);
if (!channel)
return NULL;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ca7474eec723..716a1fa70069 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -3165,14 +3165,15 @@ TRACE_EVENT(cfg80211_control_port_tx_status,
TRACE_EVENT(cfg80211_rx_control_port,
TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
- bool unencrypted),
- TP_ARGS(netdev, skb, unencrypted),
+ bool unencrypted, int link_id),
+ TP_ARGS(netdev, skb, unencrypted, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
__field(int, len)
MAC_ENTRY(from)
__field(u16, proto)
__field(bool, unencrypted)
+ __field(int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
@@ -3180,10 +3181,12 @@ TRACE_EVENT(cfg80211_rx_control_port,
MAC_ASSIGN(from, eth_hdr(skb)->h_source);
__entry->proto = be16_to_cpu(skb->protocol);
__entry->unencrypted = unencrypted;
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s",
+ TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s, link: %d",
NETDEV_PR_ARG, __entry->len, __entry->from,
- __entry->proto, BOOL_TO_STR(__entry->unencrypted))
+ __entry->proto, BOOL_TO_STR(__entry->unencrypted),
+ __entry->link_id)
);
TRACE_EVENT(cfg80211_cqm_rssi_notify,
@@ -3918,6 +3921,31 @@ TRACE_EVENT(rdev_del_link_station,
__entry->link_id)
);
+TRACE_EVENT(rdev_set_hw_timestamp,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_set_hw_timestamp *hwts),
+
+ TP_ARGS(wiphy, netdev, hwts),
+
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(macaddr)
+ __field(bool, enable)
+ ),
+
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(macaddr, hwts->macaddr);
+ __entry->enable = hwts->enable;
+ ),
+
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac %pM, enable: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->macaddr,
+ __entry->enable)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 771d0fa90ef5..0c38d7175922 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -24,6 +24,7 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
return ERR_PTR(-ENOMEM);
bpf_map_inc(&map->map);
+ atomic_inc(&map->count);
node->map = map;
node->map_entry = map_entry;
@@ -32,8 +33,11 @@ static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
static void xsk_map_node_free(struct xsk_map_node *node)
{
+ struct xsk_map *map = node->map;
+
bpf_map_put(&node->map->map);
kfree(node);
+ atomic_dec(&map->count);
}
static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
@@ -85,6 +89,14 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
return &m->map;
}
+static u64 xsk_map_mem_usage(const struct bpf_map *map)
+{
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+
+ return struct_size(m, xsk_map, map->max_entries) +
+ (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node);
+}
+
static void xsk_map_free(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
@@ -267,6 +279,7 @@ const struct bpf_map_ops xsk_map_ops = {
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_mem_usage = xsk_map_mem_usage,
.map_btf_id = &xsk_map_btf_ids[0],
.map_redirect = xsk_map_redirect,
};
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index a7355b4b9bb8..00d3bdd386e2 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -317,7 +317,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
switch (sk->sk_family) {
case AF_INET: {
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
print_ipv4_addr(ab, inet->inet_rcv_saddr,
inet->inet_sport,
@@ -329,7 +329,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
}
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6: {
- struct inet_sock *inet = inet_sk(sk);
+ const struct inet_sock *inet = inet_sk(sk);
print_ipv6_addr(ab, &sk->sk_v6_rcv_saddr,
inet->inet_sport,
diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
deleted file mode 100644
index b551b741653d..000000000000
--- a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
-#define _UAPI__ASM_BPF_PERF_EVENT_H__
-
-#include <asm/ptrace.h>
-
-typedef struct user_pt_regs bpf_user_pt_regs_t;
-
-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
deleted file mode 100644
index 0a8e37a519f2..000000000000
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
-#define _UAPI__ASM_BPF_PERF_EVENT_H__
-
-#include "ptrace.h"
-
-typedef user_pt_regs bpf_user_pt_regs_t;
-
-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
deleted file mode 100644
index ad64d673b5e6..000000000000
--- a/tools/arch/s390/include/uapi/asm/ptrace.h
+++ /dev/null
@@ -1,458 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- * Copyright IBM Corp. 1999, 2000
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- */
-
-#ifndef _UAPI_S390_PTRACE_H
-#define _UAPI_S390_PTRACE_H
-
-/*
- * Offsets in the user_regs_struct. They are used for the ptrace
- * system call and in entry.S
- */
-#ifndef __s390x__
-
-#define PT_PSWMASK 0x00
-#define PT_PSWADDR 0x04
-#define PT_GPR0 0x08
-#define PT_GPR1 0x0C
-#define PT_GPR2 0x10
-#define PT_GPR3 0x14
-#define PT_GPR4 0x18
-#define PT_GPR5 0x1C
-#define PT_GPR6 0x20
-#define PT_GPR7 0x24
-#define PT_GPR8 0x28
-#define PT_GPR9 0x2C
-#define PT_GPR10 0x30
-#define PT_GPR11 0x34
-#define PT_GPR12 0x38
-#define PT_GPR13 0x3C
-#define PT_GPR14 0x40
-#define PT_GPR15 0x44
-#define PT_ACR0 0x48
-#define PT_ACR1 0x4C
-#define PT_ACR2 0x50
-#define PT_ACR3 0x54
-#define PT_ACR4 0x58
-#define PT_ACR5 0x5C
-#define PT_ACR6 0x60
-#define PT_ACR7 0x64
-#define PT_ACR8 0x68
-#define PT_ACR9 0x6C
-#define PT_ACR10 0x70
-#define PT_ACR11 0x74
-#define PT_ACR12 0x78
-#define PT_ACR13 0x7C
-#define PT_ACR14 0x80
-#define PT_ACR15 0x84
-#define PT_ORIGGPR2 0x88
-#define PT_FPC 0x90
-/*
- * A nasty fact of life that the ptrace api
- * only supports passing of longs.
- */
-#define PT_FPR0_HI 0x98
-#define PT_FPR0_LO 0x9C
-#define PT_FPR1_HI 0xA0
-#define PT_FPR1_LO 0xA4
-#define PT_FPR2_HI 0xA8
-#define PT_FPR2_LO 0xAC
-#define PT_FPR3_HI 0xB0
-#define PT_FPR3_LO 0xB4
-#define PT_FPR4_HI 0xB8
-#define PT_FPR4_LO 0xBC
-#define PT_FPR5_HI 0xC0
-#define PT_FPR5_LO 0xC4
-#define PT_FPR6_HI 0xC8
-#define PT_FPR6_LO 0xCC
-#define PT_FPR7_HI 0xD0
-#define PT_FPR7_LO 0xD4
-#define PT_FPR8_HI 0xD8
-#define PT_FPR8_LO 0XDC
-#define PT_FPR9_HI 0xE0
-#define PT_FPR9_LO 0xE4
-#define PT_FPR10_HI 0xE8
-#define PT_FPR10_LO 0xEC
-#define PT_FPR11_HI 0xF0
-#define PT_FPR11_LO 0xF4
-#define PT_FPR12_HI 0xF8
-#define PT_FPR12_LO 0xFC
-#define PT_FPR13_HI 0x100
-#define PT_FPR13_LO 0x104
-#define PT_FPR14_HI 0x108
-#define PT_FPR14_LO 0x10C
-#define PT_FPR15_HI 0x110
-#define PT_FPR15_LO 0x114
-#define PT_CR_9 0x118
-#define PT_CR_10 0x11C
-#define PT_CR_11 0x120
-#define PT_IEEE_IP 0x13C
-#define PT_LASTOFF PT_IEEE_IP
-#define PT_ENDREGS 0x140-1
-
-#define GPR_SIZE 4
-#define CR_SIZE 4
-
-#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
-
-#else /* __s390x__ */
-
-#define PT_PSWMASK 0x00
-#define PT_PSWADDR 0x08
-#define PT_GPR0 0x10
-#define PT_GPR1 0x18
-#define PT_GPR2 0x20
-#define PT_GPR3 0x28
-#define PT_GPR4 0x30
-#define PT_GPR5 0x38
-#define PT_GPR6 0x40
-#define PT_GPR7 0x48
-#define PT_GPR8 0x50
-#define PT_GPR9 0x58
-#define PT_GPR10 0x60
-#define PT_GPR11 0x68
-#define PT_GPR12 0x70
-#define PT_GPR13 0x78
-#define PT_GPR14 0x80
-#define PT_GPR15 0x88
-#define PT_ACR0 0x90
-#define PT_ACR1 0x94
-#define PT_ACR2 0x98
-#define PT_ACR3 0x9C
-#define PT_ACR4 0xA0
-#define PT_ACR5 0xA4
-#define PT_ACR6 0xA8
-#define PT_ACR7 0xAC
-#define PT_ACR8 0xB0
-#define PT_ACR9 0xB4
-#define PT_ACR10 0xB8
-#define PT_ACR11 0xBC
-#define PT_ACR12 0xC0
-#define PT_ACR13 0xC4
-#define PT_ACR14 0xC8
-#define PT_ACR15 0xCC
-#define PT_ORIGGPR2 0xD0
-#define PT_FPC 0xD8
-#define PT_FPR0 0xE0
-#define PT_FPR1 0xE8
-#define PT_FPR2 0xF0
-#define PT_FPR3 0xF8
-#define PT_FPR4 0x100
-#define PT_FPR5 0x108
-#define PT_FPR6 0x110
-#define PT_FPR7 0x118
-#define PT_FPR8 0x120
-#define PT_FPR9 0x128
-#define PT_FPR10 0x130
-#define PT_FPR11 0x138
-#define PT_FPR12 0x140
-#define PT_FPR13 0x148
-#define PT_FPR14 0x150
-#define PT_FPR15 0x158
-#define PT_CR_9 0x160
-#define PT_CR_10 0x168
-#define PT_CR_11 0x170
-#define PT_IEEE_IP 0x1A8
-#define PT_LASTOFF PT_IEEE_IP
-#define PT_ENDREGS 0x1B0-1
-
-#define GPR_SIZE 8
-#define CR_SIZE 8
-
-#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
-
-#endif /* __s390x__ */
-
-#define NUM_GPRS 16
-#define NUM_FPRS 16
-#define NUM_CRS 16
-#define NUM_ACRS 16
-
-#define NUM_CR_WORDS 3
-
-#define FPR_SIZE 8
-#define FPC_SIZE 4
-#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
-#define ACR_SIZE 4
-
-
-#define PTRACE_OLDSETOPTIONS 21
-#define PTRACE_SYSEMU 31
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-
-typedef union {
- float f;
- double d;
- __u64 ui;
- struct
- {
- __u32 hi;
- __u32 lo;
- } fp;
-} freg_t;
-
-typedef struct {
- __u32 fpc;
- __u32 pad;
- freg_t fprs[NUM_FPRS];
-} s390_fp_regs;
-
-#define FPC_EXCEPTION_MASK 0xF8000000
-#define FPC_FLAGS_MASK 0x00F80000
-#define FPC_DXC_MASK 0x0000FF00
-#define FPC_RM_MASK 0x00000003
-
-/* this typedef defines how a Program Status Word looks like */
-typedef struct {
- unsigned long mask;
- unsigned long addr;
-} __attribute__ ((aligned(8))) psw_t;
-
-#ifndef __s390x__
-
-#define PSW_MASK_PER 0x40000000UL
-#define PSW_MASK_DAT 0x04000000UL
-#define PSW_MASK_IO 0x02000000UL
-#define PSW_MASK_EXT 0x01000000UL
-#define PSW_MASK_KEY 0x00F00000UL
-#define PSW_MASK_BASE 0x00080000UL /* always one */
-#define PSW_MASK_MCHECK 0x00040000UL
-#define PSW_MASK_WAIT 0x00020000UL
-#define PSW_MASK_PSTATE 0x00010000UL
-#define PSW_MASK_ASC 0x0000C000UL
-#define PSW_MASK_CC 0x00003000UL
-#define PSW_MASK_PM 0x00000F00UL
-#define PSW_MASK_RI 0x00000000UL
-#define PSW_MASK_EA 0x00000000UL
-#define PSW_MASK_BA 0x00000000UL
-
-#define PSW_MASK_USER 0x0000FF00UL
-
-#define PSW_ADDR_AMODE 0x80000000UL
-#define PSW_ADDR_INSN 0x7FFFFFFFUL
-
-#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW_ASC_PRIMARY 0x00000000UL
-#define PSW_ASC_ACCREG 0x00004000UL
-#define PSW_ASC_SECONDARY 0x00008000UL
-#define PSW_ASC_HOME 0x0000C000UL
-
-#else /* __s390x__ */
-
-#define PSW_MASK_PER 0x4000000000000000UL
-#define PSW_MASK_DAT 0x0400000000000000UL
-#define PSW_MASK_IO 0x0200000000000000UL
-#define PSW_MASK_EXT 0x0100000000000000UL
-#define PSW_MASK_BASE 0x0000000000000000UL
-#define PSW_MASK_KEY 0x00F0000000000000UL
-#define PSW_MASK_MCHECK 0x0004000000000000UL
-#define PSW_MASK_WAIT 0x0002000000000000UL
-#define PSW_MASK_PSTATE 0x0001000000000000UL
-#define PSW_MASK_ASC 0x0000C00000000000UL
-#define PSW_MASK_CC 0x0000300000000000UL
-#define PSW_MASK_PM 0x00000F0000000000UL
-#define PSW_MASK_RI 0x0000008000000000UL
-#define PSW_MASK_EA 0x0000000100000000UL
-#define PSW_MASK_BA 0x0000000080000000UL
-
-#define PSW_MASK_USER 0x0000FF0180000000UL
-
-#define PSW_ADDR_AMODE 0x0000000000000000UL
-#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
-
-#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52)
-
-#define PSW_ASC_PRIMARY 0x0000000000000000UL
-#define PSW_ASC_ACCREG 0x0000400000000000UL
-#define PSW_ASC_SECONDARY 0x0000800000000000UL
-#define PSW_ASC_HOME 0x0000C00000000000UL
-
-#endif /* __s390x__ */
-
-
-/*
- * The s390_regs structure is used to define the elf_gregset_t.
- */
-typedef struct {
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
- unsigned int acrs[NUM_ACRS];
- unsigned long orig_gpr2;
-} s390_regs;
-
-/*
- * The user_pt_regs structure exports the beginning of
- * the in-kernel pt_regs structure to user space.
- */
-typedef struct {
- unsigned long args[1];
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
-} user_pt_regs;
-
-/*
- * Now for the user space program event recording (trace) definitions.
- * The following structures are used only for the ptrace interface, don't
- * touch or even look at it if you don't want to modify the user-space
- * ptrace interface. In particular stay away from it for in-kernel PER.
- */
-typedef struct {
- unsigned long cr[NUM_CR_WORDS];
-} per_cr_words;
-
-#define PER_EM_MASK 0xE8000000UL
-
-typedef struct {
-#ifdef __s390x__
- unsigned : 32;
-#endif /* __s390x__ */
- unsigned em_branching : 1;
- unsigned em_instruction_fetch : 1;
- /*
- * Switching on storage alteration automatically fixes
- * the storage alteration event bit in the users std.
- */
- unsigned em_storage_alteration : 1;
- unsigned em_gpr_alt_unused : 1;
- unsigned em_store_real_address : 1;
- unsigned : 3;
- unsigned branch_addr_ctl : 1;
- unsigned : 1;
- unsigned storage_alt_space_ctl : 1;
- unsigned : 21;
- unsigned long starting_addr;
- unsigned long ending_addr;
-} per_cr_bits;
-
-typedef struct {
- unsigned short perc_atmid;
- unsigned long address;
- unsigned char access_id;
-} per_lowcore_words;
-
-typedef struct {
- unsigned perc_branching : 1;
- unsigned perc_instruction_fetch : 1;
- unsigned perc_storage_alteration : 1;
- unsigned perc_gpr_alt_unused : 1;
- unsigned perc_store_real_address : 1;
- unsigned : 3;
- unsigned atmid_psw_bit_31 : 1;
- unsigned atmid_validity_bit : 1;
- unsigned atmid_psw_bit_32 : 1;
- unsigned atmid_psw_bit_5 : 1;
- unsigned atmid_psw_bit_16 : 1;
- unsigned atmid_psw_bit_17 : 1;
- unsigned si : 2;
- unsigned long address;
- unsigned : 4;
- unsigned access_id : 4;
-} per_lowcore_bits;
-
-typedef struct {
- union {
- per_cr_words words;
- per_cr_bits bits;
- } control_regs;
- /*
- * The single_step and instruction_fetch bits are obsolete,
- * the kernel always sets them to zero. To enable single
- * stepping use ptrace(PTRACE_SINGLESTEP) instead.
- */
- unsigned single_step : 1;
- unsigned instruction_fetch : 1;
- unsigned : 30;
- /*
- * These addresses are copied into cr10 & cr11 if single
- * stepping is switched off
- */
- unsigned long starting_addr;
- unsigned long ending_addr;
- union {
- per_lowcore_words words;
- per_lowcore_bits bits;
- } lowcore;
-} per_struct;
-
-typedef struct {
- unsigned int len;
- unsigned long kernel_addr;
- unsigned long process_addr;
-} ptrace_area;
-
-/*
- * S/390 specific non posix ptrace requests. I chose unusual values so
- * they are unlikely to clash with future ptrace definitions.
- */
-#define PTRACE_PEEKUSR_AREA 0x5000
-#define PTRACE_POKEUSR_AREA 0x5001
-#define PTRACE_PEEKTEXT_AREA 0x5002
-#define PTRACE_PEEKDATA_AREA 0x5003
-#define PTRACE_POKETEXT_AREA 0x5004
-#define PTRACE_POKEDATA_AREA 0x5005
-#define PTRACE_GET_LAST_BREAK 0x5006
-#define PTRACE_PEEK_SYSTEM_CALL 0x5007
-#define PTRACE_POKE_SYSTEM_CALL 0x5008
-#define PTRACE_ENABLE_TE 0x5009
-#define PTRACE_DISABLE_TE 0x5010
-#define PTRACE_TE_ABORT_RAND 0x5011
-
-/*
- * The numbers chosen here are somewhat arbitrary but absolutely MUST
- * not overlap with any of the number assigned in <linux/ptrace.h>.
- */
-#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
-
-/*
- * PT_PROT definition is loosely based on hppa bsd definition in
- * gdb/hppab-nat.c
- */
-#define PTRACE_PROT 21
-
-typedef enum {
- ptprot_set_access_watchpoint,
- ptprot_set_write_watchpoint,
- ptprot_disable_watchpoint
-} ptprot_flags;
-
-typedef struct {
- unsigned long lowaddr;
- unsigned long hiaddr;
- ptprot_flags prot;
-} ptprot_area;
-
-/* Sequence of bytes for breakpoint illegal instruction. */
-#define S390_BREAKPOINT {0x0,0x1}
-#define S390_BREAKPOINT_U16 ((__u16)0x0001)
-#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
-#define S390_SYSCALL_SIZE 2
-
-/*
- * The user_regs_struct defines the way the user registers are
- * store on the stack for signal handling.
- */
-struct user_regs_struct {
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
- unsigned int acrs[NUM_ACRS];
- unsigned long orig_gpr2;
- s390_fp_regs fp_regs;
- /*
- * These per registers are in here so that gdb can modify them
- * itself as there is no "official" ptrace interface for hardware
- * watchpoints. This is the way intel does it.
- */
- per_struct per_info;
- unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
-};
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _UAPI_S390_PTRACE_H */
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 7fea83bedf48..bca5dd0a59e3 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -80,9 +80,6 @@ static void jsonw_puts(json_writer_t *self, const char *str)
case '"':
fputs("\\\"", self->out);
break;
- case '\'':
- fputs("\\\'", self->out);
- break;
default:
putc(*str, self->out);
}
diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
index 16913fffc985..52d5e9721d92 100644
--- a/tools/bpf/resolve_btfids/.gitignore
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -1,3 +1,4 @@
/fixdep
/resolve_btfids
/libbpf/
+/libsubcmd/
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 62ce1f5d1b1d..976b194eb775 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4969,6 +4969,12 @@ union bpf_attr {
* different maps if key/value layout matches across maps.
* Every bpf_timer_set_callback() can have different callback_fn.
*
+ * *flags* can be one of:
+ *
+ * **BPF_F_TIMER_ABS**
+ * Start the timer in absolute expire value instead of the
+ * default relative one.
+ *
* Return
* 0 on success.
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
@@ -5325,11 +5331,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5337,6 +5354,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
@@ -7083,4 +7103,13 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+/*
+ * Flags to control bpf_timer_start() behaviour.
+ * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
+ * relative to current time.
+ */
+enum {
+ BPF_F_TIMER_ABS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 5a3dfb56d78f..b8b0a6369363 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o
+ usdt.o zip.o
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9ed9bceb4111..f0f786373238 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
- * common eBPF ELF operations.
+ * Common BPF ELF operations.
*
* Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
@@ -386,14 +386,73 @@ LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
-/* Type-safe variants of bpf_obj_get_info_by_fd(). The callers still needs to
- * pass info_len, which should normally be
- * sizeof(struct bpf_{prog,map,btf,link}_info), in order to be compatible with
- * different libbpf and kernel versions.
+
+/**
+ * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF
+ * program corresponding to *prog_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param info pointer to **struct bpf_prog_info** that will be populated with
+ * BPF program information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
*/
LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF
+ * map corresponding to *map_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param map_fd BPF map file descriptor
+ * @param info pointer to **struct bpf_map_info** that will be populated with
+ * BPF map information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the
+ * BTF object corresponding to *btf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param btf_fd BTF object file descriptor
+ * @param info pointer to **struct bpf_btf_info** that will be populated with
+ * BTF object information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_link_get_info_by_fd()** obtains information about the BPF
+ * link corresponding to *link_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param link_fd BPF link file descriptor
+ * @param info pointer to **struct bpf_link_info** that will be populated with
+ * BPF link information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len);
struct bpf_prog_query_opts {
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 5ec1871acb2f..7d12d3e620cc 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -174,8 +174,8 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
-#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 6db88f41fa0d..6fb3d0f9af17 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -204,6 +204,7 @@ struct pt_regs___s390 {
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG uregs[4]
#define __PT_PARM6_SYSCALL_REG uregs[5]
#define __PT_PARM7_SYSCALL_REG uregs[6]
@@ -415,6 +416,8 @@ struct pt_regs___arm64 {
* https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
*/
+/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
#define __PT_PARM1_REG regs[4]
#define __PT_PARM2_REG regs[5]
#define __PT_PARM3_REG regs[6]
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 9181d36118d2..0a2c079244b6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1000,8 +1000,6 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
}
- err = 0;
-
if (!btf_data) {
pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
err = -ENODATA;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 05c4db355f28..a557718401e4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -53,6 +53,7 @@
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
+#include "zip.h"
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
@@ -798,7 +799,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
progs = obj->programs;
nr_progs = obj->nr_programs;
nr_syms = symbols->d_size / sizeof(Elf64_Sym);
- sec_off = 0;
for (i = 0; i < nr_syms; i++) {
sym = elf_sym_by_idx(obj, i);
@@ -2615,7 +2615,7 @@ static int bpf_object__init_maps(struct bpf_object *obj,
strict = !OPTS_GET(opts, relaxed_maps, false);
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
err = err ?: bpf_object__init_struct_ops_maps(obj);
@@ -9724,6 +9724,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p
char errmsg[STRERR_BUFSIZE];
struct bpf_link_perf *link;
int prog_fd, link_fd = -1, err;
+ bool force_ioctl_attach;
if (!OPTS_VALID(opts, bpf_perf_event_opts))
return libbpf_err_ptr(-EINVAL);
@@ -9747,7 +9748,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p
link->link.dealloc = &bpf_link_perf_dealloc;
link->perf_event_fd = pfd;
- if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+ force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
@@ -10106,6 +10108,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
const struct bpf_kprobe_opts *opts)
{
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
char errmsg[STRERR_BUFSIZE];
char *legacy_probe = NULL;
struct bpf_link *link;
@@ -10116,11 +10119,32 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
if (!OPTS_VALID(opts, bpf_kprobe_opts))
return libbpf_err_ptr(-EINVAL);
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
retprobe = OPTS_GET(opts, retprobe, false);
offset = OPTS_GET(opts, offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
legacy = determine_kprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
if (!legacy) {
pfd = perf_event_open_probe(false /* uprobe */, retprobe,
func_name, offset,
@@ -10531,32 +10555,19 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
return NULL;
}
-/* Find offset of function name in object specified by path. "name" matches
- * symbol name or name@@LIB for library functions.
+/* Find offset of function name in the provided ELF object. "binary_path" is
+ * the path to the ELF binary represented by "elf", and only used for error
+ * reporting matters. "name" matches symbol name or name@@LIB for library
+ * functions.
*/
-static long elf_find_func_offset(const char *binary_path, const char *name)
+static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
- int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
bool is_shared_lib, is_name_qualified;
- char errmsg[STRERR_BUFSIZE];
long ret = -ENOENT;
size_t name_len;
GElf_Ehdr ehdr;
- Elf *elf;
- fd = open(binary_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0) {
- ret = -errno;
- pr_warn("failed to open %s: %s\n", binary_path,
- libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
- return ret;
- }
- elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
- if (!elf) {
- pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
- close(fd);
- return -LIBBPF_ERRNO__FORMAT;
- }
if (!gelf_getehdr(elf, &ehdr)) {
pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
ret = -LIBBPF_ERRNO__FORMAT;
@@ -10569,7 +10580,7 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
/* Does name specify "@@LIB"? */
is_name_qualified = strstr(name, "@@") != NULL;
- /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
* a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
* linked binary may not have SHT_DYMSYM, so absence of a section should not be
* reported as a warning/error.
@@ -10682,11 +10693,101 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
}
}
out:
+ return ret;
+}
+
+/* Find offset of function name in ELF object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
+{
+ char errmsg[STRERR_BUFSIZE];
+ long ret = -ENOENT;
+ Elf *elf;
+ int fd;
+
+ fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = -errno;
+ pr_warn("failed to open %s: %s\n", binary_path,
+ libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+ return ret;
+ }
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+ close(fd);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ ret = elf_find_func_offset(elf, binary_path, name);
elf_end(elf);
close(fd);
return ret;
}
+/* Find offset of function name in archive specified by path. Currently
+ * supported are .zip files that do not compress their contents, as used on
+ * Android in the form of APKs, for example. "file_name" is the name of the ELF
+ * file inside the archive. "func_name" matches symbol name or name@@LIB for
+ * library functions.
+ *
+ * An overview of the APK format specifically is provided here:
+ * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
+ */
+static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
+ const char *func_name)
+{
+ struct zip_archive *archive;
+ struct zip_entry entry;
+ long ret;
+ Elf *elf;
+
+ archive = zip_archive_open(archive_path);
+ if (IS_ERR(archive)) {
+ ret = PTR_ERR(archive);
+ pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
+ return ret;
+ }
+
+ ret = zip_archive_find_entry(archive, file_name, &entry);
+ if (ret) {
+ pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
+ archive_path, ret);
+ goto out;
+ }
+ pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
+ (unsigned long)entry.data_offset);
+
+ if (entry.compression) {
+ pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
+ archive_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+
+ elf = elf_memory((void *)entry.data, entry.data_length);
+ if (!elf) {
+ pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
+ elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__LIBELF;
+ goto out;
+ }
+
+ ret = elf_find_func_offset(elf, file_name, func_name);
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
+ func_name, file_name, archive_path, entry.data_offset, ret,
+ ret + entry.data_offset);
+ ret += entry.data_offset;
+ }
+ elf_end(elf);
+
+out:
+ zip_archive_close(archive);
+ return ret;
+}
+
static const char *arch_specific_lib_paths(void)
{
/*
@@ -10772,9 +10873,11 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
const struct bpf_uprobe_opts *opts)
{
- DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ const char *archive_path = NULL, *archive_sep = NULL;
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
- char full_binary_path[PATH_MAX];
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
+ char full_path[PATH_MAX];
struct bpf_link *link;
size_t ref_ctr_off;
int pfd, err;
@@ -10784,6 +10887,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (!OPTS_VALID(opts, bpf_uprobe_opts))
return libbpf_err_ptr(-EINVAL);
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
retprobe = OPTS_GET(opts, retprobe, false);
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
@@ -10791,27 +10895,60 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (!binary_path)
return libbpf_err_ptr(-EINVAL);
- if (!strchr(binary_path, '/')) {
- err = resolve_full_path(binary_path, full_binary_path,
- sizeof(full_binary_path));
+ /* Check if "binary_path" refers to an archive. */
+ archive_sep = strstr(binary_path, "!/");
+ if (archive_sep) {
+ full_path[0] = '\0';
+ libbpf_strlcpy(full_path, binary_path,
+ min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
+ archive_path = full_path;
+ binary_path = archive_sep + 2;
+ } else if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, full_path, sizeof(full_path));
if (err) {
pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
prog->name, binary_path, err);
return libbpf_err_ptr(err);
}
- binary_path = full_binary_path;
+ binary_path = full_path;
}
func_name = OPTS_GET(opts, func_name, NULL);
if (func_name) {
long sym_off;
- sym_off = elf_find_func_offset(binary_path, func_name);
+ if (archive_path) {
+ sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
+ func_name);
+ binary_path = archive_path;
+ } else {
+ sym_off = elf_find_func_offset_from_file(binary_path, func_name);
+ }
if (sym_off < 0)
return libbpf_err_ptr(sym_off);
func_offset += sym_off;
}
legacy = determine_uprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
if (!legacy) {
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2efd80f6f7b9..db4992a036f8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -447,12 +447,15 @@ LIBBPF_API struct bpf_link *
bpf_program__attach(const struct bpf_program *prog);
struct bpf_perf_event_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
+ /* don't use BPF link when attaching BPF program */
+ bool force_ioctl_attach;
+ size_t :0;
};
-#define bpf_perf_event_opts__last_field bpf_cookie
+#define bpf_perf_event_opts__last_field force_ioctl_attach
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd);
@@ -461,8 +464,25 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
const struct bpf_perf_event_opts *opts);
+/**
+ * enum probe_attach_mode - the mode to attach kprobe/uprobe
+ *
+ * force libbpf to attach kprobe/uprobe in specific mode, -ENOTSUP will
+ * be returned if it is not supported by the kernel.
+ */
+enum probe_attach_mode {
+ /* attach probe in the latest mode supported by the kernel */
+ PROBE_ATTACH_MODE_DEFAULT = 0,
+ /* attach probe in legacy mode, using debugfs/tracefs */
+ PROBE_ATTACH_MODE_LEGACY,
+ /* create perf event with perf_event_open() syscall */
+ PROBE_ATTACH_MODE_PERF,
+ /* attach probe with BPF link */
+ PROBE_ATTACH_MODE_LINK,
+};
+
struct bpf_kprobe_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -470,9 +490,11 @@ struct bpf_kprobe_opts {
size_t offset;
/* kprobe is return probe */
bool retprobe;
+ /* kprobe attach mode */
+ enum probe_attach_mode attach_mode;
size_t :0;
};
-#define bpf_kprobe_opts__last_field retprobe
+#define bpf_kprobe_opts__last_field attach_mode
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
@@ -506,7 +528,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
const struct bpf_kprobe_multi_opts *opts);
struct bpf_ksyscall_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -552,7 +574,7 @@ bpf_program__attach_ksyscall(const struct bpf_program *prog,
const struct bpf_ksyscall_opts *opts);
struct bpf_uprobe_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* offset of kernel reference counted USDT semaphore, added in
* a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
@@ -570,9 +592,11 @@ struct bpf_uprobe_opts {
* binary_path.
*/
const char *func_name;
+ /* uprobe attach mode */
+ enum probe_attach_mode attach_mode;
size_t :0;
};
-#define bpf_uprobe_opts__last_field func_name
+#define bpf_uprobe_opts__last_field attach_mode
/**
* @brief **bpf_program__attach_uprobe()** attaches a BPF program
@@ -646,7 +670,7 @@ bpf_program__attach_usdt(const struct bpf_program *prog,
const struct bpf_usdt_opts *opts);
struct bpf_tracepoint_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -1110,7 +1134,7 @@ struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
struct ring_buffer_opts {
- size_t sz; /* size of this struct, for forward/backward compatiblity */
+ size_t sz; /* size of this struct, for forward/backward compatibility */
};
#define ring_buffer_opts__last_field sz
@@ -1475,7 +1499,7 @@ LIBBPF_API void
bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s);
struct gen_loader_opts {
- size_t sz; /* size of this struct, for forward/backward compatiblity */
+ size_t sz; /* size of this struct, for forward/backward compatibility */
const char *data;
const char *insns;
__u32 data_sz;
@@ -1493,13 +1517,13 @@ enum libbpf_tristate {
};
struct bpf_linker_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
};
#define bpf_linker_opts__last_field sz
struct bpf_linker_file_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
};
#define bpf_linker_file_opts__last_field sz
@@ -1542,7 +1566,7 @@ typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cook
struct bpf_link **link);
struct libbpf_prog_handler_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* User-provided value that is passed to prog_setup_fn,
* prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 4ac02c28e152..d7069780984a 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1997,7 +1997,6 @@ add_sym:
static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
{
struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
- struct dst_sec *dst_symtab;
int i, err;
for (i = 1; i < obj->sec_cnt; i++) {
@@ -2030,9 +2029,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return -1;
}
- /* add_dst_sec() above could have invalidated linker->secs */
- dst_symtab = &linker->secs[linker->symtab_sec_idx];
-
/* shdr->sh_link points to SYMTAB */
dst_sec->shdr->sh_link = linker->symtab_sec_idx;
@@ -2049,16 +2045,13 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
dst_rel = dst_sec->raw_data + src_sec->dst_off;
n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
for (j = 0; j < n; j++, src_rel++, dst_rel++) {
- size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
- size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
- Elf64_Sym *src_sym, *dst_sym;
- size_t dst_sym_idx;
+ size_t src_sym_idx, dst_sym_idx, sym_type;
+ Elf64_Sym *src_sym;
src_sym_idx = ELF64_R_SYM(src_rel->r_info);
src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
dst_sym_idx = obj->sym_map[src_sym_idx];
- dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
dst_rel->r_offset += src_linked_sec->dst_off;
sym_type = ELF64_R_TYPE(src_rel->r_info);
dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 1653e7a8b0a1..84dd5fa14905 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -468,8 +468,13 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
return 0;
err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
- if (err < 0)
+ if (err < 0) {
+ if (err == -ENOENT) {
+ opts->feature_flags = 0;
+ goto skip_feature_flags;
+ }
return libbpf_err(err);
+ }
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
@@ -489,6 +494,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
opts->feature_flags = md.flags;
+skip_feature_flags:
return 0;
}
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index c4b0e81ae293..a26b2f5fa0fc 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -1551,9 +1551,6 @@ int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const st
if (level <= 0)
return -EINVAL;
- local_t = btf_type_by_id(local_btf, local_id);
- targ_t = btf_type_by_id(targ_btf, targ_id);
-
recur:
depth--;
if (depth < 0)
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 75b411fc2c77..b8402e3f9eb2 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -1141,12 +1141,13 @@ static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
return 0;
}
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg);
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie)
{
+ struct usdt_arg_spec *arg;
const char *s;
- int len;
+ int arg_sz, len;
spec->usdt_cookie = usdt_cookie;
spec->arg_cnt = 0;
@@ -1159,10 +1160,25 @@ static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note,
return -E2BIG;
}
- len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]);
+ arg = &spec->args[spec->arg_cnt];
+ len = parse_usdt_arg(s, spec->arg_cnt, arg, &arg_sz);
if (len < 0)
return len;
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ spec->arg_cnt, s, arg_sz);
+ return -EINVAL;
+ }
+
s += len;
spec->arg_cnt++;
}
@@ -1219,13 +1235,13 @@ static int calc_pt_regs_off(const char *reg_name)
return -ENOENT;
}
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
char reg_name[16];
- int arg_sz, len, reg_off;
+ int len, reg_off;
long off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) {
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1233,7 +1249,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", arg_sz, reg_name, &len) == 2) {
/* Memory dereference case without offset, e.g., 8@(%rsp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = 0;
@@ -1241,7 +1257,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
@@ -1250,7 +1266,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ $%ld %n", arg_sz, &off, &len) == 2) {
/* Constant value case, e.g., 4@$71 */
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
@@ -1260,20 +1276,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
- arg->arg_signed = arg_sz < 0;
- if (arg_sz < 0)
- arg_sz = -arg_sz;
-
- switch (arg_sz) {
- case 1: case 2: case 4: case 8:
- arg->arg_bitshift = 64 - arg_sz * 8;
- break;
- default:
- pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
- arg_num, arg_str, arg_sz);
- return -EINVAL;
- }
-
return len;
}
@@ -1281,13 +1283,13 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
unsigned int reg;
- int arg_sz, len;
+ int len;
long off;
- if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) {
+ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", arg_sz, &off, &reg, &len) == 3) {
/* Memory dereference case, e.g., -2@-28(%r15) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1296,7 +1298,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
- } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %%r%u %n", arg_sz, &reg, &len) == 2) {
/* Register read case, e.g., -8@%r0 */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
@@ -1305,7 +1307,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
- } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
/* Constant value case, e.g., 4@71 */
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
@@ -1315,20 +1317,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
- arg->arg_signed = arg_sz < 0;
- if (arg_sz < 0)
- arg_sz = -arg_sz;
-
- switch (arg_sz) {
- case 1: case 2: case 4: case 8:
- arg->arg_bitshift = 64 - arg_sz * 8;
- break;
- default:
- pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
- arg_num, arg_str, arg_sz);
- return -EINVAL;
- }
-
return len;
}
@@ -1348,13 +1336,13 @@ static int calc_pt_regs_off(const char *reg_name)
return -ENOENT;
}
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
char reg_name[16];
- int arg_sz, len, reg_off;
+ int len, reg_off;
long off;
- if (sscanf(arg_str, " %d @ \[ %15[a-z0-9], %ld ] %n", &arg_sz, reg_name, &off, &len) == 3) {
+ if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , %ld ] %n", arg_sz, reg_name, &off, &len) == 3) {
/* Memory dereference case, e.g., -4@[sp, 96] */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1362,7 +1350,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", &arg_sz, reg_name, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) {
/* Memory dereference case, e.g., -4@[sp] */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = 0;
@@ -1370,12 +1358,12 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
/* Constant value case, e.g., 4@5 */
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
arg->reg_off = 0;
- } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -8@x4 */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
@@ -1388,20 +1376,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
- arg->arg_signed = arg_sz < 0;
- if (arg_sz < 0)
- arg_sz = -arg_sz;
-
- switch (arg_sz) {
- case 1: case 2: case 4: case 8:
- arg->arg_bitshift = 64 - arg_sz * 8;
- break;
- default:
- pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
- arg_num, arg_str, arg_sz);
- return -EINVAL;
- }
-
return len;
}
@@ -1456,13 +1430,13 @@ static int calc_pt_regs_off(const char *reg_name)
return -ENOENT;
}
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
char reg_name[16];
- int arg_sz, len, reg_off;
+ int len, reg_off;
long off;
- if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) {
+ if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -8@-88(s0) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1470,12 +1444,12 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
if (reg_off < 0)
return reg_off;
arg->reg_off = reg_off;
- } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %ld %n", arg_sz, &off, &len) == 2) {
/* Constant value case, e.g., 4@5 */
arg->arg_type = USDT_ARG_CONST;
arg->val_off = off;
arg->reg_off = 0;
- } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) {
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -8@a1 */
arg->arg_type = USDT_ARG_REG;
arg->val_off = 0;
@@ -1488,17 +1462,83 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
return -EINVAL;
}
- arg->arg_signed = arg_sz < 0;
- if (arg_sz < 0)
- arg_sz = -arg_sz;
+ return len;
+}
- switch (arg_sz) {
- case 1: case 2: case 4: case 8:
- arg->arg_bitshift = 64 - arg_sz * 8;
- break;
- default:
- pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
- arg_num, arg_str, arg_sz);
+#elif defined(__arm__)
+
+/*
+ * Translate a register name, as captured from a USDT argument spec, into the
+ * byte offset of that register within struct pt_regs.
+ *
+ * Returns the offset on a match; otherwise logs a warning and returns -ENOENT.
+ */
+static int calc_pt_regs_off(const char *reg_name)
+{
+ /* Each recognized name maps to one slot of pt_regs.uregs[]. */
+ static struct {
+ const char *name;
+ size_t pt_regs_off;
+ } reg_map[] = {
+ { "r0", offsetof(struct pt_regs, uregs[0]) },
+ { "r1", offsetof(struct pt_regs, uregs[1]) },
+ { "r2", offsetof(struct pt_regs, uregs[2]) },
+ { "r3", offsetof(struct pt_regs, uregs[3]) },
+ { "r4", offsetof(struct pt_regs, uregs[4]) },
+ { "r5", offsetof(struct pt_regs, uregs[5]) },
+ { "r6", offsetof(struct pt_regs, uregs[6]) },
+ { "r7", offsetof(struct pt_regs, uregs[7]) },
+ { "r8", offsetof(struct pt_regs, uregs[8]) },
+ { "r9", offsetof(struct pt_regs, uregs[9]) },
+ { "r10", offsetof(struct pt_regs, uregs[10]) },
+ { "fp", offsetof(struct pt_regs, uregs[11]) },
+ { "ip", offsetof(struct pt_regs, uregs[12]) },
+ { "sp", offsetof(struct pt_regs, uregs[13]) },
+ { "lr", offsetof(struct pt_regs, uregs[14]) },
+ { "pc", offsetof(struct pt_regs, uregs[15]) },
+ };
+ int i;
+
+ /* Linear scan with an exact, case-sensitive name comparison. */
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ if (strcmp(reg_name, reg_map[i].name) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
+{
+ char reg_name[16];
+ int len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] , #%ld ] %n",
+ arg_sz, reg_name, &off, &len) == 3) {
+ /* Memory dereference case, e.g., -4@[fp, #96] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ \[ %15[a-z0-9] ] %n", arg_sz, reg_name, &len) == 2) {
+ /* Memory dereference case, e.g., -4@[sp] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ #%ld %n", arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@#5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", arg_sz, reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@r4 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
return -EINVAL;
}
@@ -1507,7 +1547,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
#else
-static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n");
return -ENOTSUP;
diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
new file mode 100644
index 000000000000..f561aa07438f
--- /dev/null
+++ b/tools/lib/bpf/zip.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Routines for dealing with .zip archives.
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "libbpf_internal.h"
+#include "zip.h"
+
+/* Specification of ZIP file format can be found here:
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * For a high level overview of the structure of a ZIP file see
+ * sections 4.3.1 - 4.3.6.
+ *
+ * Data structures appearing in ZIP files do not contain any
+ * padding and they might be misaligned. To allow us to safely
+ * operate on pointers to such structures and their members, we
+ * declare the types as packed.
+ */
+
+#define END_OF_CD_RECORD_MAGIC 0x06054b50
+
+/* See section 4.3.16 of the spec. */
+struct end_of_cd_record {
+ /* Magic value equal to END_OF_CD_RECORD_MAGIC */
+ __u32 magic;
+
+ /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
+ * Zip archive might span multiple files (disks).
+ */
+ __u16 this_disk;
+
+ /* Number of the file containing the beginning of the central directory or
+ * 0xFFFF if ZIP64 archive.
+ */
+ __u16 cd_disk;
+
+ /* Number of central directory records on this disk or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records;
+
+ /* Number of central directory records on all disks or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records_total;
+
+ /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
+ __u32 cd_size;
+
+ /* Offset of the central directory from the beginning of the archive or
+ * 0xFFFFFFFF if ZIP64 archive.
+ */
+ __u32 cd_offset;
+
+ /* Length of comment data following end of central directory record. */
+ __u16 comment_length;
+
+ /* Up to 64k of arbitrary bytes. */
+ /* uint8_t comment[comment_length] */
+} __attribute__((packed));
+
+#define CD_FILE_HEADER_MAGIC 0x02014b50
+#define FLAG_ENCRYPTED (1 << 0)
+#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
+
+/* See section 4.3.12 of the spec. */
+struct cd_file_header {
+ /* Magic value equal to CD_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ __u16 version;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+ __u16 file_comment_length;
+ /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
+ __u16 disk;
+ __u16 internal_attributes;
+ __u32 external_attributes;
+ /* Offset from the start of the disk containing the local file header to the
+ * start of the local file header.
+ */
+ __u32 offset;
+} __attribute__((packed));
+
+#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
+
+/* See section 4.3.7 of the spec. */
+struct local_file_header {
+ /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+} __attribute__((packed));
+
+/* State of an opened archive; created by zip_archive_open(). */
+struct zip_archive {
+ /* Start of the read-only mapping of the whole archive file. */
+ void *data;
+ /* Length of the mapping in bytes, i.e. the size of the archive file. */
+ __u32 size;
+ /* Offset of the central directory, as read from the end of CD record. */
+ __u32 cd_offset;
+ /* Number of central directory records, as read from the end of CD record. */
+ __u32 cd_records;
+};
+
+/* Bounds-check the byte range [offset, offset + size) against the archive
+ * mapping. Returns a pointer to its first byte, or NULL if the range wraps
+ * around or extends past the end of the archive.
+ */
+static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
+{
+	/* 32-bit sum; a wrapped result is smaller than "offset". */
+	__u32 end = offset + size;
+
+	if (end < offset || end > archive->size)
+		return NULL;
+
+	return archive->data + offset;
+}
+
+/* Try to interpret the bytes at "offset" as the end of central directory (eocd)
+ * record and, on success, store the central directory offset and record count
+ * in "archive".
+ *
+ * Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
+ * archive uses features which are not supported.
+ */
+static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
+{
+ __u16 comment_length, cd_records;
+ struct end_of_cd_record *eocd;
+ __u32 cd_offset, cd_size;
+
+ eocd = check_access(archive, offset, sizeof(*eocd));
+ if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
+ return -EINVAL;
+
+ /* The record plus its trailing comment must end exactly at the end of the file. */
+ comment_length = eocd->comment_length;
+ if (offset + sizeof(*eocd) + comment_length != archive->size)
+ return -EINVAL;
+
+ cd_records = eocd->cd_records;
+ if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
+ /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
+ return -ENOTSUP;
+
+ /* The whole central directory must lie inside the mapped archive. */
+ cd_offset = eocd->cd_offset;
+ cd_size = eocd->cd_size;
+ if (!check_access(archive, cd_offset, cd_size))
+ return -EINVAL;
+
+ archive->cd_offset = cd_offset;
+ archive->cd_records = cd_records;
+ return 0;
+}
+
+/* Locate the end of central directory record by scanning backwards from the
+ * end of the archive. Returns 0 once a record is parsed, -ENOTSUP if a valid
+ * record describes an unsupported archive, -EINVAL if none is found.
+ */
+static int find_cd(struct zip_archive *archive)
+{
+	const size_t eocd_sz = sizeof(struct end_of_cd_record);
+	int64_t pos, lowest;
+	int err = -EINVAL;
+
+	if (archive->size <= eocd_sz)
+		return -EINVAL;
+
+	/* The record is followed by a comment of up to 0xFFFF bytes, so its
+	 * start is not known up front. Begin at the last position where it
+	 * could fit and walk towards the start of the file, covering the
+	 * (lowest, pos] range.
+	 */
+	pos = archive->size - eocd_sz;
+	lowest = pos - (1 << 16);
+
+	while (pos >= 0 && pos > lowest) {
+		err = try_parse_end_of_cd(archive, pos);
+		/* Stop on success, or on a valid record we cannot support. */
+		if (err == 0 || err == -ENOTSUP)
+			break;
+		pos--;
+	}
+	return err;
+}
+
+/* Map the archive at "path" read-only and locate its central directory.
+ *
+ * Returns a handle that must be released with zip_archive_close(), or an
+ * ERR_PTR()-encoded negative error (never NULL):
+ *   -errno   if open() or mmap() fails;
+ *   -EINVAL  if the file size cannot be determined, exceeds UINT32_MAX, or no
+ *            end of central directory record is found;
+ *   -ENOTSUP if the archive uses unsupported features;
+ *   -ENOMEM  if the handle cannot be allocated.
+ */
+struct zip_archive *zip_archive_open(const char *path)
+{
+	struct zip_archive *archive;
+	int err, fd;
+	off_t size;
+	void *data;
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return ERR_PTR(-errno);
+
+	/* Offsets inside the archive are tracked as 32-bit values. */
+	size = lseek(fd, 0, SEEK_END);
+	if (size == (off_t)-1 || size > UINT32_MAX) {
+		close(fd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (data == MAP_FAILED) {
+		/* Capture errno before close() can overwrite it. */
+		err = -errno;
+		close(fd);
+		return ERR_PTR(err);
+	}
+	/* The mapping stays valid after the descriptor is closed. */
+	close(fd);
+
+	archive = malloc(sizeof(*archive));
+	if (!archive) {
+		munmap(data, size);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	archive->data = data;
+	archive->size = size;
+
+	err = find_cd(archive);
+	if (err) {
+		munmap(data, size);
+		free(archive);
+		return ERR_PTR(err);
+	}
+
+	return archive;
+}
+
+/* Unmap the archive contents and free the handle. */
+void zip_archive_close(struct zip_archive *archive)
+{
+	void *base = archive->data;
+	__u32 length = archive->size;
+
+	munmap(base, length);
+	free(archive);
+}
+
+/* Return the local file header at "offset", or NULL if it does not fit inside
+ * the archive or does not start with LOCAL_FILE_HEADER_MAGIC.
+ */
+static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
+							     __u32 offset)
+{
+	struct local_file_header *hdr = check_access(archive, offset, sizeof(*hdr));
+
+	if (hdr && hdr->magic == LOCAL_FILE_HEADER_MAGIC)
+		return hdr;
+
+	return NULL;
+}
+
+/* Fill "out" with the name, compression method and data location of the entry
+ * whose local file header starts at "offset".
+ *
+ * Returns 0 on success, or -EINVAL if the header is invalid, the entry is
+ * encrypted or uses a data descriptor, or any part of the entry lies outside
+ * the archive.
+ */
+static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
+{
+ struct local_file_header *lfh;
+ __u32 compressed_size;
+ const char *name;
+ void *data;
+
+ lfh = local_file_header_at_offset(archive, offset);
+ if (!lfh)
+ return -EINVAL;
+
+ /* From here on "offset" walks past each variable-length part of the entry. */
+ offset += sizeof(*lfh);
+ if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
+ return -EINVAL;
+
+ /* The file name follows the fixed-size header. */
+ name = check_access(archive, offset, lfh->file_name_length);
+ if (!name)
+ return -EINVAL;
+
+ /* The extra field follows the name; it is only validated and skipped. */
+ offset += lfh->file_name_length;
+ if (!check_access(archive, offset, lfh->extra_field_length))
+ return -EINVAL;
+
+ /* The file data follows the extra field. */
+ offset += lfh->extra_field_length;
+ compressed_size = lfh->compressed_size;
+ data = check_access(archive, offset, compressed_size);
+ if (!data)
+ return -EINVAL;
+
+ out->compression = lfh->compression;
+ out->name_length = lfh->file_name_length;
+ out->name = name;
+ out->data = data;
+ out->data_length = compressed_size;
+ /* Offset of the data from the start of the archive. */
+ out->data_offset = offset;
+
+ return 0;
+}
+
+/* Walk the central directory looking for an entry named "file_name" that is
+ * neither encrypted nor uses a data descriptor, and describe it in "out".
+ *
+ * Returns the result of get_entry_at_offset() for the first such entry,
+ * -EINVAL if a central directory record is malformed or out of bounds, and
+ * -ENOENT if no entry matches.
+ */
+int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
+			   struct zip_entry *out)
+{
+	size_t wanted_len = strlen(file_name);
+	__u32 pos = archive->cd_offset;
+	__u32 idx;
+
+	for (idx = 0; idx < archive->cd_records; ++idx) {
+		struct cd_file_header *hdr;
+		const char *entry_name;
+		__u16 name_len, flags;
+
+		hdr = check_access(archive, pos, sizeof(*hdr));
+		if (!hdr || hdr->magic != CD_FILE_HEADER_MAGIC)
+			return -EINVAL;
+
+		/* The entry name follows the fixed-size header. */
+		pos += sizeof(*hdr);
+		name_len = hdr->file_name_length;
+		entry_name = check_access(archive, pos, name_len);
+		if (!entry_name)
+			return -EINVAL;
+
+		flags = hdr->flags;
+		if ((flags & (FLAG_ENCRYPTED | FLAG_HAS_DATA_DESCRIPTOR)) == 0 &&
+		    wanted_len == name_len &&
+		    memcmp(file_name, entry_name, wanted_len) == 0)
+			return get_entry_at_offset(archive, hdr->offset, out);
+
+		/* Skip name, extra field and comment to reach the next record. */
+		pos += name_len + hdr->extra_field_length + hdr->file_comment_length;
+	}
+
+	return -ENOENT;
+}
diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
new file mode 100644
index 000000000000..1c1bb21fba76
--- /dev/null
+++ b/tools/lib/bpf/zip.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LIBBPF_ZIP_H
+#define __LIBBPF_ZIP_H
+
+#include <linux/types.h>
+
+/* Represents an open zip archive.
+ * Only basic ZIP files are supported, in particular the following are not
+ * supported:
+ * - encryption
+ * - streaming
+ * - multi-part ZIP files
+ * - ZIP64
+ */
+struct zip_archive;
+
+/* Carries information on name, compression method, and data corresponding to a
+ * file in a zip archive.
+ *
+ * NOTE(review): name and data presumably point into the archive's own buffer
+ * and so stay valid only while the archive is open - confirm against zip.c.
+ */
+struct zip_entry {
+ /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
+ __u16 compression;
+
+ /* Name of the file. NOT NUL-terminated: always pair with name_length. */
+ const char *name;
+ /* Length of the file name, in bytes. */
+ __u16 name_length;
+
+ /* Pointer to the file data as stored in the archive; nothing is
+  * decompressed here, so check the compression field before use.
+  */
+ const void *data;
+ /* Length of the stored file data (the local header's compressed size). */
+ __u32 data_length;
+ /* Offset of the file data within the archive. */
+ __u32 data_offset;
+};
+
+/* Open a zip archive. Returns NULL in case of an error. */
+struct zip_archive *zip_archive_open(const char *path);
+
+/* Close a zip archive and release resources. */
+void zip_archive_close(struct zip_archive *archive);
+
+/* Look up an entry corresponding to a file in given zip archive.
+ *
+ * The name must match a central directory record exactly (same length, same
+ * bytes). Entries flagged as encrypted or as using a data descriptor are
+ * never matched.
+ *
+ * Returns 0 and fills *out on success, -ENOENT if no entry matches, and
+ * -EINVAL if the archive contents fail validation.
+ */
+int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
+
+#endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0efb8f2b33ce..ff527ac065cf 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -108,6 +108,8 @@ endif # GCC_TOOLCHAIN_DIR
endif # CLANG_CROSS_FLAGS
CFLAGS += $(CLANG_CROSS_FLAGS)
AFLAGS += $(CLANG_CROSS_FLAGS)
+else
+CLANG_CROSS_FLAGS :=
endif # CROSS_COMPILE
# Hack to avoid type-punned warnings on old systems such as RHEL5:
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index b89eb87034e4..a02a085e7f32 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of
bpf_cookie # failed to open_and_load program: -524 (trampoline)
bpf_loop # attaches to __x64_sys_nanosleep
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
+dynptr/test_dynptr_skb_data
+dynptr/test_skb_readonly
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b677dcd0b77a..16f404aa1b23 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -338,7 +338,8 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
define get_sys_includes
$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}')
endef
# Determine target endianness.
@@ -356,7 +357,7 @@ BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+ -Wno-compare-distinct-pointer-types -Wuninitialized
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
@@ -558,7 +559,7 @@ TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
btf_helpers.c flow_dissector_load.h \
- cap_helpers.c test_loader.c xsk.c
+ cap_helpers.c test_loader.c xsk.c disasm.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
new file mode 100644
index 000000000000..8c993ec8ceea
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -0,0 +1,38 @@
+#ifndef __BPF_KFUNCS__
+#define __BPF_KFUNCS__
+
+/* Description
+ * Initializes an skb-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+
+/* Description
+ * Initializes an xdp-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+
+/* Description
+ * Obtain a read-only pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym;
+
+/* Description
+ * Obtain a read-write pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym;
+
+#endif
diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c
new file mode 120000
index 000000000000..b1571927bd54
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.c
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h
new file mode 120000
index 000000000000..8054fd497340
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.h
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4666f88f2bb4..c94fa8d6c4f6 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -660,16 +660,22 @@ static int do_test_single(struct bpf_align_test *test)
* func#0 @0
* 0: R1=ctx(off=0,imm=0) R10=fp0
* 0: (b7) r3 = 2 ; R3_w=2
+ *
+ * Sometimes it's actually two lines below, e.g. when
+ * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
+ * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
+ * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
*/
- if (!strstr(line_ptr, m.match)) {
+ while (!strstr(line_ptr, m.match)) {
cur_line = -1;
line_ptr = strtok(NULL, "\n");
- sscanf(line_ptr, "%u: ", &cur_line);
+ sscanf(line_ptr ?: "", "%u: ", &cur_line);
+ if (!line_ptr || cur_line != m.line)
+ break;
}
- if (cur_line != m.line || !line_ptr ||
- !strstr(line_ptr, m.match)) {
- printf("Failed to find match %u: %s\n",
- m.line, m.match);
+ if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) {
+ printf("Failed to find match %u: %s\n", m.line, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 56374c8b5436..7175af39134f 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_attach_kprobe_sleepable.skel.h"
+#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
@@ -23,81 +25,54 @@ static noinline void trigger_func3(void)
asm volatile ("");
}
+/* Attach point for the ref_ctr uprobe/uretprobe test: test_uprobe_ref_ctr()
+ * resolves this function's offset with get_uprobe_offset().
+ * The empty asm volatile statement mirrors the other trigger_func*() helpers;
+ * presumably it, together with noinline, keeps the function from being
+ * optimized away.
+ */
+static noinline void trigger_func4(void)
+{
+ asm volatile ("");
+}
+
static char test_data[] = "test_data";
-void test_attach_probe(void)
+/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */
+static void test_attach_probe_manual(enum probe_attach_mode attach_mode)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
- struct test_attach_probe* skel;
- ssize_t uprobe_offset, ref_ctr_offset;
- struct bpf_link *uprobe_err_link;
- FILE *devnull;
- bool legacy;
-
- /* Check if new-style kprobe/uprobe API is supported.
- * Kernels that support new FD-based kprobe and uprobe BPF attachment
- * through perf_event_open() syscall expose
- * /sys/bus/event_source/devices/kprobe/type and
- * /sys/bus/event_source/devices/uprobe/type files, respectively. They
- * contain magic numbers that are passed as "type" field of
- * perf_event_attr. Lack of such file in the system indicates legacy
- * kernel with old-style kprobe/uprobe attach interface through
- * creating per-probe event through tracefs. For such cases
- * ref_ctr_offset feature is not supported, so we don't test it.
- */
- legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
- uprobe_offset = get_uprobe_offset(&trigger_func);
- if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
return;
- ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
- if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
- return;
-
- skel = test_attach_probe__open();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
-
- /* sleepable kprobe test case needs flags set before loading */
- if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
- BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
- goto cleanup;
-
- if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
- goto cleanup;
- if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
goto cleanup;
/* manual-attach kprobe/kretprobe */
- kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
- false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
+ kprobe_opts.attach_mode = attach_mode;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
- kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
- true /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- /* auto-attachable kprobe and kretprobe */
- skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
- ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
-
- skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
- ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
-
- if (!legacy)
- ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
-
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = attach_mode;
+ uprobe_opts.ref_ctr_offset = 0;
uprobe_opts.retprobe = false;
- uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
0 /* self pid */,
"/proc/self/exe",
@@ -107,12 +82,7 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- if (!legacy)
- ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
-
- /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
uprobe_opts.retprobe = true;
- uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
-1 /* any pid */,
"/proc/self/exe",
@@ -121,12 +91,7 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
- /* verify auto-attach fails for old-style uprobe definition */
- uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
- if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
- "auto-attach should fail for old-style name"))
- goto cleanup;
-
+ /* attach uprobe by function name manually */
uprobe_opts.func_name = "trigger_func2";
uprobe_opts.retprobe = false;
uprobe_opts.ref_ctr_offset = 0;
@@ -138,11 +103,63 @@ void test_attach_probe(void)
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
goto cleanup;
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_func();
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+static void test_attach_probe_auto(struct test_attach_probe *skel)
+{
+ struct bpf_link *uprobe_err_link;
+
+ /* auto-attachable kprobe and kretprobe */
+ skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+ skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ return;
+
/* verify auto-attach works */
skel->links.handle_uretprobe_byname =
bpf_program__attach(skel->progs.handle_uretprobe_byname);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
- goto cleanup;
+ return;
+
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
+ ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+}
+
+static void test_uprobe_lib(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ FILE *devnull;
/* test attach by name for a library function, using the library
* as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
@@ -155,7 +172,7 @@ void test_attach_probe(void)
"libc.so.6",
0, &uprobe_opts);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
- goto cleanup;
+ return;
uprobe_opts.func_name = "fclose";
uprobe_opts.retprobe = true;
@@ -165,62 +182,144 @@ void test_attach_probe(void)
"libc.so.6",
0, &uprobe_opts);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
+ return;
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen("/dev/null", "r");
+ fclose(devnull);
+
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+}
+
+/* Attach a uprobe and a uretprobe to trigger_func4() with a reference counter
+ * located at uprobe_ref_ctr.
+ *
+ * Checks that the counter is 0 before anything is attached and greater than 0
+ * once the uprobe is attached. Successful links are stored in skel->links;
+ * presumably the caller's skeleton destroy releases them. Any failed assertion
+ * on an offset or on an attach result returns early.
+ */
+static void test_uprobe_ref_ctr(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ ssize_t uprobe_offset, ref_ctr_offset;
+
+ /* offset of the attach point */
+ uprobe_offset = get_uprobe_offset(&trigger_func4);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr"))
+ return;
+
+ /* offset of the reference counter variable */
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
+
+ /* nothing attached yet, so the counter must still be zero */
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr"))
+ return;
+ skel->links.handle_uprobe_ref_ctr = uprobe_link;
+
+ /* the counter is expected to be non-zero while the uprobe is attached */
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr"))
+ return;
+ skel->links.handle_uretprobe_ref_ctr = uretprobe_link;
+}
+
+static void test_kprobe_sleepable(void)
+{
+ struct test_attach_kprobe_sleepable *skel;
+
+ skel = test_attach_kprobe_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open"))
+ return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel),
+ "skel_kprobe_sleepable_load"))
goto cleanup;
/* sleepable kprobes should not attach successfully */
skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
- if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"))
- goto cleanup;
+ ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable");
+cleanup:
+ test_attach_kprobe_sleepable__destroy(skel);
+}
+
+static void test_uprobe_sleepable(struct test_attach_probe *skel)
+{
/* test sleepable uprobe and uretprobe variants */
skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
- goto cleanup;
+ return;
skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
- goto cleanup;
+ return;
skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
- goto cleanup;
+ return;
skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
- goto cleanup;
+ return;
skel->bss->user_ptr = test_data;
- /* trigger & validate kprobe && kretprobe */
- usleep(1);
-
- /* trigger & validate shared library u[ret]probes attached by name */
- devnull = fopen("/dev/null", "r");
- fclose(devnull);
-
- /* trigger & validate uprobe & uretprobe */
- trigger_func();
-
- /* trigger & validate uprobe attached by name */
- trigger_func2();
-
/* trigger & validate sleepable uprobe attached by name */
trigger_func3();
- ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
- ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
- ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
- ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
- ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
- ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
- ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
- ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
- ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
- ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
+}
+
+void test_attach_probe(void)
+{
+ struct test_attach_probe *skel;
+
+ skel = test_attach_probe__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
+ goto cleanup;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ if (test__start_subtest("manual-default"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT);
+ if (test__start_subtest("manual-legacy"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY);
+ if (test__start_subtest("manual-perf"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("manual-link"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LINK);
+
+ if (test__start_subtest("auto"))
+ test_attach_probe_auto(skel);
+ if (test__start_subtest("kprobe-sleepable"))
+ test_kprobe_sleepable();
+ if (test__start_subtest("uprobe-lib"))
+ test_uprobe_lib(skel);
+ if (test__start_subtest("uprobe-sleepable"))
+ test_uprobe_sleepable(skel);
+ if (test__start_subtest("uprobe-ref_ctr"))
+ test_uprobe_ref_ctr(skel);
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index b3f7985c8504..adda85f97058 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -84,6 +84,7 @@ static const char * const success_tests[] = {
"test_cgrp_xchg_release",
"test_cgrp_get_release",
"test_cgrp_get_ancestors",
+ "test_cgrp_from_id",
};
void test_cgrp_kfunc(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 2cc759956e3b..63e776f4176e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -193,7 +193,7 @@ out:
cgrp_ls_sleepable__destroy(skel);
}
-static void test_no_rcu_lock(__u64 cgroup_id)
+static void test_yes_rcu_lock(__u64 cgroup_id)
{
struct cgrp_ls_sleepable *skel;
int err;
@@ -204,7 +204,7 @@ static void test_no_rcu_lock(__u64 cgroup_id)
skel->bss->target_pid = syscall(SYS_gettid);
- bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
+ bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -220,7 +220,7 @@ out:
cgrp_ls_sleepable__destroy(skel);
}
-static void test_rcu_lock(void)
+static void test_no_rcu_lock(void)
{
struct cgrp_ls_sleepable *skel;
int err;
@@ -229,7 +229,7 @@ static void test_rcu_lock(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
+ bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
ASSERT_ERR(err, "skel_load");
@@ -256,10 +256,10 @@ void test_cgrp_local_storage(void)
test_negative();
if (test__start_subtest("cgroup_iter_sleepable"))
test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
+ if (test__start_subtest("yes_rcu_lock"))
+ test_yes_rcu_lock(cgroup_id);
if (test__start_subtest("no_rcu_lock"))
- test_no_rcu_lock(cgroup_id);
- if (test__start_subtest("rcu_lock"))
- test_rcu_lock();
+ test_no_rcu_lock();
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 224f016b0a53..2a55f717fc07 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -13,6 +13,7 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_dynptr.skel.h"
#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
@@ -446,6 +447,28 @@ cleanup:
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+/* Run the shared cls_redirect checks against the dynptr flavour of the
+ * program: open the skeleton, set the encapsulation address and port in
+ * rodata, load it, and hand the cls_redirect program to
+ * test_cls_redirect_common(). The skeleton is destroyed whether or not the
+ * load succeeds.
+ */
+static void test_cls_redirect_dynptr(void)
+{
+	struct test_cls_redirect_dynptr *skel = test_cls_redirect_dynptr__open();
+
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	/* rodata is filled in between open and load */
+	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+	/* only exercise the program when the load went through */
+	if (ASSERT_OK(test_cls_redirect_dynptr__load(skel), "skel_load"))
+		test_cls_redirect_common(skel->progs.cls_redirect);
+
+	test_cls_redirect_dynptr__destroy(skel);
+}
+
static void test_cls_redirect_inlined(void)
{
struct test_cls_redirect *skel;
@@ -496,4 +519,6 @@ void test_cls_redirect(void)
test_cls_redirect_inlined();
if (test__start_subtest("cls_redirect_subprogs"))
test_cls_redirect_subprogs();
+ if (test__start_subtest("cls_redirect_dynptr"))
+ test_cls_redirect_dynptr();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
new file mode 100644
index 000000000000..d5fe3d4b936c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <regex.h>
+#include <test_progs.h>
+
+#include "bpf/btf.h"
+#include "bpf_util.h"
+#include "linux/filter.h"
+#include "disasm.h"
+
+#define MAX_PROG_TEXT_SZ (32 * 1024)
+
+/* The code in this file serves the sole purpose of executing test cases
+ * specified in the test_cases array. Each test case specifies a program
+ * type, context field offset, and disassembly patterns that correspond
+ * to read and write instructions generated by
+ * verifier.c:convert_ctx_access() for accessing that field.
+ *
+ * For each test case, up to three programs are created:
+ * - One that uses BPF_LDX_MEM to read the context field.
+ * - One that uses BPF_STX_MEM to write to the context field.
+ * - One that uses BPF_ST_MEM to write to the context field.
+ *
+ * The disassembly of each program is then compared with the pattern
+ * specified in the test case.
+ */
+/* Description of one context-field rewrite check: which program type and
+ * context field to access, and the disassembly patterns expected for the
+ * read and write forms of that access.
+ */
+struct test_case {
+ /* Subtest name, passed to test__start_subtest() */
+ char *name;
+ /* Program type and attach type used when loading the generated programs */
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ /* Offset and size (in bytes) of the accessed field within the context struct */
+ int field_offset;
+ int field_sz;
+ /* The program generated for BPF_ST_MEM stores the value 42 by default;
+ * this field allows a custom value to be specified instead.
+ */
+ struct {
+ bool use;
+ int value;
+ } st_value;
+ /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */
+ char *read;
+ /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and
+ * BPF_ST_MEM (field_sz, ctx, src, field_offset)
+ */
+ char *write;
+ /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_st;
+ /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_stx;
+};
+
+/* Expands to the designated initializers shared by every test case:
+ * - .name is "<_prog_type>.<field>" with the optional string literal
+ *   `name_extra` appended;
+ * - .prog_type is BPF_PROG_TYPE_<_prog_type>;
+ * - .field_offset / .field_sz are the offset and size of `field` in `type`.
+ */
+#define N(_prog_type, type, field, name_extra...) \
+ .name = #_prog_type "." #field name_extra, \
+ .prog_type = BPF_PROG_TYPE_##_prog_type, \
+ .field_offset = offsetof(type, field), \
+ .field_sz = sizeof(typeof(((type *)NULL)->field))
+
+static struct test_case test_cases[] = {
+/* Sign extension on s390 changes the pattern */
+#if defined(__x86_64__) || defined(__aarch64__)
+ {
+ N(SCHED_CLS, struct __sk_buff, tstamp),
+ .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
+ "w11 &= 160;"
+ "if w11 != 0xa0 goto pc+2;"
+ "$dst = 0;"
+ "goto pc+1;"
+ "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
+ .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
+ "if w11 & 0x80 goto pc+1;"
+ "goto pc+2;"
+ "w11 &= -33;"
+ "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;"
+ "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
+ },
+#endif
+ {
+ N(SCHED_CLS, struct __sk_buff, priority),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::priority);",
+ .write = "*(u32 *)($ctx + sk_buff::priority) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, mark),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::mark);",
+ .write = "*(u32 *)($ctx + sk_buff::mark) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, cb[0]),
+ .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));",
+ .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_classid),
+ .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));",
+ .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_index),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);",
+ .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, queue_mapping),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);",
+ .write_stx = "if $src >= 0xffff goto pc+1;"
+ "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ },
+ {
+ /* This is a corner case in filter.c:bpf_convert_ctx_access() */
+ N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"),
+ .st_value = { true, USHRT_MAX },
+ .write_st = "goto pc+0;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, bound_dev_if),
+ .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);",
+ .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, mark),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_mark);",
+ .write = "*(u32 *)($ctx + sock::sk_mark) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, priority),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_priority);",
+ .write = "*(u32 *)($ctx + sock::sk_priority) = $src;",
+ },
+ {
+ N(SOCK_OPS, struct bpf_sock_ops, replylong[0]),
+ .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);",
+ .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;",
+ },
+ {
+ N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +0);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +0) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#else
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +4);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +4) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#endif
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, sk),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, level),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optname),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, retval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "$dst = *(u64 *)($dst + task_struct::bpf_ctx);"
+ "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);",
+ .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);"
+ "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+};
+
+#undef N
+
+static regex_t *ident_regex;
+static regex_t *field_regex;
+
+/* Return a pointer to the first character of `str` that is not whitespace
+ * (possibly the terminating NUL).
+ */
+static char *skip_space(char *str)
+{
+	for (; *str != '\0'; ++str) {
+		if (!isspace(*str))
+			break;
+	}
+	return str;
+}
+
+/* Like skip_space(), but ';' is treated as whitespace as well: return a
+ * pointer to the first character that is neither whitespace nor ';'.
+ */
+static char *skip_space_and_semi(char *str)
+{
+	for (; *str != '\0'; ++str) {
+		if (*str != ';' && !isspace(*str))
+			break;
+	}
+	return str;
+}
+
+/* If `str` starts with `prefix`, return a pointer just past the prefix
+ * within `str`; otherwise return NULL. An empty prefix always matches.
+ */
+static char *match_str(char *str, char *prefix)
+{
+	for (; *prefix; ++str, ++prefix) {
+		/* Also covers `str` ending early: its NUL differs from *prefix */
+		if (*str != *prefix)
+			return NULL;
+	}
+	return str;
+}
+
+/* Parse a decimal number at the start of `str`. Return a pointer just past
+ * it when it equals `num`; return NULL when no number could be parsed or
+ * when the parsed value differs.
+ */
+static char *match_number(char *str, int num)
+{
+	char *end = NULL;
+	int parsed = strtol(str, &end, 10);
+
+	/* strtol() leaves `end` at `str` when nothing was consumed */
+	if (end == str)
+		return NULL;
+
+	return parsed == num ? end : NULL;
+}
+
+/* Look up `field_name` in the struct or union with BTF id `btf_id`,
+ * descending into anonymous members.
+ *
+ * `off` is the bit offset of the type being searched relative to the
+ * outermost type. Returns the byte offset of the field relative to the
+ * outermost type, or -1 when the type is missing, is not a struct/union,
+ * or has no such field.
+ */
+static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off)
+{
+	const struct btf_type *type = btf__type_by_id(btf, btf_id);
+	const struct btf_member *m;
+	__u16 mnum;
+	int i;
+
+	if (!type) {
+		PRINT_FAIL("Can't find btf_type for id %d\n", btf_id);
+		return -1;
+	}
+
+	if (!btf_is_struct(type) && !btf_is_union(type)) {
+		PRINT_FAIL("BTF id %d is not struct or union\n", btf_id);
+		return -1;
+	}
+
+	m = btf_members(type);
+	mnum = btf_vlen(type);
+
+	for (i = 0; i < mnum; ++i, ++m) {
+		const char *mname = btf__name_by_offset(btf, m->name_off);
+		/* For types with kind_flag set, btf_member::offset also carries
+		 * the bitfield size in its upper bits, so go through the
+		 * accessor instead of reading m->offset directly.
+		 */
+		int bit_off = off + btf_member_bit_offset(type, i);
+
+		if (!mname)
+			continue;
+
+		/* Anonymous member: the field may live inside it */
+		if (strcmp(mname, "") == 0) {
+			int nested = find_field_offset_aux(btf, m->type, field_name,
+							   bit_off);
+			if (nested >= 0)
+				return nested;
+		}
+
+		if (strcmp(mname, field_name))
+			continue;
+
+		return bit_off / 8;
+	}
+
+	return -1;
+}
+
+/* Resolve a `<type>::<field>` reference to a byte offset using BTF.
+ *
+ * `matches` holds the regex sub-matches for `pattern`: matches[1] spans
+ * the type name and matches[2] spans the field name. Returns the field
+ * offset in bytes, or -1 (after reporting a failure) when a name is too
+ * long or the type/field cannot be found.
+ */
+static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches)
+{
+	char type_str[128] = {};
+	char field_str[128] = {};
+	int type_sz = matches[1].rm_eo - matches[1].rm_so;
+	int field_sz = matches[2].rm_eo - matches[2].rm_so;
+	int btf_id, field_offset;
+
+	/* Both names must fit with room left for the terminating NUL */
+	if (type_sz >= sizeof(type_str)) {
+		PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz);
+		return -1;
+	}
+
+	if (field_sz >= sizeof(field_str)) {
+		PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz);
+		return -1;
+	}
+
+	/* Buffers are zero-initialised, so the copies stay NUL-terminated */
+	strncpy(type_str, pattern + matches[1].rm_so, type_sz);
+	strncpy(field_str, pattern + matches[2].rm_so, field_sz);
+
+	btf_id = btf__find_by_name(btf, type_str);
+	if (btf_id < 0) {
+		PRINT_FAIL("No BTF info for type %s\n", type_str);
+		return -1;
+	}
+
+	field_offset = find_field_offset_aux(btf, btf_id, field_str, 0);
+	if (field_offset >= 0)
+		return field_offset;
+
+	PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str);
+	return -1;
+}
+
+/* Allocate and compile the extended regular expression `pat`.
+ * Returns the compiled regex (release it with free_regex()), or NULL
+ * after reporting a failure when allocation or compilation fails.
+ */
+static regex_t *compile_regex(char *pat)
+{
+	char errbuf[512];
+	regex_t *re = malloc(sizeof(regex_t));
+	int err;
+
+	if (!re) {
+		PRINT_FAIL("Can't alloc regex\n");
+		return NULL;
+	}
+
+	err = regcomp(re, pat, REG_EXTENDED);
+	if (!err)
+		return re;
+
+	regerror(err, re, errbuf, sizeof(errbuf));
+	PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+	free(re);
+	return NULL;
+}
+
+/* Release a regex obtained from compile_regex(); NULL is accepted and
+ * ignored.
+ */
+static void free_regex(regex_t *re)
+{
+	if (re) {
+		regfree(re);
+		free(re);
+	}
+}
+
+/* Length of the longest '\n'-separated line in `str`, capped at 60. */
+static u32 max_line_len(char *str)
+{
+	u32 longest = 0;
+	char *line = str;
+
+	while (line) {
+		char *nl = strchr(line, '\n');
+
+		if (nl) {
+			longest = max_t(u32, longest, (nl - line));
+			line = nl + 1;
+		} else {
+			/* Last line: runs up to the terminating NUL */
+			longest = max_t(u32, longest, strlen(line));
+			line = NULL;
+		}
+	}
+
+	return min(longest, 60u);
+}
+
+/* Print strings `pattern_origin` and `text_origin` side by side,
+ * assume `pattern_pos` and `text_pos` designate location within
+ * corresponding origin string where match diverges.
+ * The output should look like:
+ *
+ * Can't match disassembly(left) with pattern(right):
+ * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1)
+ * ^ ^
+ * r0 = 0 ;
+ * exit ;
+ *
+ * Text lines are separated by '\n', pattern lines by ';'. The text column
+ * is padded to max_line_len(text) + 2 characters.
+ */
+static void print_match_error(FILE *out,
+ char *pattern_origin, char *text_origin,
+ char *pattern_pos, char *text_pos)
+{
+ char *pattern = pattern_origin;
+ char *text = text_origin;
+ int middle = max_line_len(text) + 2;
+
+ fprintf(out, "Can't match disassembly(left) with pattern(right):\n");
+ while (*pattern || *text) {
+ int column = 0;
+ /* Output columns of the divergence points on this line; -1 = not on this line */
+ int mark1 = -1;
+ int mark2 = -1;
+
+ /* Print one line from text */
+ while (*text && *text != '\n') {
+ if (text == text_pos)
+ mark1 = column;
+ fputc(*text, out);
+ ++text;
+ ++column;
+ }
+ /* Divergence may sit exactly at the end of the line */
+ if (text == text_pos)
+ mark1 = column;
+
+ /* Pad to the middle */
+ while (column < middle) {
+ fputc(' ', out);
+ ++column;
+ }
+ /* NOTE(review): the separator as shown here is two characters but
+ * `column` advances by 3; confirm the literal against the original
+ * file (whitespace may have been lost in this rendering), otherwise
+ * the pattern-side '^' lands one column too far right.
+ */
+ fputs("; ", out);
+ column += 3;
+
+ /* Print one line from pattern, pattern lines are terminated by ';' */
+ while (*pattern && *pattern != ';') {
+ if (pattern == pattern_pos)
+ mark2 = column;
+ fputc(*pattern, out);
+ ++pattern;
+ ++column;
+ }
+ if (pattern == pattern_pos)
+ mark2 = column;
+
+ fputc('\n', out);
+ /* Step over the ';' / '\n' terminators, if any */
+ if (*pattern)
+ ++pattern;
+ if (*text)
+ ++text;
+
+ /* If pattern and text diverge at this line, print an
+ * additional line with '^' marks, highlighting
+ * positions where match fails.
+ *
+ * NOTE(review): the test below is `> 0` although -1 is the
+ * "unset" value, so a text mark at column 0 with no pattern mark
+ * on the same line prints no marker line; confirm whether `>= 0`
+ * was intended.
+ */
+ if (mark1 > 0 || mark2 > 0) {
+ for (column = 0; column <= max(mark1, mark2); ++column) {
+ if (column == mark1 || column == mark2)
+ fputc('^', out);
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+ }
+}
+
+/* Test if `text` matches `pattern`. Pattern consists of the following elements:
+ *
+ * - Field offset references:
+ *
+ * <type>::<field>
+ *
+ * When such reference is encountered BTF is used to compute numerical
+ * value for the offset of <field> in <type>. The `text` is expected to
+ * contain matching numerical value.
+ *
+ * - Field groups:
+ *
+ * $(<type>::<field> [+ <type>::<field>]*)
+ *
+ * Allows to specify an offset that is a sum of multiple field offsets.
+ * The `text` is expected to contain matching numerical value.
+ *
+ * - Variable references, e.g. `$src`, `$dst`, `$ctx`.
+ * These are substitutions specified in `reg_map` array.
+ * If a substring of pattern is equal to `reg_map[i][0]` the `text` is
+ * expected to contain `reg_map[i][1]` in the matching position.
+ *
+ * - Whitespace is ignored, ';' counts as whitespace for `pattern`.
+ *
+ * - Any other characters, `pattern` and `text` should match one-to-one.
+ *
+ * Example of a pattern:
+ *
+ * __________ fields group ________________
+ * ' '
+ * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;
+ * ^^^^ '______________________'
+ * variable reference field offset reference
+ */
+static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2])
+{
+	/* Returns true when the whole `pattern` has been matched against a
+	 * prefix of `text`. On mismatch the test is marked as failed, a
+	 * side-by-side diagnostic is printed to stdout and false is returned.
+	 */
+	char *pattern_origin = pattern;
+	char *text_origin = text;
+	regmatch_t matches[3];
+
+	/* Target for restarting the outer loop from inside the nested
+	 * reg_map loop, where a plain `continue` would only advance the
+	 * inner loop.
+	 */
+_continue:
+	while (*pattern) {
+		if (!*text)
+			goto err;
+
+		/* Skip whitespace. Whitespace in the pattern needs a token
+		 * boundary in the text: either whitespace at this position,
+		 * or a preceding character that is not alphanumeric.
+		 */
+		if (isspace(*pattern) || *pattern == ';') {
+			if (!isspace(*text) && text != text_origin && isalnum(text[-1]))
+				goto err;
+			pattern = skip_space_and_semi(pattern);
+			text = skip_space(text);
+			continue;
+		}
+
+		/* Check for variable references */
+		for (int i = 0; reg_map[i][0]; ++i) {
+			char *pattern_next, *text_next;
+
+			pattern_next = match_str(pattern, reg_map[i][0]);
+			if (!pattern_next)
+				continue;
+
+			text_next = match_str(text, reg_map[i][1]);
+			if (!text_next)
+				goto err;
+
+			pattern = pattern_next;
+			text = text_next;
+			goto _continue;
+		}
+
+		/* Match field group:
+		 * $(sk_buff::cb + qdisc_skb_cb::tc_classid)
+		 */
+		if (strncmp(pattern, "$(", 2) == 0) {
+			char *group_start = pattern, *text_next;
+			int acc_offset = 0;
+
+			pattern += 2;
+
+			/* Sum the offsets of all field references in the group */
+			for (;;) {
+				int field_offset;
+
+				pattern = skip_space(pattern);
+				if (!*pattern) {
+					PRINT_FAIL("Unexpected end of pattern\n");
+					goto err;
+				}
+
+				if (*pattern == ')') {
+					++pattern;
+					break;
+				}
+
+				if (*pattern == '+') {
+					++pattern;
+					continue;
+				}
+
+				if (regexec(field_regex, pattern, 3, matches, 0) != 0) {
+					PRINT_FAIL("Field reference expected\n");
+					goto err;
+				}
+
+				field_offset = find_field_offset(btf, pattern, matches);
+				if (field_offset < 0)
+					goto err;
+
+				pattern += matches[0].rm_eo;
+				acc_offset += field_offset;
+			}
+
+			text_next = match_number(text, acc_offset);
+			if (!text_next) {
+				PRINT_FAIL("No match for group offset %.*s (%d)\n",
+					   (int)(pattern - group_start),
+					   group_start,
+					   acc_offset);
+				goto err;
+			}
+			text = text_next;
+			/* Restart the loop: the group may be the last element of
+			 * the pattern, in which case falling through to the
+			 * literal-match rule below would step past the
+			 * terminating NUL.
+			 */
+			continue;
+		}
+
+		/* Match field reference:
+		 * sk_buff::cb
+		 */
+		if (regexec(field_regex, pattern, 3, matches, 0) == 0) {
+			int field_offset;
+			char *text_next;
+
+			field_offset = find_field_offset(btf, pattern, matches);
+			if (field_offset < 0)
+				goto err;
+
+			text_next = match_number(text, field_offset);
+			if (!text_next) {
+				PRINT_FAIL("No match for field offset %.*s (%d)\n",
+					   (int)matches[0].rm_eo, pattern, field_offset);
+				goto err;
+			}
+
+			pattern += matches[0].rm_eo;
+			text = text_next;
+			continue;
+		}
+
+		/* If pattern points to identifier not followed by '::'
+		 * skip the identifier to avoid n^2 application of the
+		 * field reference rule.
+		 */
+		if (regexec(ident_regex, pattern, 1, matches, 0) == 0) {
+			if (strncmp(pattern, text, matches[0].rm_eo) != 0)
+				goto err;
+
+			pattern += matches[0].rm_eo;
+			text += matches[0].rm_eo;
+			continue;
+		}
+
+		/* Match literally */
+		if (*pattern != *text)
+			goto err;
+
+		++pattern;
+		++text;
+	}
+
+	return true;
+
+err:
+	test__fail();
+	print_match_error(stdout, pattern_origin, text_origin, pattern, text);
+	return false;
+}
+
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ *
+ * On success returns 0, stores a calloc()-allocated instruction array in
+ * `*buf` (to be released by the caller with free()) and the number of
+ * instructions in `*cnt`.
+ * On failure returns a negative value and leaves `*buf` set to NULL, so
+ * the caller may unconditionally free() it.
+ */
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 xlated_prog_len;
+	__u32 buf_element_size = sizeof(struct bpf_insn);
+
+	*buf = NULL;
+	*cnt = 0;
+
+	/* First query: only learn the size of the translated program */
+	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("bpf_prog_get_info_by_fd failed");
+		return -1;
+	}
+
+	xlated_prog_len = info.xlated_prog_len;
+	if (xlated_prog_len % buf_element_size) {
+		printf("Program length %d is not multiple of %d\n",
+		       xlated_prog_len, buf_element_size);
+		return -1;
+	}
+
+	*cnt = xlated_prog_len / buf_element_size;
+	*buf = calloc(*cnt, buf_element_size);
+	/* Check the allocation itself, not the out-parameter */
+	if (!*buf) {
+		perror("can't allocate xlated program buffer");
+		return -ENOMEM;
+	}
+
+	/* Second query: fetch the instructions into the buffer. info_len is
+	 * an in/out argument, so restore it before reusing it.
+	 */
+	bzero(&info, sizeof(info));
+	info_len = sizeof(info);
+	info.xlated_prog_len = xlated_prog_len;
+	info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("second bpf_prog_get_info_by_fd failed");
+		goto out_free_buf;
+	}
+
+	return 0;
+
+out_free_buf:
+	free(*buf);
+	/* Don't hand a dangling pointer back: the caller frees *buf too */
+	*buf = NULL;
+	return -1;
+}
+
+/* Disassembler print callback: `private_data` is the FILE stream to which
+ * the formatted text is written.
+ */
+static void print_insn(void *private_data, const char *fmt, ...)
+{
+	FILE *stream = (FILE *)private_data;
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stream, fmt, ap);
+	va_end(ap);
+}
+
+/* Disassemble `len` instructions starting at `insn` into the stream `out`.
+ * An instruction with code BPF_LD | BPF_IMM | BPF_DW occupies two slots;
+ * the second slot is skipped rather than printed.
+ */
+static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len)
+{
+	const struct bpf_insn_cbs cbs = {
+		.cb_print = print_insn,
+		.cb_call = NULL,
+		.cb_imm = NULL,
+		.private_data = out,
+	};
+	__u32 idx = 0;
+
+	while (idx < len) {
+		struct bpf_insn *cur = &insn[idx];
+		bool wide = cur->code == (BPF_LD | BPF_IMM | BPF_DW);
+
+		print_bpf_insn(&cbs, cur, true);
+		idx += wide ? 2 : 1;
+	}
+}
+
+/* The disassembler shared with the kernel prefixes every instruction with
+ * '(FF) ', where FF is the instruction `code` byte. Strip that 5-character
+ * prefix from each line of `str`, in place. At most `size` bytes of `str`
+ * are examined.
+ */
+static void remove_insn_prefix(char *str, int size)
+{
+	const int prefix_size = 5;
+	int len = strlen(str);
+	int limit = size < len ? size : len;
+	int src = prefix_size;
+	int dst = 0;
+
+	while (src < limit) {
+		char c = str[src++];
+
+		if (c == 0)
+			break;
+		str[dst++] = c;
+		/* A new line starts after '\n': jump over its prefix too */
+		if (c == '\n')
+			src += prefix_size;
+	}
+	str[dst] = 0;
+}
+
+/* Everything match_program() needs to load one generated program. */
+struct prog_info {
+ /* Short label for the program flavour ("LDX", "STX" or "ST"), used as the assertion name */
+ char *prog_kind;
+ /* Program type and attach type passed to bpf_prog_load() */
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ /* Instruction array and its length in instructions */
+ struct bpf_insn *prog;
+ u32 prog_len;
+};
+
+/* Load the program described by `pinfo`, fetch its instructions as
+ * rewritten by the verifier, disassemble them and check the disassembly
+ * against `pattern` (see match_pattern() for the pattern syntax and the
+ * role of `reg_map`).
+ *
+ * When `skip_first_insn` is set, the first instruction of the loaded
+ * program is left out of the disassembly. Failures are reported through
+ * PRINT_FAIL / ASSERT_TRUE; nothing is returned.
+ */
+static void match_program(struct btf *btf,
+			  struct prog_info *pinfo,
+			  char *pattern,
+			  char *reg_map[][2],
+			  bool skip_first_insn)
+{
+	struct bpf_insn *buf = NULL;
+	/* -1 rather than 0: 0 is a valid descriptor, a negative one is not */
+	int err = 0, prog_fd = -1;
+	FILE *prog_out = NULL;
+	char *text = NULL;
+	__u32 cnt = 0;
+
+	/* `text` first receives the verifier log, then the disassembly */
+	text = calloc(MAX_PROG_TEXT_SZ, 1);
+	if (!text) {
+		PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ);
+		goto out;
+	}
+
+	/* TODO: log level */
+	LIBBPF_OPTS(bpf_prog_load_opts, opts);
+	opts.log_buf = text;
+	opts.log_size = MAX_PROG_TEXT_SZ;
+	opts.log_level = 1 | 2 | 4;
+	opts.expected_attach_type = pinfo->expected_attach_type;
+
+	prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL",
+				pinfo->prog, pinfo->prog_len, &opts);
+	if (prog_fd < 0) {
+		PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n",
+			   errno, strerror(errno), text);
+		goto out;
+	}
+
+	/* Drop the verifier log before reusing the buffer */
+	memset(text, 0, MAX_PROG_TEXT_SZ);
+
+	err = get_xlated_program(prog_fd, &buf, &cnt);
+	if (err) {
+		PRINT_FAIL("Can't load back BPF program\n");
+		goto out;
+	}
+
+	/* One byte is held back so the buffer always stays NUL-terminated */
+	prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w");
+	if (!prog_out) {
+		PRINT_FAIL("Can't open memory stream\n");
+		goto out;
+	}
+	/* Guard against cnt == 0: `cnt - 1` would wrap around */
+	if (skip_first_insn && cnt > 0)
+		print_xlated(prog_out, buf + 1, cnt - 1);
+	else
+		print_xlated(prog_out, buf, cnt);
+	fclose(prog_out);
+	remove_insn_prefix(text, MAX_PROG_TEXT_SZ);
+
+	ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map),
+		    pinfo->prog_kind);
+
+out:
+	if (prog_fd >= 0)
+		close(prog_fd);
+	free(buf);
+	free(text);
+}
+
+/* Run one entry of test_cases[] as a subtest named test->name.
+ *
+ * Depending on which patterns the test case provides, up to three small
+ * programs accessing the context field are built (LDX read, STX write,
+ * ST write) and each is checked with match_program().
+ */
+static void run_one_testcase(struct btf *btf, struct test_case *test)
+{
+ struct prog_info pinfo = {};
+ int bpf_sz;
+
+ if (!test__start_subtest(test->name))
+ return;
+
+ /* Translate the field size in bytes into the BPF access size */
+ switch (test->field_sz) {
+ case 8:
+ bpf_sz = BPF_DW;
+ break;
+ case 4:
+ bpf_sz = BPF_W;
+ break;
+ case 2:
+ bpf_sz = BPF_H;
+ break;
+ case 1:
+ bpf_sz = BPF_B;
+ break;
+ default:
+ PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz);
+ return;
+ }
+
+ pinfo.prog_type = test->prog_type;
+ pinfo.expected_attach_type = test->expected_attach_type;
+
+ if (test->read) {
+ /* r2 = ctx field; r0 = 0; exit */
+ struct bpf_insn ldx_prog[] = {
+ BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$dst", "r2" },
+ {}
+ };
+
+ pinfo.prog_kind = "LDX";
+ pinfo.prog = ldx_prog;
+ pinfo.prog_len = ARRAY_SIZE(ldx_prog);
+ match_program(btf, &pinfo, test->read, reg_map, false);
+ }
+
+ if (test->write || test->write_st || test->write_stx) {
+ /* r2 = 0; ctx field = r2; r0 = 0; exit */
+ struct bpf_insn stx_prog[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *stx_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "r2" },
+ {}
+ };
+ /* ctx field = immediate (42 unless st_value overrides it); r0 = 0; exit */
+ struct bpf_insn st_prog[] = {
+ BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset,
+ test->st_value.use ? test->st_value.value : 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ /* NOTE(review): "$src" always maps to the literal "42" here, even
+ * when st_value supplies a different immediate; a write_st pattern
+ * combined with a custom st_value presumably must not use $src.
+ */
+ char *st_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "42" },
+ {}
+ };
+
+ if (test->write || test->write_stx) {
+ /* write_stx takes priority over the generic write pattern */
+ char *pattern = test->write_stx ? test->write_stx : test->write;
+
+ pinfo.prog_kind = "STX";
+ pinfo.prog = stx_prog;
+ pinfo.prog_len = ARRAY_SIZE(stx_prog);
+ /* true: leave the leading "r2 = 0" setup insn out of the disassembly */
+ match_program(btf, &pinfo, pattern, stx_reg_map, true);
+ }
+
+ if (test->write || test->write_st) {
+ /* write_st takes priority over the generic write pattern */
+ char *pattern = test->write_st ? test->write_st : test->write;
+
+ pinfo.prog_kind = "ST";
+ pinfo.prog = st_prog;
+ pinfo.prog_len = ARRAY_SIZE(st_prog);
+ match_program(btf, &pinfo, pattern, st_reg_map, false);
+ }
+ }
+
+ test__end_subtest();
+}
+
+/* Test entry point: compile the regexes used by match_pattern(), load
+ * vmlinux BTF and run every entry of test_cases[] as a subtest.
+ */
+void test_ctx_rewrite(void)
+{
+	struct btf *btf = NULL;
+	int i;
+
+	field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)");
+	ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+");
+	/* Go through the common cleanup so that a regex which did compile
+	 * is still released when the other one failed.
+	 */
+	if (!field_regex || !ident_regex)
+		goto out;
+
+	btf = btf__load_vmlinux_btf();
+	if (!btf) {
+		PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno));
+		goto out;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); ++i)
+		run_one_testcase(btf, &test_cases[i]);
+
+out:
+	btf__free(btf);
+	free_regex(field_regex);
+	free_regex(ident_regex);
+	/* Don't leave the file-scope pointers dangling after the free */
+	field_regex = NULL;
+	ident_regex = NULL;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
index 2853883b7cbb..5c0ebe6ba866 100644
--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -10,14 +10,6 @@
#include "network_helpers.h"
#include "decap_sanity.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
#define NS_TEST "decap_sanity_ns"
#define IPV6_IFACE_ADDR "face::1"
#define UDP_TEST_PORT 7777
@@ -37,9 +29,9 @@ void test_decap_sanity(void)
if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
return;
- SYS("ip netns add %s", NS_TEST);
- SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
- SYS("ip -net %s link set dev lo up", NS_TEST);
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
nstoken = open_netns(NS_TEST);
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -80,6 +72,6 @@ fail:
bpf_tc_hook_destroy(&qdisc_hook);
close_netns(nstoken);
}
- system("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
decap_sanity__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b99264ec0d9c..d176c34a7d2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -2,20 +2,32 @@
/* Copyright (c) 2022 Facebook */
#include <test_progs.h>
+#include <network_helpers.h>
#include "dynptr_fail.skel.h"
#include "dynptr_success.skel.h"
-static const char * const success_tests[] = {
- "test_read_write",
- "test_data_slice",
- "test_ringbuf",
+enum test_setup_type {
+ SETUP_SYSCALL_SLEEP,
+ SETUP_SKB_PROG,
};
-static void verify_success(const char *prog_name)
+static struct {
+ const char *prog_name;
+ enum test_setup_type type;
+} success_tests[] = {
+ {"test_read_write", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_ringbuf", SETUP_SYSCALL_SLEEP},
+ {"test_skb_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_skb_data", SETUP_SKB_PROG},
+};
+
+static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
+ int err;
skel = dynptr_success__open();
if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
@@ -23,23 +35,53 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
- dynptr_success__load(skel);
- if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
- goto cleanup;
-
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto cleanup;
- link = bpf_program__attach(prog);
- if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ bpf_program__set_autoload(prog, true);
+
+ err = dynptr_success__load(skel);
+ if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
- usleep(1);
+ switch (setup_type) {
+ case SETUP_SYSCALL_SLEEP:
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
- ASSERT_EQ(skel->bss->err, 0, "err");
+ usleep(1);
+
+ bpf_link__destroy(link);
+ break;
+ case SETUP_SKB_PROG:
+ {
+ int prog_fd;
+ char buf[64];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- bpf_link__destroy(link);
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ }
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
cleanup:
dynptr_success__destroy(skel);
@@ -50,10 +92,10 @@ void test_dynptr(void)
int i;
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
- if (!test__start_subtest(success_tests[i]))
+ if (!test__start_subtest(success_tests[i].prog_name))
continue;
- verify_success(success_tests[i]);
+ verify_success(success_tests[i].prog_name, success_tests[i].type);
}
RUN_TESTS(dynptr_fail);
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 32dd731e9070..3b77d8a422db 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -4,11 +4,6 @@
#include <net/if.h>
#include "empty_skb.skel.h"
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
void test_empty_skb(void)
{
LIBBPF_OPTS(bpf_test_run_opts, tattr);
@@ -93,18 +88,18 @@ void test_empty_skb(void)
},
};
- SYS("ip netns add empty_skb");
+ SYS(out, "ip netns add empty_skb");
tok = open_netns("empty_skb");
- SYS("ip link add veth0 type veth peer veth1");
- SYS("ip link set dev veth0 up");
- SYS("ip link set dev veth1 up");
- SYS("ip addr add 10.0.0.1/8 dev veth0");
- SYS("ip addr add 10.0.0.2/8 dev veth1");
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "ip link set dev veth0 up");
+ SYS(out, "ip link set dev veth1 up");
+ SYS(out, "ip addr add 10.0.0.1/8 dev veth0");
+ SYS(out, "ip addr add 10.0.0.2/8 dev veth1");
veth_ifindex = if_nametoindex("veth0");
- SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
- SYS("ip link set ipip0 up");
- SYS("ip addr add 192.168.1.1/16 dev ipip0");
+ SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
+ SYS(out, "ip link set ipip0 up");
+ SYS(out, "ip addr add 192.168.1.1/16 dev ipip0");
ipip_ifindex = if_nametoindex("ipip0");
bpf_obj = empty_skb__open_and_load();
@@ -142,5 +137,5 @@ out:
empty_skb__destroy(bpf_obj);
if (tok)
close_netns(tok);
- system("ip netns del empty_skb");
+ SYS_NOFAIL("ip netns del empty_skb");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 61ccddccf485..429393caf612 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -8,14 +8,6 @@
#include "network_helpers.h"
#include "fib_lookup.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
#define NS_TEST "fib_lookup_ns"
#define IPV6_IFACE_ADDR "face::face"
#define IPV6_NUD_FAILED_ADDR "face::1"
@@ -59,16 +51,16 @@ static int setup_netns(void)
{
int err;
- SYS("ip link add veth1 type veth peer name veth2");
- SYS("ip link set dev veth1 up");
+ SYS(fail, "ip link add veth1 type veth peer name veth2");
+ SYS(fail, "ip link set dev veth1 up");
- SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
- SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
- SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
- SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
- SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
- SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+ SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
@@ -140,7 +132,7 @@ void test_fib_lookup(void)
return;
prog_fd = bpf_program__fd(skel->progs.fib_lookup);
- SYS("ip netns add %s", NS_TEST);
+ SYS(fail, "ip netns add %s", NS_TEST);
nstoken = open_netns(NS_TEST);
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -182,6 +174,6 @@ void test_fib_lookup(void)
fail:
if (nstoken)
close_netns(nstoken);
- system("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
fib_lookup__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 7acca37a3d2b..c4773173a4e4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -346,6 +346,30 @@ struct test tests[] = {
.retval = BPF_OK,
},
{
+ .name = "ipv6-empty-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0x00, 0x00, 0x00 },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
+ },
+ {
.name = "ipip-encap",
.pkt.ipip = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 9c1a18573ffd..1eab286b14fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -93,4 +93,6 @@ void test_l4lb_all(void)
test_l4lb("test_l4lb.bpf.o");
if (test__start_subtest("l4lb_noinline"))
test_l4lb("test_l4lb_noinline.bpf.o");
+ if (test__start_subtest("l4lb_noinline_dynptr"))
+ test_l4lb("test_l4lb_noinline_dynptr.bpf.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index f4ffdcabf4e4..239e1c5753b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -141,7 +141,7 @@ void test_log_fixup(void)
if (test__start_subtest("bad_core_relo_trunc_partial"))
bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
if (test__start_subtest("bad_core_relo_trunc_full"))
- bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */);
+ bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */);
if (test__start_subtest("bad_core_relo_subprog"))
bad_core_relo_subprog();
if (test__start_subtest("missing_map"))
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 3533a4ecad01..8743df599567 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -4,70 +4,160 @@
#include "map_kptr.skel.h"
#include "map_kptr_fail.skel.h"
+#include "rcu_tasks_trace_gp.skel.h"
static void test_map_kptr_success(bool test_run)
{
+ LIBBPF_OPTS(bpf_test_run_opts, lopts);
LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
);
+ int key = 0, ret, cpu;
struct map_kptr *skel;
- int key = 0, ret;
- char buf[16];
+ char buf[16], *pbuf;
skel = map_kptr__open_and_load();
if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
return;
- ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts);
- ASSERT_OK(ret, "test_map_kptr_ref refcount");
- ASSERT_OK(opts.retval, "test_map_kptr_ref retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref1 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval");
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval");
+
if (test_run)
goto exit;
+ cpu = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus"))
+ goto exit;
+
+ pbuf = calloc(cpu, sizeof(buf));
+ if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)"))
+ goto exit;
+
ret = bpf_map__update_elem(skel->maps.array_map,
&key, sizeof(key), buf, sizeof(buf), 0);
ASSERT_OK(ret, "array_map update");
- ret = bpf_map__update_elem(skel->maps.array_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "array_map update2");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__update_elem(skel->maps.pcpu_array_map,
+ &key, sizeof(key), pbuf, cpu * sizeof(buf), 0);
+ ASSERT_OK(ret, "pcpu_array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.hash_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "hash_map update");
ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.hash_malloc_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "hash_malloc_map update");
ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.lru_hash_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "lru_hash_map update");
ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "lru_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete");
+ skel->data->ref--;
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval");
+
+ free(pbuf);
exit:
map_kptr__destroy(skel);
}
-void test_map_kptr(void)
+static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
{
- if (test__start_subtest("success")) {
+ long gp_seq = READ_ONCE(rcu->bss->gp_seq);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace),
+ &opts), "do_call_rcu_tasks_trace"))
+ return -EFAULT;
+ if (!ASSERT_OK(opts.retval, "opts.retval == 0"))
+ return -EFAULT;
+ while (gp_seq == READ_ONCE(rcu->bss->gp_seq))
+ sched_yield();
+ return 0;
+}
+
+void serial_test_map_kptr(void)
+{
+ struct rcu_tasks_trace_gp *skel;
+
+ RUN_TESTS(map_kptr_fail);
+
+ skel = rcu_tasks_trace_gp__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
+ return;
+ if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach"))
+ goto end;
+
+ if (test__start_subtest("success-map")) {
+ test_map_kptr_success(true);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on bpf_map_free_deferred */
test_map_kptr_success(false);
- /* Do test_run twice, so that we see refcount going back to 1
- * after we leave it in map from first iteration.
- */
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on synchronous delete elem */
test_map_kptr_success(true);
}
- RUN_TESTS(map_kptr_fail);
+end:
+ rcu_tasks_trace_gp__destroy(skel);
+ return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 59f08d6d1d53..cd0c42fff7c0 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -7,6 +7,8 @@
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
+#define NS_TEST "mptcp_ns"
+
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
@@ -138,12 +140,20 @@ out:
static void test_base(void)
{
+ struct nstoken *nstoken = NULL;
int server_fd, cgroup_fd;
cgroup_fd = test__join_cgroup("/mptcp");
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
/* without MPTCP */
server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "start_server"))
@@ -157,13 +167,18 @@ with_mptcp:
/* with MPTCP */
server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
- goto close_cgroup_fd;
+ goto fail;
ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
close(server_fd);
-close_cgroup_fd:
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..daa952711d8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_parse_tcp_hdr_opt.skel.h"
+#include "test_parse_tcp_hdr_opt_dynptr.skel.h"
+#include "test_tcp_hdr_options.h"
+
+struct test_pkt {
+ struct ipv6_packet pk6_v6;
+ u8 options[16];
+} __packed;
+
+struct test_pkt pkt = {
+ .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .pk6_v6.iph.nexthdr = IPPROTO_TCP,
+ .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .pk6_v6.tcp.urg_ptr = 123,
+ .pk6_v6.tcp.doff = 9, /* 16 bytes of options */
+
+ .options = {
+ TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP,
+ 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL
+ },
+};
+
+static void test_parse_opt(void)
+{
+ struct test_parse_tcp_hdr_opt *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt__destroy(skel);
+}
+
+static void test_parse_opt_dynptr(void)
+{
+ struct test_parse_tcp_hdr_opt_dynptr *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt_dynptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt_dynptr__destroy(skel);
+}
+
+void test_parse_tcp_hdr_opt(void)
+{
+ if (test__start_subtest("parse_tcp_hdr_opt"))
+ test_parse_opt();
+ if (test__start_subtest("parse_tcp_hdr_opt_dynptr"))
+ test_parse_opt_dynptr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index 447d8560ecb6..3f1f58d3a729 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -25,10 +25,10 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
bpf_program__set_autoload(skel->progs.task_succ, true);
- bpf_program__set_autoload(skel->progs.no_lock, true);
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -69,6 +69,7 @@ out:
static const char * const inproper_region_tests[] = {
"miss_lock",
+ "no_lock",
"miss_unlock",
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
@@ -99,7 +100,6 @@ out:
}
static const char * const rcuptr_misuse_tests[] = {
- "task_untrusted_non_rcuptr",
"task_untrusted_rcuptr",
"cross_rcu_region",
};
@@ -128,17 +128,8 @@ out:
void test_rcu_read_lock(void)
{
- struct btf *vmlinux_btf;
int cgroup_fd;
- vmlinux_btf = btf__load_vmlinux_btf();
- if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
- return;
- if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) {
- test__skip();
- goto out;
- }
-
cgroup_fd = test__join_cgroup("/rcu_read_lock");
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
goto out;
@@ -153,6 +144,5 @@ void test_rcu_read_lock(void)
if (test__start_subtest("negative_tests_rcuptr_misuse"))
test_rcuptr_misuse();
close(cgroup_fd);
-out:
- btf__free(vmlinux_btf);
+out:;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index bca5e6839ac4..6ee22c3b251a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -137,24 +137,16 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0;
}
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- SYS("ip link add veth_src type veth peer name veth_src_fwd");
- SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+ SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
+ SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
- SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
- SYS("ip link set veth_dst address " MAC_DST);
+ SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS(fail, "ip link set veth_dst address " MAC_DST);
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
@@ -175,27 +167,27 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
goto fail;
- SYS("ip link set veth_src netns " NS_SRC);
- SYS("ip link set veth_src_fwd netns " NS_FWD);
- SYS("ip link set veth_dst_fwd netns " NS_FWD);
- SYS("ip link set veth_dst netns " NS_DST);
+ SYS(fail, "ip link set veth_src netns " NS_SRC);
+ SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
- SYS("ip addr add " IP4_SRC "/32 dev veth_src");
- SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
- SYS("ip link set dev veth_src up");
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
+ SYS(fail, "ip link set dev veth_src up");
- SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
- SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
- SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
- SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+ SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
- SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+ SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
close_netns(nstoken);
@@ -209,15 +201,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
* needs v4 one in order to start ARP probing. IP4_NET route is added
* to the endpoints so that the ARP processing will reply.
*/
- SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
- SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
- SYS("ip link set dev veth_src_fwd up");
- SYS("ip link set dev veth_dst_fwd up");
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+ SYS(fail, "ip link set dev veth_src_fwd up");
+ SYS(fail, "ip link set dev veth_dst_fwd up");
- SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
- SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
- SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
- SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
close_netns(nstoken);
@@ -226,16 +218,16 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
- SYS("ip addr add " IP4_DST "/32 dev veth_dst");
- SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
- SYS("ip link set dev veth_dst up");
+ SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
+ SYS(fail, "ip link set dev veth_dst up");
- SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
- SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
- SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
close_netns(nstoken);
@@ -375,7 +367,7 @@ done:
static int test_ping(int family, const char *addr)
{
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
+ SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
@@ -953,7 +945,7 @@ static int tun_open(char *name)
if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
goto fail;
- SYS("ip link set dev %s up", name);
+ SYS(fail, "ip link set dev %s up", name);
return fd;
fail:
@@ -1076,23 +1068,23 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */
- SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
- SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
- SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
- SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
- SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
- SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global");
- SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
- SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
- SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global");
- SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
- SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b13feceb38f1..810b14981c2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -70,7 +70,7 @@ void test_test_ima(void)
u64 bin_true_sample;
char cmd[256];
- int err, duration = 0;
+ int err, duration = 0, fresh_digest_idx = 0;
struct ima *skel = NULL;
skel = ima__open_and_load();
@@ -129,7 +129,15 @@ void test_test_ima(void)
/*
* Test #3
* - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest
- * - Expected result: 2 samples (/bin/true: non-fresh, fresh)
+ * - Expected result:
+ * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied
+ * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is
+ * not applied
+ *
+ * If commit 62622dab0a28 ("ima: return IMA digest value only when
+ * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses
+ * to give a non-fresh digest, hence the correct result is 1 instead of
+ * 2.
*/
test_init(skel->bss);
@@ -144,13 +152,18 @@ void test_test_ima(void)
goto close_clean;
err = ring_buffer__consume(ringbuf);
- ASSERT_EQ(err, 2, "num_samples_or_err");
- ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
- ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
- ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, "sample_equal_or_err");
+ ASSERT_GE(err, 1, "num_samples_or_err");
+ if (err == 2) {
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample,
+ "sample_equal_or_err");
+ fresh_digest_idx = 1;
+ }
+
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash");
/* IMA refreshed the digest. */
- ASSERT_NEQ(ima_hash_from_bpf[1], bin_true_sample,
- "sample_different_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample,
+ "sample_equal_or_err");
/*
* Test #4
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 07ad457f3370..47f1d482fe39 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -91,30 +91,15 @@
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
-#define SYS_NOFAIL(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- system(cmd); \
- })
-
static int config_device(void)
{
- SYS("ip netns add at_ns0");
- SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
- SYS("ip link set veth0 netns at_ns0");
- SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
- SYS("ip link set dev veth1 up mtu 1500");
- SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
- SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+ SYS(fail, "ip netns add at_ns0");
+ SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
+ SYS(fail, "ip link set veth0 netns at_ns0");
+ SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+ SYS(fail, "ip link set dev veth1 up mtu 1500");
+ SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
return 0;
fail:
@@ -132,23 +117,23 @@ static void cleanup(void)
static int add_vxlan_tunnel(void)
{
/* at_ns0 namespace */
- SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
IP4_ADDR2_VETH1, MAC_VETH1);
/* root namespace */
- SYS("ip link add dev %s type vxlan external gbp dstport 4789",
+ SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789",
VXLAN_TUNL_DEV1);
- SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
- SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
- SYS("ip neigh add %s lladdr %s dev %s",
+ SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
return 0;
@@ -165,26 +150,26 @@ static void delete_vxlan_tunnel(void)
static int add_ip6vxlan_tunnel(void)
{
- SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+ SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
IP6_ADDR_VETH0);
- SYS("ip netns exec at_ns0 ip link set dev veth0 up");
- SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
- SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
- SYS("ip link set dev veth1 up");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up");
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
/* at_ns0 namespace */
- SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
IP6VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
/* root namespace */
- SYS("ip link add dev %s type vxlan external dstport 4789",
+ SYS(fail, "ip link add dev %s type vxlan external dstport 4789",
IP6VXLAN_TUNL_DEV1);
- SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
- SYS("ip link set dev %s address %s up",
+ SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up",
IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
return 0;
@@ -205,7 +190,7 @@ static void delete_ip6vxlan_tunnel(void)
static int test_ping(int family, const char *addr)
{
- SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+ SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
return 0;
fail:
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 7eb049214859..290c21dbe65a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -29,6 +29,9 @@ static int timer(struct timer *timer_skel)
/* check that timer_cb2() was executed twice */
ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+ /* check that timer_cb3() was executed twice */
+ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+
/* check that there were no errors in timer execution */
ASSERT_EQ(timer_skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
new file mode 100644
index 000000000000..e64c71948491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "uninit_stack.skel.h"
+
+void test_uninit_stack(void)
+{
+ RUN_TESTS(uninit_stack);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index 3a13e102c149..e51721df14fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -590,7 +590,7 @@ static void *kick_kernel_cb(void *arg)
/* Kick the kernel, causing it to drain the ring buffer and then wake
* up the test thread waiting on epoll.
*/
- syscall(__NR_getrlimit);
+ syscall(__NR_prlimit64);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index d4cd9f873c14..fa3cac5488f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -4,11 +4,10 @@
#define IFINDEX_LO 1
#define XDP_FLAGS_REPLACE (1U << 4)
-void serial_test_xdp_attach(void)
+static void test_xdp_attach(const char *file)
{
__u32 duration = 0, id1, id2, id0 = 0, len;
struct bpf_object *obj1, *obj2, *obj3;
- const char *file = "./test_xdp.bpf.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
@@ -85,3 +84,11 @@ out_2:
out_1:
bpf_object__close(obj1);
}
+
+void serial_test_xdp_attach(void)
+{
+ if (test__start_subtest("xdp_attach"))
+ test_xdp_attach("./test_xdp.bpf.o");
+ if (test__start_subtest("xdp_attach_dynptr"))
+ test_xdp_attach("./test_xdp_dynptr.bpf.o");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 5e3a26b15ec6..d19f79048ff6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -141,41 +141,33 @@ static const char * const xmit_policy_names[] = {
static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
int bond_both_attach)
{
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- return -1; \
- })
-
- SYS("ip netns add ns_dst");
- SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
- SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
-
- SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ SYS(fail, "ip netns add ns_dst");
+ SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
- SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
- SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
- SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
- SYS("ip link set veth1_1 master bond1");
+ SYS(fail, "ip link set veth1_1 master bond1");
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
- SYS("ip link set veth1_2 master bond1");
+ SYS(fail, "ip link set veth1_2 master bond1");
} else {
- SYS("ip link set veth1_2 up addrgenmode none");
+ SYS(fail, "ip link set veth1_2 up addrgenmode none");
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
return -1;
}
- SYS("ip -netns ns_dst link set veth2_1 master bond2");
+ SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2");
if (bond_both_attach == BOND_BOTH_AND_ATTACH)
- SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2");
else
- SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none");
/* Load a dummy program on sending side as with veth peer needs to have a
* XDP program loaded as well.
@@ -194,8 +186,8 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
}
return 0;
-
-#undef SYS
+fail:
+ return -1;
}
static void bonding_cleanup(struct skeletons *skeletons)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 7271a18ab3e2..662b6c6c5ed7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -12,14 +12,6 @@
#include <uapi/linux/netdev.h>
#include "test_xdp_do_redirect.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto out; \
- })
-
struct udp_packet {
struct ethhdr eth;
struct ipv6hdr iph;
@@ -127,19 +119,19 @@ void test_xdp_do_redirect(void)
* iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
* payload.
*/
- SYS("ip netns add testns");
+ SYS(out, "ip netns add testns");
nstoken = open_netns("testns");
if (!ASSERT_OK_PTR(nstoken, "setns"))
goto out;
- SYS("ip link add veth_src type veth peer name veth_dst");
- SYS("ip link set dev veth_src address 00:11:22:33:44:55");
- SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
- SYS("ip link set dev veth_src up");
- SYS("ip link set dev veth_dst up");
- SYS("ip addr add dev veth_src fc00::1/64");
- SYS("ip addr add dev veth_dst fc00::2/64");
- SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+ SYS(out, "ip link add veth_src type veth peer name veth_dst");
+ SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
+ SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
+ SYS(out, "ip link set dev veth_src up");
+ SYS(out, "ip link set dev veth_dst up");
+ SYS(out, "ip addr add dev veth_src fc00::1/64");
+ SYS(out, "ip addr add dev veth_dst fc00::2/64");
+ SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
/* We enable forwarding in the test namespace because that will cause
* the packets that go through the kernel stack (with XDP_PASS) to be
@@ -152,7 +144,7 @@ void test_xdp_do_redirect(void)
* code didn't have this, so we keep the test behaviour to make sure the
* bug doesn't resurface.
*/
- SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
+ SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
ifindex_src = if_nametoindex("veth_src");
ifindex_dst = if_nametoindex("veth_dst");
@@ -226,6 +218,6 @@ out_tc:
out:
if (nstoken)
close_netns(nstoken);
- system("ip netns del testns");
+ SYS_NOFAIL("ip netns del testns");
test_xdp_do_redirect__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index aa4beae99f4f..490e851dc27d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -34,11 +34,6 @@
#define PREFIX_LEN "8"
#define FAMILY AF_INET
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
struct xsk {
void *umem_area;
struct xsk_umem *umem;
@@ -298,16 +293,16 @@ void test_xdp_metadata(void)
/* Setup new networking namespace, with a veth pair. */
- SYS("ip netns add xdp_metadata");
+ SYS(out, "ip netns add xdp_metadata");
tok = open_netns("xdp_metadata");
- SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
+ SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
- SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01");
- SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02");
- SYS("ip link set dev " TX_NAME " up");
- SYS("ip link set dev " RX_NAME " up");
- SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
- SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+ SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
+ SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
+ SYS(out, "ip link set dev " TX_NAME " up");
+ SYS(out, "ip link set dev " RX_NAME " up");
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
rx_ifindex = if_nametoindex(RX_NAME);
tx_ifindex = if_nametoindex(TX_NAME);
@@ -405,5 +400,5 @@ out:
xdp_metadata__destroy(bpf_obj);
if (tok)
close_netns(tok);
- system("ip netns del xdp_metadata");
+ SYS_NOFAIL("ip netns del xdp_metadata");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index c72083885b6d..8b50a992d233 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -8,11 +8,6 @@
#define CMD_OUT_BUF_SIZE 1023
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
#define SYS_OUT(cmd, ...) ({ \
char buf[1024]; \
snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
@@ -69,37 +64,37 @@ static void test_synproxy(bool xdp)
char buf[CMD_OUT_BUF_SIZE];
size_t size;
- SYS("ip netns add synproxy");
+ SYS(out, "ip netns add synproxy");
- SYS("ip link add tmp0 type veth peer name tmp1");
- SYS("ip link set tmp1 netns synproxy");
- SYS("ip link set tmp0 up");
- SYS("ip addr replace 198.18.0.1/24 dev tmp0");
+ SYS(out, "ip link add tmp0 type veth peer name tmp1");
+ SYS(out, "ip link set tmp1 netns synproxy");
+ SYS(out, "ip link set tmp0 up");
+ SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0");
/* When checksum offload is enabled, the XDP program sees wrong
* checksums and drops packets.
*/
- SYS("ethtool -K tmp0 tx off");
+ SYS(out, "ethtool -K tmp0 tx off");
if (xdp)
/* Workaround required for veth. */
- SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
+ SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
- SYS("ip link set lo up");
- SYS("ip link set tmp1 up");
- SYS("ip addr replace 198.18.0.2/24 dev tmp1");
- SYS("sysctl -w net.ipv4.tcp_syncookies=2");
- SYS("sysctl -w net.ipv4.tcp_timestamps=1");
- SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
- SYS("iptables-legacy -t raw -I PREROUTING \
+ SYS(out, "ip link set lo up");
+ SYS(out, "ip link set tmp1 up");
+ SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS(out, "iptables-legacy -t raw -I PREROUTING \
-i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
- SYS("iptables-legacy -t filter -A INPUT \
+ SYS(out, "iptables-legacy -t filter -A INPUT \
-i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
-j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
- SYS("iptables-legacy -t filter -A INPUT \
+ SYS(out, "iptables-legacy -t filter -A INPUT \
-i tmp1 -m state --state INVALID -j DROP");
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
@@ -170,8 +165,8 @@ out:
if (ns)
close_netns(ns);
- system("ip link del tmp0");
- system("ip netns del synproxy");
+ SYS_NOFAIL("ip link del tmp0");
+ SYS_NOFAIL("ip netns del synproxy");
}
void test_xdp_synproxy(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
index 8b03c9bb4862..d37f5394e199 100644
--- a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
@@ -69,21 +69,6 @@
"proto esp aead 'rfc4106(gcm(aes))' " \
"0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
-#define SYS_NOFAIL(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- system(cmd); \
- })
-
static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
{
LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
@@ -126,23 +111,23 @@ static void cleanup(void)
static int config_underlay(void)
{
- SYS("ip netns add " NS0);
- SYS("ip netns add " NS1);
- SYS("ip netns add " NS2);
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip netns add " NS2);
/* NS0 <-> NS1 [veth01 <-> veth10] */
- SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
- SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
- SYS("ip -net " NS0 " link set dev veth01 up");
- SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
- SYS("ip -net " NS1 " link set dev veth10 up");
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
/* NS0 <-> NS2 [veth02 <-> veth20] */
- SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
- SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
- SYS("ip -net " NS0 " link set dev veth02 up");
- SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
- SYS("ip -net " NS2 " link set dev veth20 up");
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS2 " link set dev veth20 up");
return 0;
fail:
@@ -153,20 +138,20 @@ static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
const char *ipv4_remote, int if_id)
{
/* State: local -> remote */
- SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
/* State: local <- remote */
- SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
/* Policy: local -> remote */
- SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
+ SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
if_id, ipv4_local, ipv4_remote, if_id);
/* Policy: local <- remote */
- SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
+ SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
if_id, ipv4_remote, ipv4_local, if_id);
@@ -274,16 +259,16 @@ static int config_overlay(void)
if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
goto fail;
- SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
- SYS("ip -net " NS0 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
+ SYS(fail, "ip -net " NS0 " link set dev ipsec0 up");
- SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
- SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS1 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
+ SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS1 " link set dev ipsec0 up");
- SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
- SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS2 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
+ SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS2 " link set dev ipsec0 up");
return 0;
fail:
@@ -294,7 +279,7 @@ static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
{
skel->bss->req_if_id = if_id;
- SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
+ SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
goto fail;
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index a20c5ed5e454..b04e092fac94 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -337,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb)
keys->ip_proto = ip6h->nexthdr;
keys->flow_label = ip6_flowlabel(ip6h);
- if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
return export_flow_keys(keys, BPF_OK);
return parse_ipv6_proto(skb, ip6h->nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 14e28f991451..f704885aa534 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -2,10 +2,33 @@
#ifndef __BPF_MISC_H__
#define __BPF_MISC_H__
+/* This set of attributes controls behavior of the
+ * test_loader.c:test_loader__run_subtests().
+ *
+ * __msg Message expected to be found in the verifier log.
+ * Multiple __msg attributes could be specified.
+ *
+ * __success Expect program load success in privileged mode.
+ *
+ * __failure Expect program load failure in privileged mode.
+ *
+ * __log_level Log level to use for the program, numeric value expected.
+ *
+ * __flag Adds one flag to use for the program; the following values are valid:
+ * - BPF_F_STRICT_ALIGNMENT;
+ * - BPF_F_TEST_RND_HI32;
+ * - BPF_F_TEST_STATE_FREQ;
+ * - BPF_F_SLEEPABLE;
+ * - BPF_F_XDP_HAS_FRAGS;
+ * - A numeric value.
+ * Multiple __flag attributes could be specified, the final flags
+ * value is derived by applying binary "or" to all specified values.
+ */
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
+#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 7653df1bc787..ce96b33e38d6 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct prog_test_ref_kfunc __kptr_ref *ptr;
+ struct prog_test_ref_kfunc __kptr *ptr;
};
struct {
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 7d30855bfe78..d0b7cd0d09d7 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -10,7 +10,7 @@
#include <bpf/bpf_tracing.h>
struct __cgrps_kfunc_map_value {
- struct cgroup __kptr_ref * cgrp;
+ struct cgroup __kptr * cgrp;
};
struct hash_map {
@@ -24,6 +24,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
{
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index 4ad7fe24966d..b42291ed9586 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
}
SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
+__failure __msg("expects refcounted")
int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
{
struct __cgrps_kfunc_map_value *v;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 0c23ea32df9f..030aff700084 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -61,7 +61,7 @@ int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *pa
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
{
- struct cgroup *kptr;
+ struct cgroup *kptr, *cg;
struct __cgrps_kfunc_map_value *v;
long status;
@@ -80,6 +80,16 @@ int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
return 0;
}
+ kptr = v->cgrp;
+ if (!kptr) {
+ err = 4;
+ return 0;
+ }
+
+ cg = bpf_cgroup_ancestor(kptr, 1);
+ if (cg) /* verifier only check */
+ bpf_cgroup_release(cg);
+
kptr = bpf_kptr_xchg(&v->cgrp, NULL);
if (!kptr) {
err = 3;
@@ -168,3 +178,45 @@ int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *parent, *res;
+ u64 parent_cgid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* @cgrp's ID is not visible yet, let's test with the parent */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent) {
+ err = 1;
+ return 0;
+ }
+
+ parent_cgid = parent->kn->id;
+ bpf_cgroup_release(parent);
+
+ res = bpf_cgroup_from_id(parent_cgid);
+ if (!res) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_cgroup_release(res);
+
+ if (res != parent) {
+ err = 3;
+ return 0;
+ }
+
+ res = bpf_cgroup_from_id((u64)-1);
+ if (res) {
+ bpf_cgroup_release(res);
+ err = 4;
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
index 2d11ed528b6f..7615dc23d301 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -49,7 +49,7 @@ int no_rcu_lock(void *ctx)
if (task->pid != target_pid)
return 0;
- /* ptr_to_btf_id semantics. should work. */
+ /* task->cgroups is untrusted in sleepable prog outside of RCU CS */
cgrp = task->cgroups->dfl_cgrp;
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
@@ -71,7 +71,7 @@ int yes_rcu_lock(void *ctx)
bpf_rcu_read_lock();
cgrp = task->cgroups->dfl_cgrp;
- /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */
+ /* cgrp is trusted under RCU CS */
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
cgroup_id = cgrp->kn->id;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index ad34f3b602be..65e5496ca1b2 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -10,7 +10,7 @@
int err;
struct __cpumask_map_value {
- struct bpf_cpumask __kptr_ref * cpumask;
+ struct bpf_cpumask __kptr * cpumask;
};
struct array_map {
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index 33e8e86dd090..c16f7563b84e 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -44,7 +44,7 @@ int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flag
}
SEC("tp_btf/task_newtask")
-__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask")
+__failure __msg("must be referenced")
int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index aa5b69354b91..20ce920d891d 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -5,7 +5,9 @@
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -244,6 +246,27 @@ done:
return 0;
}
+/* A data slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ *(__u8*)(hdr + 1) = 1;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -399,7 +422,6 @@ int invalid_helper2(void *ctx)
/* this should fail */
bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
-
return 0;
}
@@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx)
return 0;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R0 cannot write into rdonly_mem")
+int skb_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice3(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice4(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice1(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return XDP_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail: hdr was invalidated by bpf_xdp_adjust_head() */
+ val = hdr->h_proto;
+
+ return XDP_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return XDP_DROP;
+
+ hdr->h_proto = 9;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail: hdr was invalidated by bpf_xdp_adjust_head() */
+ hdr->h_proto = 1;
+
+ return XDP_PASS;
+}
+
+/* Only supported prog type can create skb-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
+int skb_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+
+ return 0;
+}
+
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")
@@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx)
return 0;
}
+/* Only supported prog type can create xdp-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed")
+int xdp_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_xdp(ctx, 0, &ptr);
+
+ return 0;
+}
+
+__u32 hdr_size = sizeof(struct ethhdr);
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("unbounded memory access")
+int dynptr_slice_var_len1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail */
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("must be a known constant")
+int dynptr_slice_var_len2(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ if (hdr_size <= sizeof(buffer)) {
+ /* this should fail */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+ hdr->h_proto = 12;
+ }
+
+ return SK_PASS;
+}
+
static int callback(__u32 index, void *data)
{
*(__u32 *)data = 123;
@@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx)
return 0;
}
+
+/* Program types that don't allow writes to packet data should fail if
+ * bpf_dynptr_slice_rdwr is called
+ */
+SEC("cgroup_skb/ingress")
+__failure __msg("the prog does not allow writes to packet data")
+int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail since cgroup_skb doesn't allow
+ * changing packet data
+ */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 35db7c6c1fc7..c8358a7c7924 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
@@ -30,7 +31,7 @@ struct {
__type(value, __u32);
} array_map SEC(".maps");
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int test_read_write(void *ctx)
{
char write_data[64] = "hello there, world!!";
@@ -61,8 +62,8 @@ int test_read_write(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
-int test_data_slice(void *ctx)
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_data(void *ctx)
{
__u32 key = 0, val = 235, *map_val;
struct bpf_dynptr ptr;
@@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
@@ -163,3 +164,49 @@ done:
bpf_ringbuf_discard_dynptr(&ptr, 0);
return 0;
}
+
+SEC("?cgroup_skb/egress")
+int test_skb_readonly(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ __u64 *data;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since cgroup skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_data(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ __u64 *data;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ if (data) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
index b3b326b8e2d1..47d5dedff554 100644
--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
struct callback_ctx *data)
{
/* writing to vma, which is illegal */
- vma->vm_flags |= 0x55;
+ vma->vm_start = 0xffffffffff600000;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index 2d2e61470794..13f00ca2ed0a 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-static struct prog_test_ref_kfunc __kptr_ref *v;
+static struct prog_test_ref_kfunc __kptr *v;
long total_sum = -1;
extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c
index 687081a724b3..ad73029cb1e3 100644
--- a/tools/testing/selftests/bpf/progs/lru_bug.c
+++ b/tools/testing/selftests/bpf/progs/lru_bug.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct task_struct __kptr *ptr;
+ struct task_struct __kptr_untrusted *ptr;
};
struct {
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index 228ec45365a8..3903d30217b8 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -4,8 +4,8 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct prog_test_ref_kfunc __kptr *unref_ptr;
- struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
};
struct array_map {
@@ -15,6 +15,13 @@ struct array_map {
__uint(max_entries, 1);
} array_map SEC(".maps");
+struct pcpu_array_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_array_map SEC(".maps");
+
struct hash_map {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
@@ -22,6 +29,13 @@ struct hash_map {
__uint(max_entries, 1);
} hash_map SEC(".maps");
+struct pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_hash_map SEC(".maps");
+
struct hash_malloc_map {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
@@ -30,6 +44,14 @@ struct hash_malloc_map {
__uint(map_flags, BPF_F_NO_PREALLOC);
} hash_malloc_map SEC(".maps");
+struct pcpu_hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} pcpu_hash_malloc_map SEC(".maps");
+
struct lru_hash_map {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, int);
@@ -37,6 +59,41 @@ struct lru_hash_map {
__uint(max_entries, 1);
} lru_hash_map SEC(".maps");
+struct lru_pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_pcpu_hash_map SEC(".maps");
+
+struct cgrp_ls_map {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} cgrp_ls_map SEC(".maps");
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} task_ls_map SEC(".maps");
+
+struct inode_ls_map {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} inode_ls_map SEC(".maps");
+
+struct sk_ls_map {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} sk_ls_map SEC(".maps");
+
#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \
struct { \
__uint(type, map_type); \
@@ -61,6 +118,7 @@ extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp
extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
@@ -90,12 +148,23 @@ static void test_kptr_ref(struct map_value *v)
WRITE_ONCE(v->unref_ptr, p);
if (!p)
return;
+ /*
+ * p is rcu_ptr_prog_test_ref_kfunc,
+ * because bpf prog is non-sleepable and runs in RCU CS.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
if (p->a + p->b > 100)
return;
/* store NULL */
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
return;
+ /*
+ * p is trusted_ptr_prog_test_ref_kfunc.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
if (p->a + p->b > 100) {
bpf_kfunc_call_test_release(p);
return;
@@ -160,6 +229,58 @@ int test_map_kptr(struct __sk_buff *ctx)
return 0;
}
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path)
+{
+ struct map_value *v;
+
+ v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct task_struct *task;
+ struct map_value *v;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+ v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct map_value *v;
+
+ v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_sk_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+ v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
SEC("tc")
int test_map_in_map_kptr(struct __sk_buff *ctx)
{
@@ -189,106 +310,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx)
return 0;
}
-SEC("tc")
-int test_map_kptr_ref(struct __sk_buff *ctx)
+int ref = 1;
+
+static __always_inline
+int test_map_kptr_ref_pre(struct map_value *v)
{
struct prog_test_ref_kfunc *p, *p_st;
unsigned long arg = 0;
- struct map_value *v;
- int key = 0, ret;
+ int ret;
p = bpf_kfunc_call_test_acquire(&arg);
if (!p)
return 1;
+ ref++;
p_st = p->next;
- if (p_st->cnt.refs.counter != 2) {
+ if (p_st->cnt.refs.counter != ref) {
ret = 2;
goto end;
}
- v = bpf_map_lookup_elem(&array_map, &key);
- if (!v) {
- ret = 3;
- goto end;
- }
-
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
- ret = 4;
+ ret = 3;
goto end;
}
- if (p_st->cnt.refs.counter != 2)
- return 5;
+ if (p_st->cnt.refs.counter != ref)
+ return 4;
p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
if (!p)
- return 6;
- if (p_st->cnt.refs.counter != 3) {
- ret = 7;
+ return 5;
+ ref++;
+ if (p_st->cnt.refs.counter != ref) {
+ ret = 6;
goto end;
}
bpf_kfunc_call_test_release(p);
- if (p_st->cnt.refs.counter != 2)
- return 8;
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 7;
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
- return 9;
+ return 8;
bpf_kfunc_call_test_release(p);
- if (p_st->cnt.refs.counter != 1)
- return 10;
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 9;
p = bpf_kfunc_call_test_acquire(&arg);
if (!p)
- return 11;
+ return 10;
+ ref++;
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
- ret = 12;
+ ret = 11;
goto end;
}
- if (p_st->cnt.refs.counter != 2)
- return 13;
+ if (p_st->cnt.refs.counter != ref)
+ return 12;
/* Leave in map */
return 0;
end:
+ ref--;
bpf_kfunc_call_test_release(p);
return ret;
}
-SEC("tc")
-int test_map_kptr_ref2(struct __sk_buff *ctx)
+static __always_inline
+int test_map_kptr_ref_post(struct map_value *v)
{
struct prog_test_ref_kfunc *p, *p_st;
- struct map_value *v;
- int key = 0;
-
- v = bpf_map_lookup_elem(&array_map, &key);
- if (!v)
- return 1;
p_st = v->ref_ptr;
- if (!p_st || p_st->cnt.refs.counter != 2)
- return 2;
+ if (!p_st || p_st->cnt.refs.counter != ref)
+ return 1;
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
- return 3;
- if (p_st->cnt.refs.counter != 2) {
+ return 2;
+ if (p_st->cnt.refs.counter != ref) {
bpf_kfunc_call_test_release(p);
- return 4;
+ return 3;
}
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
bpf_kfunc_call_test_release(p);
- return 5;
+ return 4;
}
- if (p_st->cnt.refs.counter != 2)
- return 6;
+ if (p_st->cnt.refs.counter != ref)
+ return 5;
+
+ return 0;
+}
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref1(struct __sk_buff *ctx)
+{
+ struct map_value *v, val = {};
+ int key = 0, ret;
+
+ bpf_map_update_elem(&hash_map, &key, &val, 0);
+ bpf_map_update_elem(&hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_hash_map, &key, &val, 0);
+
+ bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0);
+ bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0);
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, ret;
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
return 0;
}
+#undef TEST
+#undef TEST_PCPU
+
+SEC("tc")
+int test_map_kptr_ref3(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sp);
+ if (!p)
+ return 1;
+ ref++;
+ if (p->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 2;
+ }
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ return 0;
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref1(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (v)
+ return 150;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_pre(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref2(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_post(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref_del(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ if (!v->ref_ptr)
+ return 300;
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 760e41e1a632..08f9ec18c345 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -7,9 +7,9 @@
struct map_value {
char buf[8];
- struct prog_test_ref_kfunc __kptr *unref_ptr;
- struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
- struct prog_test_member __kptr_ref *ref_memb_ptr;
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+ struct prog_test_member __kptr *ref_memb_ptr;
};
struct array_map {
@@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_")
int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -316,7 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("R2 type=untrusted_ptr_ expected=ptr_")
+__failure __msg("R2 must be referenced")
int reject_untrusted_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
index 14aff7676436..0d1aa6bbace4 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -17,7 +17,7 @@ char _license[] SEC("license") = "GPL";
*/
SEC("tp_btf/task_newtask")
-__failure __msg("R2 must be referenced or trusted")
+__failure __msg("R2 must be")
int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags)
{
bpf_cpumask_test_cpu(0, task->user_cpus_ptr);
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index e5db1a4287e5..4c90aa6abddd 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -75,7 +75,7 @@ SEC("tc")
long rbtree_add_and_remove(void *ctx)
{
struct bpf_rb_node *res = NULL;
- struct node_data *n, *m;
+ struct node_data *n, *m = NULL;
n = bpf_obj_new(typeof(*n));
if (!n)
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index bf3cba115897..1ced900f3fce 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -232,8 +232,11 @@ long rbtree_api_first_release_unlock_escape(void *ctx)
bpf_spin_lock(&glock);
res = bpf_rbtree_first(&groot);
- if (res)
- n = container_of(res, struct node_data, node);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 1;
+ }
+ n = container_of(res, struct node_data, node);
bpf_spin_unlock(&glock);
bpf_spin_lock(&glock);
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 5cecbdbbb16e..7250bb76d18a 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -81,7 +81,7 @@ int no_lock(void *ctx)
{
struct task_struct *task, *real_parent;
- /* no bpf_rcu_read_lock(), old code still works */
+ /* old style ptr_to_btf_id is not allowed in sleepable */
task = bpf_get_current_task_btf();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
@@ -286,13 +286,13 @@ out:
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-int task_untrusted_non_rcuptr(void *ctx)
+int task_trusted_non_rcuptr(void *ctx)
{
struct task_struct *task, *group_leader;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
- /* the pointer group_leader marked as untrusted */
+ /* the pointer group_leader is explicitly marked as trusted */
group_leader = task->real_parent->group_leader;
(void)bpf_task_storage_get(&map_a, group_leader, 0, 0);
bpf_rcu_read_unlock();
diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
new file mode 100644
index 000000000000..df4873558634
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_ls_map SEC(".maps");
+
+long gp_seq;
+
+SEC("syscall")
+int do_call_rcu_tasks_trace(void *ctx)
+{
+ struct task_struct *current;
+ int *v;
+
+ current = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 1;
+ /* Invoke call_rcu_tasks_trace */
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+SEC("kprobe/rcu_tasks_trace_postgp")
+int rcu_tasks_trace_postgp(void *ctx)
+{
+ __sync_add_and_fetch(&gp_seq, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index c0ffd171743e..4c2a4b0e3a25 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -10,7 +10,7 @@
#include <bpf/bpf_tracing.h>
struct __tasks_kfunc_map_value {
- struct task_struct __kptr_ref * task;
+ struct task_struct __kptr * task;
};
struct hash_map {
diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
new file mode 100644
index 000000000000..f548b7446218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+
+/**
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 3b5dc34d23e9..68466a6ad18c 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -7,12 +7,8 @@
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
-int kprobe_res = 0;
int kprobe2_res = 0;
-int kretprobe_res = 0;
int kretprobe2_res = 0;
-int uprobe_res = 0;
-int uretprobe_res = 0;
int uprobe_byname_res = 0;
int uretprobe_byname_res = 0;
int uprobe_byname2_res = 0;
@@ -23,13 +19,6 @@ int uretprobe_byname3_sleepable_res = 0;
int uretprobe_byname3_res = 0;
void *user_ptr = 0;
-SEC("kprobe")
-int handle_kprobe(struct pt_regs *ctx)
-{
- kprobe_res = 1;
- return 0;
-}
-
SEC("ksyscall/nanosleep")
int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem)
{
@@ -37,24 +26,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker
return 0;
}
-/**
- * This program will be manually made sleepable on the userspace side
- * and should thus be unattachable.
- */
-SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
-int handle_kprobe_sleepable(struct pt_regs *ctx)
-{
- kprobe_res = 2;
- return 0;
-}
-
-SEC("kretprobe")
-int handle_kretprobe(struct pt_regs *ctx)
-{
- kretprobe_res = 2;
- return 0;
-}
-
SEC("kretsyscall/nanosleep")
int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
{
@@ -63,16 +34,14 @@ int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
}
SEC("uprobe")
-int handle_uprobe(struct pt_regs *ctx)
+int handle_uprobe_ref_ctr(struct pt_regs *ctx)
{
- uprobe_res = 3;
return 0;
}
SEC("uretprobe")
-int handle_uretprobe(struct pt_regs *ctx)
+int handle_uretprobe_ref_ctr(struct pt_regs *ctx)
{
- uretprobe_res = 4;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
new file mode 100644
index 000000000000..7f08bce94596
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+void *user_ptr = 0;
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ kretprobe_res = 2;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ uprobe_res = 3;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ uretprobe_res = 4;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
new file mode 100644
index 000000000000..f45a7095de7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -0,0 +1,980 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+#include "bpf_kfuncs.h"
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+ uint64_t errors_total_encap_mtu_violate;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) == /* bytes from start of ipv4.sport to end of ipv4.dport */
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport),
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) == /* bytes from start of ipv6.sport to end of ipv6.dport */
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport),
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+struct iphdr_info {
+ void *hdr;
+ __u64 len;
+};
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to return TC_ACT_* verdicts such as TC_ACT_SHOT.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr)
+{
+ if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*iphdr);
+
+ if (iphdr->ihl < 5)
+ return -1;
+
+ /* skip ipv4 options */
+ *offset += (iphdr->ihl - 5) * 4;
+
+ return 0;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports)
+{
+ if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0))
+ return false;
+
+ *offset += sizeof(*ports);
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++)
+ acc += ipw[i];
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset,
+ const struct ipv6hdr *ipv6, uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0))
+ return false;
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ *offset += (exthdr.len + 1) * 8;
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
+
+static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6,
+ uint8_t *proto, bool *is_fragment)
+{
+ if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*ipv6);
+
+ if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment))
+ return -1;
+
+ return 0;
+}
+
+/* Global metrics, per CPU
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static metrics_t *get_global_metrics(void)
+{
+ unsigned int key = 0; /* same type as metrics_map's declared key; slot 0 is the only entry */
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ /* Changing the ethertype if the encapsulated packet is ipv6 */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6)
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+/* Replace the GUE encapsulation with IPv4 + GRE and send the packet on to
+ * next_hop.
+ *
+ * The inner packet's TTL / hop limit is decremented first. The skb is then
+ * resized by `delta` at the network layer, and the outer IPv4 and GRE headers
+ * are rewritten through a fresh dynptr slice. Every failure path bumps a
+ * metric and returns TC_ACT_SHOT; success returns the bpf_redirect() result.
+ */
+static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ /* Offset of the inner packet: fixed encap headers plus the hop list. */
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ /* Bytes that sit between the outer IPv4 header and the inner packet. */
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ /* Net size change when those bytes are swapped for a GRE base header. */
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ __u8 encap_buffer[sizeof(encap_gre_t)] = {};
+ uint16_t proto = ETH_P_IP;
+ uint32_t mtu_len = 0;
+ encap_gre_t *encap_gre;
+
+ metrics->forwarded_packets_total_gre++;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+ * against any forwarding loop. As the only interesting field is the TTL
+ * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+ * as they handle the split packets if needed (no need for the data to be
+ * in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+ * this function only works for 2 and 4 bytes arguments (the result is
+ * the same).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ /* Refuse to resize if the resulting packet would violate the MTU. */
+ if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
+ metrics->errors_total_encap_mtu_violate++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ /* Pull the new header area before taking a writable slice over it. */
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap_gre) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ /* Rewrite the outer IPv4 header for GRE and recompute its checksum. */
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ /* If the slice was served from the stack buffer, copy the edits back. */
+ if (encap_gre == encap_buffer)
+ bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+/* Send the packet on to next_hop, either re-encapsulated in GRE or still in
+ * GUE with rewritten outer addresses.
+ *
+ * GRE is used when the hop being consumed is the last one in the list and
+ * the header's last_hop_gre flag is set. Otherwise the outer source becomes
+ * the old destination, the destination becomes next_hop, the next_hop index
+ * is bumped and the outer IPv4 checksum is patched. Returns TC_ACT_SHOT if
+ * the checksum update fails, otherwise the bpf_redirect() result.
+ */
+static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, dynptr, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ /* Saved so the checksum delta below can be computed from the old value. */
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+/* Advance *offset past n next-hop addresses.
+ *
+ * Only n == 0 and n == 1 are supported. For those the function returns
+ * CONTINUE_PROCESSING (after advancing by one struct in_addr when n == 1).
+ * Any other count returns TC_ACT_SHOT and leaves *offset unchanged.
+ */
+static ret_t skip_next_hops(__u64 *offset, int n)
+{
+ switch (n) {
+ case 1:
+ *offset += sizeof(struct in_addr);
+ /* fall through */
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * *offset is positioned just after the variable length GLB header
+ * iff the call is successful.
+ *
+ * Returns CONTINUE_PROCESSING on success and TC_ACT_SHOT if the next_hop
+ * index exceeds hop_count, a hop cannot be skipped, or the address cannot be
+ * read from the dynptr.
+ */
+static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count)
+ return TC_ACT_SHOT;
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0))
+ return TC_ACT_SHOT;
+
+ *offset += sizeof(*next_hop);
+
+ /* Skip the remaining next hops (may be zero). */
+ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that lets us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ *
+ * iphlen selects the address family: sizeof(struct iphdr) fills tuple->ipv4,
+ * sizeof(struct ipv6hdr) fills tuple->ipv6. Returns the size of the tuple
+ * member that was filled, or 0 for any other iphlen.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+/* Classify a TCP flow by looking up its socket in the current netns.
+ *
+ * Returns UNKNOWN when no socket matches, ESTABLISHED when the socket is not
+ * in BPF_TCP_LISTEN state, and SYN_COOKIE when the socket is listening and
+ * bpf_tcp_check_syncookie() returns 0 for the supplied headers. iph and tcp
+ * may both be NULL, in which case the syncookie check is skipped. The socket
+ * reference is released on every path that acquired it.
+ */
+static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+
+ /* The tuple length reveals the address family of the flow. */
+ if (tuplen == sizeof(tuple->ipv6))
+ iphlen = sizeof(struct ipv6hdr);
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+/* Classify a UDP flow by looking up its socket in the current netns.
+ *
+ * Returns ESTABLISHED when a socket is found in BPF_TCP_ESTABLISHED state and
+ * UNKNOWN otherwise. A socket that was found is always released.
+ */
+static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ verdict_t verdict = UNKNOWN;
+ struct bpf_sock *sock;
+
+ sock = bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+ if (!sock)
+ return verdict;
+
+ if (sock->state == BPF_TCP_ESTABLISHED)
+ verdict = ESTABLISHED;
+
+ bpf_sk_release(sock);
+ return verdict;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+/* Classify an ICMPv4 message found at *offset.
+ *
+ * Echo replies and any type other than echo request or "destination
+ * unreachable / fragmentation needed" are INVALID. Echo requests return
+ * ECHO_REQUEST. For fragmentation-needed errors the quoted IPv4 header and
+ * L4 ports are parsed and the original flow is classified via
+ * classify_icmp(). *offset is advanced past everything that was consumed.
+ */
+static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset,
+ metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ struct iphdr ipv4;
+
+ if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(icmp);
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO)
+ return ECHO_REQUEST;
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ /* Parse the IPv4 header of the packet quoted in the ICMP error. */
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4.daddr;
+ tuple.ipv4.daddr = ipv4.saddr;
+
+ /* The ports are written straight into the tuple, starting at sport. */
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, ipv4.protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+/* Classify an ICMPv6 message found at *offset.
+ *
+ * Echo replies and any type other than echo request or "packet too big" are
+ * INVALID. Echo requests return ECHO_REQUEST. For packet-too-big errors the
+ * quoted IPv6 header (including extension headers) and L4 ports are parsed
+ * and the original flow is classified via classify_icmp(). A quoted packet
+ * that is a fragment is INVALID. *offset is advanced past everything that
+ * was consumed.
+ */
+static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct ipv6hdr ipv6;
+ struct icmp6hdr icmp6;
+ bool is_fragment;
+ uint8_t l4_proto;
+
+ if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* Step over the ICMPv6 header, as process_icmpv4() does for ICMPv4.
+ * Without this the quoted IPv6 header below would be read from the
+ * start of the ICMPv6 header instead of from the bytes that follow it.
+ */
+ *offset += sizeof(icmp6);
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ /* Parse the IPv6 header of the packet quoted in the ICMPv6 error. */
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr));
+
+ /* The ports are written straight into the tuple, starting at sport. */
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+/* Read the TCP header at *offset and classify the flow.
+ *
+ * Bumps the TCP packet counter, returns INVALID if the header cannot be
+ * read, SYN if the SYN flag is set, and otherwise whatever classify_tcp()
+ * decides for the tuple built from the enclosing IP header (info) and the
+ * TCP ports. *offset is advanced past the TCP header on a successful read.
+ */
+static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct tcphdr tcp;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_tcp++;
+
+ if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(tcp);
+
+ if (tcp.syn)
+ return SYN;
+
+ /* info->len tells fill_tuple() which address family to fill in. */
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest);
+ return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp);
+}
+
+/* Read the UDP header at *offset and classify the flow.
+ *
+ * Bumps the UDP packet counter, returns INVALID if the header cannot be
+ * read, and otherwise whatever classify_udp() decides for the tuple built
+ * from the enclosing IP header (info) and the UDP ports. *offset is advanced
+ * past the UDP header on a successful read.
+ */
+static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct udphdr udph;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_udp++;
+
+ if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+ *offset += sizeof(udph);
+
+ /* info->len tells fill_tuple() which address family to fill in. */
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest);
+ return classify_udp(skb, &tuple, tuplen);
+}
+
+/* Parse the inner IPv4 header at *offset and dispatch on its L4 protocol.
+ *
+ * Bumps the IPv4 packet counter. Returns INVALID (with the matching error
+ * metric bumped) if the header cannot be parsed, its version is not 4, it
+ * is a fragment, or its protocol is not ICMP, TCP or UDP. Otherwise returns
+ * the verdict of the protocol-specific handler.
+ */
+static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct iphdr ipv4;
+ /* Passed to the L4 handlers so they can build a socket tuple. */
+ struct iphdr_info info = {
+ .hdr = &ipv4,
+ .len = sizeof(ipv4),
+ };
+
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4.version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(&ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4.protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(skb, dynptr, offset, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+/* Parse the inner IPv6 header at *offset and dispatch on its L4 protocol.
+ *
+ * Bumps the IPv6 packet counter. Returns INVALID (with the matching error
+ * metric bumped) if the header cannot be parsed, its version is not 6, it
+ * is a fragment, or the L4 protocol reported by pkt_parse_ipv6() is not
+ * ICMPv6, TCP or UDP. Otherwise returns the verdict of the
+ * protocol-specific handler.
+ */
+static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct ipv6hdr ipv6;
+ /* Passed to the L4 handlers so they can build a socket tuple. */
+ struct iphdr_info info = {
+ .hdr = &ipv6,
+ .len = sizeof(ipv6),
+ };
+ uint8_t l4_proto;
+ bool is_fragment;
+
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6.version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(dynptr, offset, skb, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+/* TC entry point: decide what to do with a GUE-encapsulated packet.
+ *
+ * Packets that are not IPv4/UDP to ENCAPSULATION_IP:ENCAPSULATION_PORT are
+ * passed through untouched (TC_ACT_OK). Once a packet is known to be ours,
+ * malformed encapsulation is dropped (TC_ACT_SHOT). A packet with no hops
+ * left is accepted locally; otherwise the inner packet is classified and,
+ * depending on the verdict, either accepted locally or forwarded to the
+ * next hop.
+ */
+SEC("tc")
+int cls_redirect(struct __sk_buff *skb)
+{
+ __u8 encap_buffer[sizeof(encap_headers_t)] = {};
+ struct bpf_dynptr dynptr;
+ struct in_addr next_hop;
+ /* Tracks offset of the dynptr. This will be unnecessary once
+ * bpf_dynptr_advance() is available.
+ */
+ __u64 off = 0;
+ ret_t ret;
+
+ bpf_dynptr_from_skb(skb, 0, &dynptr);
+
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL)
+ return TC_ACT_SHOT;
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap)))
+ return TC_ACT_OK;
+
+ encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap)
+ return TC_ACT_OK;
+
+ off += sizeof(*encap);
+
+ if (encap->ip.ihl != 5)
+ /* We never have any options. */
+ return TC_ACT_OK;
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT)
+ return TC_ACT_OK;
+
+ /* We now know that the packet is destined to us, we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ /* hlen is compared in 4-byte words: fixed unigue part plus one per hop. */
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ /* NOTE(review): unlike the checks above, this drop does not bump a
+ * metric - confirm that is intended.
+ */
+ if (encap->unigue.reserved != 0)
+ return TC_ACT_SHOT;
+
+ MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop));
+
+ /* get_next_hop() reports "no hops left" as a zero address. */
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(skb, &dynptr, &off, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(skb, &dynptr, &off, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ /* Every verdict that reaches this point is accepted locally. */
+ ret = accept_locally(skb, encap);
+
+ /* NOTE(review): accept_locally() has already resized the skb here;
+ * confirm that writing the original header bytes back at offset 0 is
+ * still wanted when the slice was served from encap_buffer.
+ */
+ if (encap == encap_buffer)
+ bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 98327bdbbfd2..8fba3f3649e2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -5,12 +5,12 @@
#include "bpf_misc.h"
struct Small {
- int x;
+ long x;
};
struct Big {
- int x;
- int y;
+ long x;
+ long y;
};
__noinline int foo(const struct Big *big)
@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big)
}
SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid indirect access to stack")
int global_func10(struct __sk_buff *skb)
{
const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 2fbef3cc7ad8..2dde8e3fe4c9 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -48,7 +48,7 @@ SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
{
- unsigned long val;
+ unsigned long val = 0;
return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
(struct bpf_dynptr *)val, NULL);
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
new file mode 100644
index 000000000000..f997f5080748
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include "test_iptunnel_common.h"
+#include <bpf/bpf_endian.h>
+
+#include "bpf_kfuncs.h"
+
+/* Rotate a 32-bit word left by `shift` bits. */
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ __u32 upper = word << shift;
+ __u32 lower = word >> ((-shift) & 31);
+
+ return upper | lower;
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+/* __jhash_mix: mixes the three 32-bit values a, b and c in place. Used by
+ * jhash() after each 12-byte chunk of the key has been added in.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* __jhash_final: final in-place mixing of a, b and c; callers in this file
+ * take the resulting c as the hash value.
+ */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+/* Hash `length` bytes starting at `key`, seeded with `initval`.
+ *
+ * The key is consumed in 12-byte chunks (three 32-bit loads each); the
+ * remaining 1..12 bytes are folded in by the fallthrough switch below and
+ * then run through __jhash_final(). Returns the resulting c word.
+ */
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Every case deliberately falls through to the ones below it. */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+/* Hash three 32-bit words: add initval to each of them, run the final
+ * mixing step and return the resulting third word.
+ */
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ u32 x = a + initval;
+ u32 y = b + initval;
+ u32 z = c + initval;
+
+ __jhash_final(x, y, z);
+ return z;
+}
+
+/* Hash two 32-bit words. The third word is fixed at 0 and the seed is
+ * initval + JHASH_INITVAL + (2 << 2).
+ */
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ const u32 seed = initval + JHASH_INITVAL + (2 << 2);
+
+ return __jhash_nwords(a, b, 0, seed);
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+/* Flow description extracted from a packet: source and destination address
+ * (IPv4 or IPv6 sharing storage), the two L4 ports, the L4 protocol and the
+ * F_ICMP / F_SYN_SET flag bits.
+ */
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+/* Value type of ctl_array; process_packet() reads the ifindex member. */
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+/* Value type of vip_map: per-VIP flags (F_HASH_NO_SRC_PORT is tested) and
+ * the VIP number used to index the ch_rings and stats maps.
+ */
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+/* Value type of the reals map: destination address of a real plus flags
+ * (F_IPV6 selects the IPv6 address and tunnel).
+ */
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+/* Value type of the stats map: byte and packet counters for one VIP. */
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+/* Ethernet header as seen at offset 0 of the packet. */
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+/* struct vip (address, port, protocol) -> struct vip_meta. */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, struct vip);
+ __type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+/* Ring position (RING_SIZE * vip_num + hash % RING_SIZE) -> index into reals. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CH_RINGS_SIZE);
+ __type(key, __u32);
+ __type(value, __u32);
+} ch_rings SEC(".maps");
+
+/* Real index -> struct real_definition. */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_REALS);
+ __type(key, __u32);
+ __type(value, struct real_definition);
+} reals SEC(".maps");
+
+/* vip_num -> per-CPU struct vip_stats counters. */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, __u32);
+ __type(value, struct vip_stats);
+} stats SEC(".maps");
+
+/* Control slots; process_packet() reads slot 1 for IPv4 reals and slot 2 for
+ * IPv6 reals to obtain the interface index to redirect to.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CTL_MAP_SIZE);
+ __type(key, __u32);
+ __type(value, struct ctl_value);
+} ctl_array SEC(".maps");
+
+/* Hash a flow for ring selection. The first word is the IPv4 source address,
+ * or for IPv6 the jhash of the 16-byte source address; the second word is
+ * the ports field.
+ */
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
+{
+ u32 src_word;
+
+ if (!ipv6)
+ src_word = pckt->src;
+ else
+ src_word = jhash(pckt->srcv6, 16, MAX_VIPS);
+
+ return jhash_2words(src_word, pckt->ports, CH_RINGS_SIZE);
+}
+
+/* Pick the real for a flow.
+ *
+ * Hashes the flow, maps it to a slot of the VIP's ring in ch_rings and looks
+ * the resulting index up in reals. On success *real points at the chosen
+ * entry and true is returned. Returns false if the hash is not one of the
+ * two expected test values or if either map lookup fails.
+ */
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ /* Only the two flows used by the test are accepted. */
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return false;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+/* Handle an ICMPv6 header at `off`.
+ *
+ * Returns TC_ACT_SHOT if a slice cannot be obtained and TC_ACT_OK for any
+ * type other than "packet too big". For packet-too-big messages the quoted
+ * IPv6 header is used to fill pckt (with source and destination swapped and
+ * F_ICMP set) and TC_ACT_UNSPEC is returned; process_packet() only returns
+ * early when the result is >= 0.
+ *
+ * NOTE(review): both slices request sizeof(struct ipv6hdr) bytes, including
+ * the one used for the ICMPv6 header - confirm that is intended.
+ */
+static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ /* icmp_hdr is not used past this point, so the buffer can be reused. */
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+/* Handle an ICMPv4 header at `off`.
+ *
+ * Returns TC_ACT_SHOT if a slice cannot be obtained or the quoted IPv4
+ * header carries options (ihl != 5), and TC_ACT_OK for anything other than
+ * "destination unreachable / fragmentation needed". For fragmentation-needed
+ * messages the quoted IPv4 header is used to fill pckt (with source and
+ * destination swapped and F_ICMP set) and TC_ACT_UNSPEC is returned;
+ * process_packet() only returns early when the result is >= 0.
+ *
+ * NOTE(review): the ICMP slice requests sizeof(struct iphdr) bytes rather
+ * than sizeof(struct icmphdr) - confirm that is intended.
+ */
+static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer_icmp[sizeof(struct iphdr)] = {};
+ __u8 buffer_ip[sizeof(struct iphdr)] = {};
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+/* Copy the UDP ports at `off` into pckt->port16. When F_ICMP is set in
+ * pckt->flags the ports are stored swapped (dest first, then source).
+ * Returns false if the header slice cannot be obtained.
+ */
+static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 scratch[sizeof(struct udphdr)] = {};
+ struct udphdr *hdr = bpf_dynptr_slice(skb_ptr, off, scratch, sizeof(scratch));
+ bool swap_ports;
+
+ if (!hdr)
+ return false;
+
+ swap_ports = pckt->flags & F_ICMP;
+ pckt->port16[0] = swap_ports ? hdr->dest : hdr->source;
+ pckt->port16[1] = swap_ports ? hdr->source : hdr->dest;
+ return true;
+}
+
+/* Copy the TCP ports at `off` into pckt->port16 and set F_SYN_SET when the
+ * SYN flag is present. When F_ICMP is set in pckt->flags the ports are
+ * stored swapped (dest first, then source). Returns false if the header
+ * slice cannot be obtained.
+ */
+static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 scratch[sizeof(struct tcphdr)] = {};
+ struct tcphdr *hdr = bpf_dynptr_slice(skb_ptr, off, scratch, sizeof(scratch));
+ bool swap_ports;
+
+ if (!hdr)
+ return false;
+
+ if (hdr->syn)
+ pckt->flags |= F_SYN_SET;
+
+ swap_ports = pckt->flags & F_ICMP;
+ pckt->port16[0] = swap_ports ? hdr->dest : hdr->source;
+ pckt->port16[1] = swap_ports ? hdr->source : hdr->dest;
+ return true;
+}
+
+/* Load-balance one packet whose L3 header starts at `off`.
+ *
+ * Parses the IPv4 or IPv6 header (and an ICMP error wrapper if present),
+ * then the TCP or UDP ports, looks the destination up in vip_map, picks a
+ * real through get_packet_dst(), updates the per-VIP stats, sets the tunnel
+ * key and redirects to the interface taken from ctl_array.
+ *
+ * Returns TC_ACT_SHOT on any parse or lookup failure, the ICMP parser's
+ * result when that is >= 0, and otherwise the bpf_redirect() result.
+ */
+static __noinline int process_packet(struct bpf_dynptr *skb_ptr,
+ struct eth_hdr *eth, __u64 off,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ struct packet_description pckt = {};
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ /* ctl_array slots holding the IPv4 and IPv6 egress interfaces. */
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ /* Skip the ICMPv6 header and the quoted IPv6 header. */
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ __u8 buffer[sizeof(struct iphdr)] = {};
+
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ /* Skip the ICMP header and the quoted IPv4 header. */
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ /* The ICMP parsers may have replaced pckt.proto with the quoted one. */
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ /* Retry with port 0 before giving up on the VIP lookup. */
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ /* Stores tkey.remote_ipv4 in the first four bytes of the destination
+ * MAC. NOTE(review): presumably so the test harness can observe which
+ * real was chosen - confirm.
+ */
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+/* TC entry point of the load balancer.
+ *
+ * Takes a writable slice over the Ethernet header, dispatches IPv4 and IPv6
+ * packets to process_packet() and drops everything else (TC_ACT_SHOT). If
+ * the slice was served from the stack buffer, the header is written back to
+ * the packet before returning process_packet()'s result.
+ */
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ __u8 buffer[sizeof(struct eth_hdr)] = {};
+ struct bpf_dynptr ptr;
+ struct eth_hdr *eth;
+ __u32 eth_proto;
+ __u32 nh_off;
+ int err;
+
+ /* The network header starts right after the Ethernet header. */
+ nh_off = sizeof(struct eth_hdr);
+
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+ eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ err = process_packet(&ptr, eth, nh_off, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ err = process_packet(&ptr, eth, nh_off, true, ctx);
+ else
+ return TC_ACT_SHOT;
+
+ /* process_packet() may have modified eth; flush it if it is a copy. */
+ if (eth == buffer)
+ bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
+
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..79bab9b50e9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This parsing logic is taken from the open source library katran, a layer 4
+ * load balancer.
+ *
+ * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c
+ *
+ * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+struct hdr_opt_state { /* cursor used while walking the TCP options of one packet */
+ __u32 server_id; /* value read from the matching option; xdp_ingress_v6() treats 0 as "not found" */
+ __u8 byte_offset; /* offset of the next option, measured from xdp->data */
+ __u8 hdr_bytes_remaining; /* option bytes that have not been consumed yet */
+};
+
+static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state) /* parse one TCP option at state->byte_offset; returns 1 when the server id was read, 0 to keep scanning, -1 to stop */
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ __u8 *tcp_opt, kind, hdr_len;
+
+ tcp_opt = (__u8 *)(data + state->byte_offset);
+ if (tcp_opt + 1 > data_end) /* the kind byte must lie inside the packet */
+ return -1;
+
+ kind = tcp_opt[0];
+
+ if (kind == TCPOPT_EOL) /* end of option list: stop scanning */
+ return -1;
+
+ if (kind == TCPOPT_NOP) { /* NOP is a single byte without a length field */
+ state->hdr_bytes_remaining--;
+ state->byte_offset++;
+ return 0;
+ }
+
+ if (state->hdr_bytes_remaining < 2 ||
+ tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end) /* both the kind and the length byte must be present */
+ return -1;
+
+ hdr_len = tcp_opt[1];
+ if (hdr_len > state->hdr_bytes_remaining) /* option claims more bytes than the header has left */
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ if (tcp_opt + tcp_hdr_opt_len_tpr > data_end) /* the whole option must lie inside the packet before it is read */
+ return -1;
+
+ state->server_id = *(__u32 *)&tcp_opt[2]; /* the 4 bytes after kind and length, copied without byte swapping */
+ return 1;
+ }
+
+ state->hdr_bytes_remaining -= hdr_len; /* any other option is skipped by its declared length */
+ state->byte_offset += hdr_len;
+ return 0;
+}
+
+/* XDP entry point: locate the TCP header at the fixed Ethernet + IPv6 offset,
+ * scan its options for the tcp_hdr_opt_kind_tpr option and publish the value
+ * found there in the global server_id.
+ *
+ * Returns XDP_PASS when the option was found with a non-zero id, XDP_DROP
+ * otherwise (short packet, malformed header, option absent).
+ */
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+	const void *data = (void *)(long)xdp->data;
+	const void *data_end = (void *)(long)xdp->data_end;
+	struct hdr_opt_state opt_state = {};
+	__u8 tcp_hdr_opt_len = 0;
+	struct tcphdr *tcp_hdr;
+	__u64 tcp_offset = 0;
+	int err;
+
+	tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+	tcp_hdr = (struct tcphdr *)(data + tcp_offset);
+	if (tcp_hdr + 1 > data_end)
+		return XDP_DROP;
+
+	/* A data offset shorter than the fixed header is malformed. Without
+	 * this check the subtraction below wraps around and the truncated
+	 * __u8 result passes the minimum-length test.
+	 */
+	if (tcp_hdr->doff * 4 < sizeof(struct tcphdr))
+		return XDP_DROP;
+
+	tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+	if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+		return XDP_DROP;
+
+	opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
+	opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
+
+	/* TCP options occupy at most 40 bytes; the scan is additionally capped
+	 * at tcp_hdr_opt_max_opt_checks options so the loop is bounded.
+	 */
+	for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+		err = parse_hdr_opt(xdp, &opt_state);
+
+		/* stop on a hit (1), on an error (-1), or when no option bytes remain */
+		if (err || !opt_state.hdr_bytes_remaining)
+			break;
+	}
+
+	if (!opt_state.server_id)
+		return XDP_DROP;
+
+	server_id = opt_state.server_id;
+
+	return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
new file mode 100644
index 000000000000..d3b319722e30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This logic is lifted from a real-world use case of packet parsing, used in
+ * the open source library katran, a layer 4 load balancer.
+ *
+ * This test demonstrates how to parse packet contents using dynptrs. The
+ * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+/* Parse one TCP option located at *off inside the dynptr.
+ *
+ * @ptr: dynptr covering the packet
+ * @off: in/out offset of the option to inspect; advanced past skipped options
+ * @hdr_bytes_remaining: in/out count of option bytes not yet consumed
+ * @server_id: receives the 4-byte value of the tcp_hdr_opt_kind_tpr option
+ *
+ * Returns 1 when the server id was copied out, 0 when the option was skipped
+ * and scanning should continue, -1 when scanning must stop (slice failure,
+ * end-of-list option, or inconsistent length).
+ */
+static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
+			 __u32 *server_id)
+{
+	__u8 kind, hdr_len;
+	__u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
+	__u8 *data;
+
+	__builtin_memset(buffer, 0, sizeof(buffer));
+
+	/* Always request a full kind + length + id window, whatever the
+	 * option at *off turns out to be.
+	 */
+	data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer));
+	if (!data)
+		return -1;
+
+	kind = data[0];
+
+	if (kind == TCPOPT_EOL)
+		return -1;
+
+	/* NOP is a single byte without a length field */
+	if (kind == TCPOPT_NOP) {
+		*off += 1;
+		*hdr_bytes_remaining -= 1;
+		return 0;
+	}
+
+	if (*hdr_bytes_remaining < 2)
+		return -1;
+
+	hdr_len = data[1];
+	if (hdr_len > *hdr_bytes_remaining)
+		return -1;
+
+	if (kind == tcp_hdr_opt_kind_tpr) {
+		if (hdr_len != tcp_hdr_opt_len_tpr)
+			return -1;
+
+		/* the id follows the kind and length bytes; copied without byte swapping */
+		__builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id));
+		return 1;
+	}
+
+	/* any other option is skipped by its declared length */
+	*off += hdr_len;
+	*hdr_bytes_remaining -= hdr_len;
+	return 0;
+}
+
+/* XDP entry point (dynptr variant): read the TCP header at the fixed
+ * Ethernet + IPv6 offset through a dynptr slice, scan its options for the
+ * tcp_hdr_opt_kind_tpr option and publish the value in the global server_id.
+ *
+ * Returns XDP_PASS when the option was found with a non-zero id, XDP_DROP
+ * otherwise.
+ */
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+	__u8 buffer[sizeof(struct tcphdr)] = {};
+	__u8 hdr_bytes_remaining;
+	struct tcphdr *tcp_hdr;
+	__u8 tcp_hdr_opt_len;
+	/* Per-packet result. The global server_id is only written on success,
+	 * so a value left behind by an earlier packet cannot make this packet
+	 * pass.
+	 */
+	__u32 opt_server_id = 0;
+	int err = 0;
+	__u32 off;
+
+	struct bpf_dynptr ptr;
+
+	bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+	off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+	tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer));
+	if (!tcp_hdr)
+		return XDP_DROP;
+
+	/* A data offset shorter than the fixed header is malformed. Without
+	 * this check the subtraction below wraps around and the truncated
+	 * __u8 result passes the minimum-length test.
+	 */
+	if (tcp_hdr->doff * 4 < sizeof(struct tcphdr))
+		return XDP_DROP;
+
+	tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+	if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+		return XDP_DROP;
+
+	hdr_bytes_remaining = tcp_hdr_opt_len;
+
+	off += sizeof(struct tcphdr);
+
+	/* TCP options occupy at most 40 bytes; the scan is additionally capped
+	 * at tcp_hdr_opt_max_opt_checks options so the loop is bounded.
+	 */
+	for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+		err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &opt_server_id);
+
+		/* stop on a hit (1), on an error (-1), or when no option bytes remain */
+		if (err || !hdr_bytes_remaining)
+			break;
+	}
+
+	if (!opt_server_id)
+		return XDP_DROP;
+
+	server_id = opt_server_id;
+
+	return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index b502e5c92e33..6ccf6d546074 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -23,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
bool *ipv4)
{
struct bpf_sock_tuple *result;
+ __u64 ihl_len = 0;
__u8 proto = 0;
- __u64 ihl_len;
if (eth_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(data + nh_off);
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 508da4a23c4f..95b4aa0928ba 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -324,11 +324,11 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
SEC("tc")
int vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
__u32 index = 0;
__u32 *local_ip = NULL;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -363,11 +363,11 @@ int vxlan_set_tunnel_dst(struct __sk_buff *skb)
SEC("tc")
int vxlan_set_tunnel_src(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
__u32 index = 0;
__u32 *local_ip = NULL;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -494,9 +494,9 @@ SEC("tc")
int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -525,9 +525,9 @@ SEC("tc")
int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -556,9 +556,9 @@ SEC("tc")
int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
new file mode 100644
index 000000000000..7521a805b506
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "test_iptunnel_common.h"
+#include "bpf_kfuncs.h"
+
+const size_t tcphdr_sz = sizeof(struct tcphdr);
+const size_t udphdr_sz = sizeof(struct udphdr);
+const size_t ethhdr_sz = sizeof(struct ethhdr);
+const size_t iphdr_sz = sizeof(struct iphdr);
+const size_t ipv6hdr_sz = sizeof(struct ipv6hdr);
+
+struct { /* per-CPU counters; count_tx() increments the slot indexed by protocol number */
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct { /* lookup table from a struct vip key to its tunnel parameters (struct iptnl_info) */
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_IPTNL_ENTRIES);
+ __type(key, struct vip);
+ __type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
+
+/* Add one to the rxcnt slot indexed by @protocol; a failed lookup is ignored. */
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *slot = bpf_map_lookup_elem(&rxcnt, &protocol);
+
+	if (!slot)
+		return;
+
+	*slot += 1;
+}
+
+/* Return the destination port stored in the transport header at @trans_data.
+ *
+ * The port is returned exactly as stored in the header (no byte swap).
+ * Protocols other than TCP and UDP yield 0.
+ */
+static __always_inline int get_dport(void *trans_data, __u8 protocol)
+{
+	if (protocol == IPPROTO_TCP)
+		return ((struct tcphdr *)trans_data)->dest;
+
+	if (protocol == IPPROTO_UDP)
+		return ((struct udphdr *)trans_data)->dest;
+
+	return 0;
+}
+
+/* Fill in the outer Ethernet header @new_eth.
+ *
+ * The source MAC becomes the destination MAC of @old_eth, the destination
+ * MAC is taken from @tnl->dmac, and the EtherType is set to @h_proto.
+ */
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	new_eth->h_proto = h_proto;
+
+	/* keep this order: the old destination is read before h_dest is overwritten */
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+}
+
+/* Encapsulate an IPv4 packet in an outer IPv4 header (IPIP) when its
+ * destination matches an entry in vip2tnl.
+ *
+ * @xdp: XDP context of the packet
+ * @xdp_ptr: dynptr created over @xdp before any head adjustment
+ *
+ * Returns XDP_TX after a successful encapsulation, XDP_PASS when no v4
+ * tunnel is configured for the destination, XDP_DROP on any failure.
+ */
+static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+	__u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz];
+	__u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz];
+	__u8 iph_buffer_udp[iphdr_sz + udphdr_sz];
+	struct bpf_dynptr new_xdp_ptr;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph;
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	__builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+	__builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp));
+	__builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp));
+
+	/* A frame too short to hold Ethernet + IPv4 + TCP is sliced with the
+	 * smaller UDP-sized window so the slice request can still succeed.
+	 */
+	if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+		iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp));
+	else
+		iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp));
+
+	if (!iph)
+		return XDP_DROP;
+
+	/* get_dport() as written never returns -1; the check is kept as a
+	 * guard in case it grows an error path.
+	 */
+	dport = get_dport(iph + 1, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = bpf_ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	/* make room for the outer IPv4 header in front of the packet */
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz))
+		return XDP_DROP;
+
+	/* the packet start moved, so work through a fresh dynptr from here on */
+	bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+	new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+	if (!new_eth)
+		return XDP_DROP;
+
+	/* layout of the slice: new Ethernet | outer IPv4 | old Ethernet */
+	iph = (struct iphdr *)(new_eth + 1);
+	old_eth = (struct ethhdr *)(iph + 1);
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = iphdr_sz >> 2;
+	iph->frag_off = 0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = bpf_htons(payload_len + iphdr_sz);
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	/* sum the header as 16-bit words, then fold the carry and invert */
+	next_iph = (__u16 *)iph;
+	for (i = 0; i < iphdr_sz >> 1; i++)
+		csum += *next_iph++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	/* If the slice was served from eth_buffer, everything written above
+	 * lives only in that buffer. Commit it once both the Ethernet and the
+	 * outer IPv4 header are complete.
+	 */
+	if (new_eth == eth_buffer)
+		bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+/* Encapsulate an IPv6 packet in an outer IPv6 header when its destination
+ * matches an entry in vip2tnl.
+ *
+ * @xdp: XDP context of the packet
+ * @xdp_ptr: dynptr created over @xdp before any head adjustment
+ *
+ * Returns XDP_TX after a successful encapsulation, XDP_PASS when no v6
+ * tunnel is configured for the destination, XDP_DROP on any failure.
+ */
+static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+	__u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz];
+	__u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz];
+	__u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz];
+	struct bpf_dynptr new_xdp_ptr;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	__builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+	__builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp));
+	__builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp));
+
+	/* A frame too short to hold Ethernet + IPv6 + TCP is sliced with the
+	 * smaller UDP-sized window. The threshold has to use the IPv6 header
+	 * size: with the IPv4 size, short IPv6/UDP frames would request the
+	 * TCP-sized window, the slice would fail and they would be dropped.
+	 */
+	if (ethhdr_sz + ipv6hdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+		ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp));
+	else
+		ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp));
+
+	if (!ip6h)
+		return XDP_DROP;
+
+	/* get_dport() as written never returns -1; the check is kept as a
+	 * guard in case it grows an error path.
+	 */
+	dport = get_dport(ip6h + 1, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len; /* still in network byte order */
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	/* make room for the outer IPv6 header in front of the packet */
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz))
+		return XDP_DROP;
+
+	/* the packet start moved, so work through a fresh dynptr from here on */
+	bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+	new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+	if (!new_eth)
+		return XDP_DROP;
+
+	/* layout of the slice: new Ethernet | outer IPv6 | old Ethernet */
+	ip6h = (struct ipv6hdr *)(new_eth + 1);
+	old_eth = (struct ethhdr *)(ip6h + 1);
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	/* the outer payload is the inner payload plus the inner IPv6 header */
+	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz);
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	/* If the slice was served from eth_buffer, everything written above
+	 * lives only in that buffer. Commit it once both the Ethernet and the
+	 * outer IPv6 header are complete.
+	 */
+	if (new_eth == eth_buffer)
+		bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp")
+int _xdp_tx_iptunnel(struct xdp_md *xdp) /* XDP entry point: dispatch on EtherType to the IPv4 or IPv6 encapsulation path */
+{
+ __u8 buffer[ethhdr_sz];
+ struct bpf_dynptr ptr;
+ struct ethhdr *eth;
+ __u16 h_proto;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); /* read-only view of the Ethernet header */
+ if (!eth)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto; /* left in network byte order; compared against bpf_htons() constants */
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp, &ptr);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp, &ptr); /* NOTE(review): the blank line above is stray; this return still belongs to the else-if */
+ else
+ return XDP_DROP; /* every other EtherType is dropped */
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index acda5c9cea93..9a16d95213e1 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -46,7 +46,15 @@ struct {
__type(value, struct elem);
} lru SEC(".maps");
+struct { /* single-slot array holding the element whose timer test3() arms */
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} abs_timer SEC(".maps");
+
__u64 bss_data;
+__u64 abs_data;
__u64 err;
__u64 ok;
__u64 callback_check = 52;
@@ -284,3 +292,40 @@ int BPF_PROG2(test2, int, a, int, b)
return bpf_timer_test();
}
+
+/* callback for absolute timer */
+static int timer_cb3(void *map, int *key, struct bpf_timer *timer) /* adds 6 to abs_data on every expiry and re-arms the timer with an absolute deadline */
+{
+ abs_data += 6;
+
+ if (abs_data < 12) { /* holds only while abs_data is 6, i.e. on the first expiry when it started at 0 */
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ } else {
+ /* Re-arm timer ~35 seconds in future: 1ull << 35 ns is roughly 34.4 s */
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35),
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test3, int, a) /* initialises the timer stored in abs_timer[0] and arms it with an absolute expiry */
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ bpf_printk("test3");
+
+ timer = bpf_map_lookup_elem(&abs_timer, &key); /* NOTE(review): the map value is struct elem; this assumes the timer sits at its start - confirm */
+ if (timer) {
+ if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0)
+ err |= 2048; /* record the init failure in the global err bitmask; execution continues */
+ bpf_timer_set_callback(timer, timer_cb3);
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS); /* absolute expiry: current boot-clock time plus 1000 ns */
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
new file mode 100644
index 000000000000..8a403470e557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Read an uninitialized value from stack at a fixed offset */
+SEC("socket")
+__naked int read_uninit_stack_fixed_off(void *ctx) /* loads 1, 2, 4 and 8 bytes from stack slots the program never wrote, then an 8-byte load over a 4-byte store; no failure annotation is attached, so it is presumably expected to load */
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = *(u8 *)(r10 - 8 ); \
+ r0 += r1; \
+ r1 = *(u8 *)(r10 - 11); \
+ r1 = *(u8 *)(r10 - 13); \
+ r1 = *(u8 *)(r10 - 15); \
+ r1 = *(u16*)(r10 - 16); \
+ r1 = *(u32*)(r10 - 32); \
+ r1 = *(u64*)(r10 - 64); \
+ /* read from a spill of a wrong size, it is a separate \
+ * branch in check_stack_read_fixed_off() \
+ */ \
+ *(u32*)(r10 - 72) = r1; \
+ r1 = *(u64*)(r10 - 72); \
+ r0 = 0; \
+ exit; \
+"
+ ::: __clobber_all);
+}
+
+/* Read an uninitialized value from stack at a variable offset */
+SEC("socket")
+__naked int read_uninit_stack_var_off(void *ctx) /* one-byte load at r10 plus a random offset limited to [-31, -1]; only the slot at r10 - 64 is written beforehand */
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ /* force stack depth to be 64 */ \
+ *(u64*)(r10 - 64) = r0; \
+ r0 = -r0; \
+ /* give r0 a range [-31, -1] */ \
+ if r0 s<= -32 goto exit_%=; \
+ if r0 s>= 0 goto exit_%=; \
+ /* access stack using r0 */ \
+ r1 = r10; \
+ r1 += r0; \
+ r2 = *(u8*)(r1 + 0); \
+exit_%=: r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __noinline void dummy(void) {} /* empty non-inlined subprogram; helper_uninit_to_misc() calls it to make the verifier print stack state */
+
+/* Pass a pointer to uninitialized stack memory to a helper.
+ * Passed memory block should be marked as STACK_MISC after helper call.
+ */
+SEC("socket")
+__log_level(7) __msg("fp-104=mmmmmmmm") /* fp-104 is the last 8-byte slot of the 32-byte block [fp-128, fp-96) handed to the helper */
+__naked int helper_uninit_to_misc(void *ctx) /* passes r1 = r10 - 128 with r2 = 32 to bpf_trace_printk, then calls dummy() */
+{
+ asm volatile (" \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = r10; \
+ r1 += -128; \
+ r2 = 32; \
+ call %[bpf_trace_printk]; \
+ /* Call to dummy() forces print_verifier_state(..., true), \
+ * thus showing the stack state, matched by __msg(). \
+ */ \
+ call %[dummy]; \
+ r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_trace_printk),
+ __imm(dummy)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
index b39093dd5715..0ade1110613b 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -202,7 +202,7 @@ do_nothing_cb(struct bpf_dynptr *dynptr, void *context)
return 0;
}
-SEC("fentry/" SYS_PREFIX "sys_getrlimit")
+SEC("fentry/" SYS_PREFIX "sys_prlimit64")
int test_user_ringbuf_epoll(void *ctx)
{
long num_samples;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 679efb3aa785..bf41390157bf 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -13,12 +13,15 @@
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
+#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
struct test_spec {
const char *name;
bool expect_failure;
- const char *expect_msg;
+ const char **expect_msgs;
+ size_t expect_msg_cnt;
int log_level;
+ int prog_flags;
};
static int tester_init(struct test_loader *tester)
@@ -67,7 +70,8 @@ static int parse_test_spec(struct test_loader *tester,
for (i = 1; i < btf__type_cnt(btf); i++) {
const struct btf_type *t;
- const char *s;
+ const char *s, *val;
+ char *e;
t = btf__type_by_id(btf, i);
if (!btf_is_decl_tag(t))
@@ -82,14 +86,48 @@ static int parse_test_spec(struct test_loader *tester,
} else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
spec->expect_failure = false;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
- spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ void *tmp;
+ const char **msg;
+
+ tmp = realloc(spec->expect_msgs,
+ (1 + spec->expect_msg_cnt) * sizeof(void *));
+ if (!tmp) {
+ ASSERT_FAIL("failed to realloc memory for messages\n");
+ return -ENOMEM;
+ }
+ spec->expect_msgs = tmp;
+ msg = &spec->expect_msgs[spec->expect_msg_cnt++];
+ *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
} else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
+ val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
errno = 0;
- spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0);
- if (errno) {
+ spec->log_level = strtol(val, &e, 0);
+ if (errno || e[0] != '\0') {
ASSERT_FAIL("failed to parse test log level from '%s'", s);
return -EINVAL;
}
+ } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
+ val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+ if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
+ spec->prog_flags |= BPF_F_STRICT_ALIGNMENT;
+ } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
+ spec->prog_flags |= BPF_F_ANY_ALIGNMENT;
+ } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
+ spec->prog_flags |= BPF_F_TEST_RND_HI32;
+ } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
+ spec->prog_flags |= BPF_F_TEST_STATE_FREQ;
+ } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
+ spec->prog_flags |= BPF_F_SLEEPABLE;
+ } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
+ spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+ } else /* assume numeric value */ {
+ errno = 0;
+ spec->prog_flags |= strtol(val, &e, 0);
+ if (errno || e[0] != '\0') {
+ ASSERT_FAIL("failed to parse test prog flags from '%s'", s);
+ return -EINVAL;
+ }
+ }
}
}
@@ -101,7 +139,7 @@ static void prepare_case(struct test_loader *tester,
struct bpf_object *obj,
struct bpf_program *prog)
{
- int min_log_level = 0;
+ int min_log_level = 0, prog_flags;
if (env.verbosity > VERBOSE_NONE)
min_log_level = 1;
@@ -119,7 +157,11 @@ static void prepare_case(struct test_loader *tester,
else
bpf_program__set_log_level(prog, spec->log_level);
+ prog_flags = bpf_program__flags(prog);
+ bpf_program__set_flags(prog, prog_flags | spec->prog_flags);
+
tester->log_buf[0] = '\0';
+ tester->next_match_pos = 0;
}
static void emit_verifier_log(const char *log_buf, bool force)
@@ -135,17 +177,26 @@ static void validate_case(struct test_loader *tester,
struct bpf_program *prog,
int load_err)
{
- if (spec->expect_msg) {
+ int i, j;
+
+ for (i = 0; i < spec->expect_msg_cnt; i++) {
char *match;
+ const char *expect_msg;
+
+ expect_msg = spec->expect_msgs[i];
- match = strstr(tester->log_buf, spec->expect_msg);
+ match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
if (!ASSERT_OK_PTR(match, "expect_msg")) {
/* if we are in verbose mode, we've already emitted log */
if (env.verbosity == VERBOSE_NONE)
emit_verifier_log(tester->log_buf, true /*force*/);
- fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg);
+ for (j = 0; j < i; j++)
+ fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]);
+ fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
return;
}
+
+ tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
}
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index d5d51ec97ec8..3cbf005747ed 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -376,6 +376,21 @@ int test__join_cgroup(const char *path);
___ok; \
})
+#define SYS(goto_label, fmt, ...) /* format a shell command, run it with system(), and jump to goto_label when ASSERT_OK fails */ \
+ ({ \
+ char cmd[1024]; /* snprintf truncates anything longer; the truncation is not checked */ \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) /* cmd is also passed as ASSERT_OK's second argument, presumably the check's label */ \
+ goto goto_label; \
+ })
+
+#define SYS_NOFAIL(fmt, ...) /* format a shell command and run it with system(); the macro evaluates to system()'s return value and asserts nothing */ \
+ ({ \
+ char cmd[1024]; /* snprintf truncates anything longer; the truncation is not checked */ \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ system(cmd); \
+ })
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -412,6 +427,7 @@ int get_bpf_max_tramp_links(void);
struct test_loader {
char *log_buf;
size_t log_buf_sz;
+ size_t next_match_pos;
struct bpf_object *obj;
};
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
index 6118e3ab61fc..56c9f8a3ad3d 100644
--- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -50,6 +50,7 @@ struct linum_err {
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
+#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_EXP 254
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 8b9949bb833d..49a70d9beb0b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu)
* struct bpf_timer t;
* };
* struct btf_ptr {
+ * struct prog_test_ref_kfunc __kptr_untrusted *ptr;
* struct prog_test_ref_kfunc __kptr *ptr;
- * struct prog_test_ref_kfunc __kptr_ref *ptr;
- * struct prog_test_member __kptr_ref *ptr;
+ * struct prog_test_member __kptr *ptr;
* }
*/
static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
- "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+ "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted"
"\0prog_test_member";
static __u32 btf_raw_types[] = {
/* int */
@@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
/* struct prog_test_ref_kfunc */ /* [6] */
BTF_STRUCT_ENC(51, 0, 0),
- BTF_STRUCT_ENC(89, 0, 0), /* [7] */
+ BTF_STRUCT_ENC(95, 0, 0), /* [7] */
+ /* type tag "kptr_untrusted" */
+ BTF_TYPE_TAG_ENC(80, 6), /* [8] */
/* type tag "kptr" */
- BTF_TYPE_TAG_ENC(75, 6), /* [8] */
- /* type tag "kptr_ref" */
- BTF_TYPE_TAG_ENC(80, 6), /* [9] */
- BTF_TYPE_TAG_ENC(80, 7), /* [10] */
+ BTF_TYPE_TAG_ENC(75, 6), /* [9] */
+ BTF_TYPE_TAG_ENC(75, 7), /* [10] */
BTF_PTR_ENC(8), /* [11] */
BTF_PTR_ENC(9), /* [12] */
BTF_PTR_ENC(10), /* [13] */
/* struct btf_ptr */ /* [14] */
BTF_STRUCT_ENC(43, 3, 24),
- BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
- BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
- BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
+ BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */
+ BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */
+ BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
static char bpf_vlog[UINT_MAX >> 8];
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9d993926bf0e..5702fc9761ef 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -181,7 +181,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "negative offset ptr_ ptr R1 off=-4 disallowed",
+ .errstr = "ptr R1 off=-4 disallowed",
},
{
"calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset",
@@ -243,7 +243,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "R1 must be referenced",
+ .errstr = "R1 must be",
},
{
"calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
@@ -2221,19 +2221,22 @@
* that fp-8 stack slot was unused in the fall-through
* branch and will accept the program incorrectly
*/
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 6 },
- .errstr = "invalid indirect read from stack R2 off -8+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map_hash_48b = { 7 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"calls: ctx read at start of subprog",
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index c8eaf0536c24..2fd31612c0b8 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -1,15 +1,4 @@
{
- "context stores via ST",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ST stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
"context stores via BPF_ATOMIC",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index a6c869a7319c..9c4885885aba 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,19 +29,30 @@
{
"helper access to variable memory: stack, bitwise AND, zero included",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ /* use bitwise AND to limit r3 range to [0, 64] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory at &fp[-64] is
+ * not initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 4 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
@@ -183,20 +194,31 @@
{
"helper access to variable memory: stack, JMP, no min check",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ /* use JMP to limit r3 range to [0, 64] */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory at &fp[-64] is
+ * not initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 4 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: stack, JMP (signed), no min check",
@@ -564,29 +586,41 @@
{
"helper access to variable memory: 8 bytes leak",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ /* Note: fp[-32] left uninitialized */
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ /* Limit r3 range to [1, 64] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory region [1, 64]
+ * at &fp[-64] is not fully initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+32 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 3 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: 8 bytes no leak (init memory)",
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
index 070893fb2900..02d9e004260b 100644
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/int_ptr.c
@@ -54,12 +54,13 @@
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
- BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack R4 off -16+4 size 8",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8",
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"ARG_PTR_TO_LONG misaligned",
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index 6914904344c0..d775ccb01989 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -336,7 +336,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_kptr = { 1 },
.result = REJECT,
- .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+ .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_",
},
{
"map_kptr: ref: reject off != 0",
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index d63fd8991b03..745d6b5842fd 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -128,9 +128,10 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "invalid read from stack off -16+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr_unpriv = "invalid read from stack off -16+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"precision tracking for u32 spill/fill",
@@ -258,6 +259,8 @@
BPF_EXIT_INSN(),
},
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "invalid read from stack off -8+1 size 8",
- .result = REJECT,
+ .errstr_unpriv = "invalid read from stack off -8+1 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index d11d0b28be41..108dd3ee1edd 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -531,33 +531,6 @@
.result = ACCEPT,
},
{
- "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid indirect read from stack",
-},
-{
"bpf_map_lookup_elem(smap, &key)",
.insns = {
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 9bb302dade23..d1463bf4949a 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -171,9 +171,10 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .result = REJECT,
- .errstr = "invalid read from stack off -4+0 size 4",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid read from stack off -4+0 size 4",
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"Spill a u32 const scalar. Refill as u16. Offset to skb->data",
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 878ca26c3f0a..af0c0f336625 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -240,6 +240,29 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ /* Same as above, but use BPF_ST_MEM to save 42
+ * instead of BPF_STX_MEM.
+ */
+ "unpriv: spill/fill of different pointers st",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"unpriv: spill/fill of different pointers stx - ctx and sock",
.insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index d37f512fad16..b183e26c03f1 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -213,31 +213,6 @@
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
{
- "indirect variable-offset stack access, max_off+size > max_initialized",
- .insns = {
- /* Fill only the second from top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-12 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack R2 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
"indirect variable-offset stack access, min_off < min_initialized",
.insns = {
/* Fill only the top 8 bytes of the stack. */
@@ -290,33 +265,6 @@
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
- "indirect variable-offset stack access, uninitialized",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R4 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
"indirect variable-offset stack access, ok",
.insns = {
/* Fill the top 16 bytes of the stack. */
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 80fbfe0330f6..e57750e44f71 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -48,6 +48,7 @@ TEST_PROGS += l2_tos_ttl_inherit.sh
TEST_PROGS += bind_bhash.sh
TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
+TEST_PROGS += big_tcp.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
@@ -81,6 +82,7 @@ TEST_GEN_FILES += csum
TEST_GEN_FILES += nat6to4.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
+TEST_PROGS += test_vxlan_mdb.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
new file mode 100755
index 000000000000..cde9a91c4797
--- /dev/null
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For IPv4 and IPv6 BIG TCP.
+# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="198.51.100.1"
+CLIENT_IP6="2001:db8:1::1"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="203.0.113.1"
+SERVER_IP6="2001:db8:2::1"
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+SERVER_GW4="203.0.113.2"
+CLIENT_GW4="198.51.100.2"
+SERVER_GW6="2001:db8:2::2"
+CLIENT_GW6="2001:db8:1::2"
+
+MAX_SIZE=128000
+CHK_SIZE=65535
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+setup() { # Build the CLIENT_NS <-> ROUTER_NS <-> SERVER_NS veth topology, raise GSO/GRO limits to MAX_SIZE and start netserver.
+ ip netns add $CLIENT_NS
+ ip netns add $SERVER_NS
+ ip netns add $ROUTER_NS
+ ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS
+ ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS
+
+ ip -net $CLIENT_NS link set link0 up
+ ip -net $CLIENT_NS link set link0 mtu 1442
+ ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0
+ ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad
+ ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4
+ ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6
+ ip -net $CLIENT_NS link set dev link0 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $CLIENT_NS link set dev link0 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+
+ ip -net $ROUTER_NS link set link1 up
+ ip -net $ROUTER_NS link set link2 up
+ ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1
+ ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad
+ ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2
+ ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad
+ ip -net $ROUTER_NS link set dev link1 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link2 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link1 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link2 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action.
+ ip net exec $ROUTER_NS tc qdisc add dev link1 ingress
+ ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+ proto ip flower ip_proto tcp action ct
+ ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+ proto ipv6 flower ip_proto tcp action ct
+ ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+ ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip -net $SERVER_NS link set link3 up
+ ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3
+ ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad
+ ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4
+ ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6
+ ip -net $SERVER_NS link set dev link3 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $SERVER_NS link set dev link3 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+ ip net exec $SERVER_NS netserver >/dev/null 2>&1 # stdout first, then stderr onto it, so both are silenced
+}
+
+cleanup() { # Installed with 'trap cleanup EXIT': stop netserver, delete the router-side veths, then remove the three namespaces.
+ ip net exec $SERVER_NS pkill netserver
+ ip -net $ROUTER_NS link del link1
+ ip -net $ROUTER_NS link del link2
+ ip netns del "$CLIENT_NS"
+ ip netns del "$SERVER_NS"
+ ip netns del "$ROUTER_NS"
+}
+
+start_counter() { # $1 = interface, $2 = netns: append a raw/PREROUTING rule matching packets whose length is NOT within 0:$CHK_SIZE.
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables" # NF=6 switches to the IPv6 tool
+ ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \
+ -m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+check_counter() { # $1 = interface, $2 = netns: succeed when the first column of the raw-table listing line for $1 is not "0".
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables" # NF=6 switches to the IPv6 tool
+ test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" # presumably column 1 is the rule's packet count - TODO confirm
+}
+
+stop_counter() { # $1 = interface, $2 = netns: delete the raw/PREROUTING length rule using the same match spec start_counter added.
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables" # NF=6 switches to the IPv6 tool
+ ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \
+ -m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+do_netperf() { # $1 = netns: run a netperf TCP_STREAM from that namespace to the server address of family $NF.
+ local serip=$SERVER_IP4
+ local netns=$1
+
+ [ "$NF" = "6" ] && serip=$SERVER_IP6 # NF=6 targets the IPv6 server address
+ ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip >/dev/null 2>&1 # stdout first, then stderr onto it, so both are silenced
+}
+
+do_test() { # $1..$4 = on/off for client TSO, router GRO (link1), router TSO (link2), server GRO (link3); returns 0 only on PASS.
+ local cli_tso=$1
+ local gw_gro=$2
+ local gw_tso=$3
+ local ser_gro=$4
+ local ret="PASS"
+
+ ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso
+ ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro
+ ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso
+ ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro
+
+ start_counter link1 $ROUTER_NS
+ start_counter link3 $SERVER_NS
+ do_netperf $CLIENT_NS
+
+ if check_counter link1 $ROUTER_NS; then
+ check_counter link3 $SERVER_NS || ret="FAIL_on_link3"
+ else
+ ret="FAIL_on_link1"
+ fi
+
+ stop_counter link1 $ROUTER_NS
+ stop_counter link3 $SERVER_NS
+ printf "%-9s %-8s %-8s %-8s: [%s]\n" \
+ $cli_tso $gw_gro $gw_tso $ser_gro $ret
+ test $ret = "PASS" # exit status of the function: success only when both counters were hit
+}
+
+testup() { # Print the column header, then run five offload combinations; the && chain stops at the first failing do_test.
+ echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \
+ do_test "on" "on" "on" "on" && \
+ do_test "on" "off" "on" "off" && \
+ do_test "off" "on" "on" "on" && \
+ do_test "on" "on" "off" "on" && \
+ do_test "off" "on" "off" "on"
+}
+
+if ! netperf -V &> /dev/null; then
+ echo "SKIP: Could not run test without netperf tool"
+ exit $ksft_skip
+fi
+
+if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then
+ echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+setup && echo "Testing for BIG TCP:" && \
+NF=4 testup && echo "***v4 Tests Done***" && \
+NF=6 testup && echo "***v6 Tests Done***"
+exit $?
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index cc9fd55ab869..4c7ce07afa2f 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -48,3 +48,4 @@ CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
+CONFIG_VXLAN=m
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
new file mode 100755
index 000000000000..31e5f0f8859d
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -0,0 +1,2318 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking VXLAN MDB functionality. The topology consists of
+# two sets of namespaces: One for the testing of IPv4 underlay and another for
+# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested.
+#
+# Data path functionality is tested by sending traffic from one of the upper
+# namespaces and checking using ingress tc filters that the expected traffic
+# was received by one of the lower namespaces.
+#
+# +------------------------------------+ +------------------------------------+
+# | ns1_v4 | | ns1_v6 |
+# | | | |
+# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 |
+# | + + + | | + + + |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | +---------+---------+ | | +---------+---------+ |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | vx0 | | vx0 |
+# | | | |
+# | | | |
+# | veth0 | | veth0 |
+# | + | | + |
+# +-----------------|------------------+ +-----------------|------------------+
+# | |
+# +-----------------|------------------+ +-----------------|------------------+
+# | + | | + |
+# | veth0 | | veth0 |
+# | | | |
+# | | | |
+# | vx0 | | vx0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | +---------+---------+ | | +---------+---------+ |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | + + + | | + + + |
+# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 |
+# | | | |
+# | ns2_v4 | | ns2_v6 |
+# +------------------------------------+ +------------------------------------+
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+CONTROL_PATH_TESTS="
+ basic_star_g_ipv4_ipv4
+ basic_star_g_ipv6_ipv4
+ basic_star_g_ipv4_ipv6
+ basic_star_g_ipv6_ipv6
+ basic_sg_ipv4_ipv4
+ basic_sg_ipv6_ipv4
+ basic_sg_ipv4_ipv6
+ basic_sg_ipv6_ipv6
+ star_g_ipv4_ipv4
+ star_g_ipv6_ipv4
+ star_g_ipv4_ipv6
+ star_g_ipv6_ipv6
+ sg_ipv4_ipv4
+ sg_ipv6_ipv4
+ sg_ipv4_ipv6
+ sg_ipv6_ipv6
+ dump_ipv4_ipv4
+ dump_ipv6_ipv4
+ dump_ipv4_ipv6
+ dump_ipv6_ipv6
+"
+
+DATA_PATH_TESTS="
+ encap_params_ipv4_ipv4
+ encap_params_ipv6_ipv4
+ encap_params_ipv4_ipv6
+ encap_params_ipv6_ipv6
+ starg_exclude_ir_ipv4_ipv4
+ starg_exclude_ir_ipv6_ipv4
+ starg_exclude_ir_ipv4_ipv6
+ starg_exclude_ir_ipv6_ipv6
+ starg_include_ir_ipv4_ipv4
+ starg_include_ir_ipv6_ipv4
+ starg_include_ir_ipv4_ipv6
+ starg_include_ir_ipv6_ipv6
+ starg_exclude_p2mp_ipv4_ipv4
+ starg_exclude_p2mp_ipv6_ipv4
+ starg_exclude_p2mp_ipv4_ipv6
+ starg_exclude_p2mp_ipv6_ipv6
+ starg_include_p2mp_ipv4_ipv4
+ starg_include_p2mp_ipv6_ipv4
+ starg_include_p2mp_ipv4_ipv6
+ starg_include_p2mp_ipv6_ipv6
+ egress_vni_translation_ipv4_ipv4
+ egress_vni_translation_ipv6_ipv4
+ egress_vni_translation_ipv4_ipv6
+ egress_vni_translation_ipv6_ipv6
+ all_zeros_mdb_ipv4
+ all_zeros_mdb_ipv6
+ mdb_fdb_ipv4_ipv4
+ mdb_fdb_ipv6_ipv4
+ mdb_fdb_ipv4_ipv6
+ mdb_fdb_ipv6_ipv6
+ mdb_torture_ipv4_ipv4
+ mdb_torture_ipv6_ipv4
+ mdb_torture_ipv4_ipv6
+ mdb_torture_ipv6_ipv6
+"
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ $CONTROL_PATH_TESTS
+ $DATA_PATH_TESTS
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{ # $1 = actual rc, $2 = expected rc, $3 = description: print OK/FAIL and update the nsuccess/nfail/ret counters.
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1 # remember that at least one check failed
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then # PAUSE=yes stops after every test, not only after failures
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{ # $1 = command string run through eval; stderr is discarded unless VERBOSE=1. Returns the command's exit status.
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: %s\n" "$cmd" # pass cmd as data so any '%' or '\' in it is printed literally
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$? # not declared local, so the assignment is visible outside this function
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{ # $1 = netns, $2 = filter selector for 'tc filter show', $3 = filter handle, $4 = expected packet count of its first action.
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1 # short pause before the statistics are read
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_common_ns()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+
+ ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+ ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+ ip -n $ns link set dev lo up
+ ip -n $ns address add $local_addr dev lo
+
+ ip -n $ns link set dev veth0 up
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+
+ ip -n $ns link add link br0 name br0.20 up type vlan id 20
+ bridge -n $ns vlan add vid 20 dev br0 self
+
+ ip -n $ns link add link br0 name br0.4000 up type vlan id 4000
+ bridge -n $ns vlan add vid 4000 dev br0 self
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 external vnifilter
+ bridge -n $ns link set dev vx0 vlan_tunnel on
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+ bridge -n $ns vni add vni 10010 dev vx0
+
+ bridge -n $ns vlan add vid 20 dev vx0
+ bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+ bridge -n $ns vni add vni 10020 dev vx0
+
+ bridge -n $ns vlan add vid 4000 dev vx0 pvid
+ bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000
+ bridge -n $ns vni add vni 14000 dev vx0
+}
+
+setup_common()
+{ # $1/$2 = namespace names, $3/$4 = their local VTEP addresses: create both namespaces joined by a veth pair and configure each.
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local local_addr1=$1; shift
+ local local_addr2=$1; shift
+
+ ip netns add $ns1
+ ip netns add $ns2
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $ns1 name veth0
+ ip link set dev veth1 netns $ns2 name veth0 # the peer is renamed, so both namespaces see the link as veth0
+
+ setup_common_ns $ns1 $local_addr1
+ setup_common_ns $ns2 $local_addr2
+}
+
+setup_v4()
+{
+ setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2
+
+ ip -n ns1_v4 address add 192.0.2.17/28 dev veth0
+ ip -n ns2_v4 address add 192.0.2.18/28 dev veth0
+
+ ip -n ns1_v4 route add default via 192.0.2.18
+ ip -n ns2_v4 route add default via 192.0.2.17
+}
+
+cleanup_v4()
+{
+ ip netns del ns2_v4
+ ip netns del ns1_v4
+}
+
+setup_v6()
+{
+ setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2
+
+ ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad
+ ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad
+
+ ip -n ns1_v6 route add default via 2001:db8:2::2
+ ip -n ns2_v6 route add default via 2001:db8:2::1
+}
+
+cleanup_v6()
+{
+ ip netns del ns2_v6
+ ip netns del ns1_v6
+}
+
+setup()
+{ # Create the IPv4-underlay and IPv6-underlay namespace pairs.
+ set -e # abort the script on the first failing setup command
+
+ setup_v4
+ setup_v6
+
+ sleep 5 # presumably lets the new links and addresses settle - TODO confirm
+
+ set +e # restore normal error handling once setup is done
+}
+
+cleanup()
+{ # Remove both namespace pairs; all output is discarded so deleting missing namespaces stays quiet.
+ cleanup_v6 &> /dev/null
+ cleanup_v4 &> /dev/null
+}
+
+################################################################################
+# Tests - Control path
+
+basic_common()
+{
+ local ns1=$1; shift
+ local grp_key=$1; shift
+ local vtep_ip=$1; shift
+
+ # Test basic control path operations common to all MDB entry types.
+
+ # Basic add, replace and delete behavior.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry addition"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+ log_test $? 0 "MDB entry presence after addition"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+ log_test $? 0 "MDB entry presence after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+ log_test $? 1 "MDB entry presence after deletion"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Non-existent MDB entry deletion"
+
+ # Default protocol and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\""
+ log_test $? 0 "MDB entry default protocol"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\""
+ log_test $? 0 "MDB entry protocol replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default destination port and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \""
+ log_test $? 1 "MDB entry default destination port"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\""
+ log_test $? 0 "MDB entry destination port replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default destination VNI and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \""
+ log_test $? 1 "MDB entry default destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\""
+ log_test $? 0 "MDB entry destination VNI replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default outgoing interface and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \""
+ log_test $? 1 "MDB entry default outgoing interface"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\""
+ log_test $? 0 "MDB entry outgoing interface replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Common error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with mismatch between device and port"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with temp state"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with VLAN"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry MAC address"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent"
+ log_test $? 255 "MDB entry without extended parameters"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with an invalid protocol"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010"
+ log_test $? 255 "MDB entry with an invalid destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))"
+ log_test $? 255 "MDB entry with an invalid source VNI"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010"
+ log_test $? 255 "MDB entry without a remote destination IP"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Duplicate MDB entries"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+}
+
+basic_star_g_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local grp_key="grp 239.1.1.1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local grp_key="grp ff0e::1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local grp_key="grp 239.1.1.1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local grp_key="grp ff0e::1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local grp_key="grp 239.1.1.1 src 192.0.2.129"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local grp_key="grp ff0e::1 src 2001:db8:100::1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local grp_key="grp 239.1.1.1 src 192.0.2.129"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local grp_key="grp ff0e::1 src 2001:db8:100::1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+star_g_common()
+{
+ local ns1=$1; shift
+ local grp=$1; shift
+ local src1=$1; shift
+ local src2=$1; shift
+ local src3=$1; shift
+ local vtep_ip=$1; shift
+ local all_zeros_grp=$1; shift
+
+ # Test control path operations specific to (*, G) entries.
+
+ # Basic add, replace and delete behavior.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry addition with source list"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+ log_test $? 0 "(*, G) MDB entry presence after addition"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry presence after addition"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry replacement with source list"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+ log_test $? 0 "(*, G) MDB entry presence after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry presence after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+ log_test $? 1 "(*, G) MDB entry presence after deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 1 "(S, G) MDB entry presence after deletion"
+
+ # Default filter mode and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+ log_test $? 0 "(*, G) MDB entry default filter mode"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+ log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\""
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\""
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+ log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\""
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+ log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\""
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\""
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+ log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\""
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default source list and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list"
+ log_test $? 1 "(*, G) MDB entry default source list"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
+ log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+ log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ log_test $? 0 "(S, G) MDB entry of 1st source after removing source"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
+ log_test $? 1 "(S, G) MDB entry of 2nd source after removing source"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+ log_test $? 0 "(S, G) MDB entry of 3rd source after removing source"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default protocol and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\""
+ log_test $? 0 "(*, G) MDB entry default protocol"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\""
+ log_test $? 0 "(S, G) MDB entry default protocol"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\""
+ log_test $? 0 "(*, G) MDB entry protocol after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\""
+ log_test $? 0 "(S, G) MDB entry protocol after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default destination port and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \""
+ log_test $? 1 "(*, G) MDB entry default destination port"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \""
+ log_test $? 1 "(S, G) MDB entry default destination port"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \""
+ log_test $? 0 "(*, G) MDB entry destination port after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \""
+ log_test $? 0 "(S, G) MDB entry destination port after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default destination VNI and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \""
+ log_test $? 1 "(*, G) MDB entry default destination VNI"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \""
+ log_test $? 1 "(S, G) MDB entry default destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \""
+ log_test $? 0 "(*, G) MDB entry destination VNI after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \""
+ log_test $? 0 "(S, G) MDB entry destination VNI after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default outgoing interface and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \""
+ log_test $? 1 "(*, G) MDB entry default outgoing interface"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \""
+ log_test $? 1 "(S, G) MDB entry default outgoing interface"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \""
+ log_test $? 0 "(*, G) MDB entry outgoing interface after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \""
+ log_test $? 0 "(S, G) MDB entry outgoing interface after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with filter mode"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(*, G) INCLUDE with an empty source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Invalid source in source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Source list without filter mode"
+}
+
+star_g_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local grp=239.1.1.1
+ local src1=192.0.2.129
+ local src2=192.0.2.130
+ local src3=192.0.2.131
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local grp=ff0e::1
+ local src1=2001:db8:100::1
+ local src2=2001:db8:100::2
+ local src3=2001:db8:100::3
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local grp=239.1.1.1
+ local src1=192.0.2.129
+ local src2=192.0.2.130
+ local src3=192.0.2.131
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local grp=ff0e::1
+ local src1=2001:db8:100::1
+ local src2=2001:db8:100::2
+ local src3=2001:db8:100::3
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+sg_common()
+{
+ local ns1=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local vtep_ip=$1; shift
+ local all_zeros_grp=$1; shift
+
+ # Test control path operations specific to (S, G) entries.
+
+ # Default filter mode.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+ log_test $? 0 "(S, G) MDB entry default filter mode"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+
+ # Error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with filter mode"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with an invalid source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with source"
+}
+
+sg_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local grp=239.1.1.1
+ local src=192.0.2.129
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local grp=239.1.1.1
+ local src=192.0.2.129
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+# Print $1 consecutive IPv4 group addresses, one per line, starting at
+# 239.1.1.0.
+ipv4_grps_get()
+{
+	local max_grps=$1
+	local i
+
+	for ((i = 0; i < max_grps; i++)); do
+		printf '239.1.1.%d\n' "$i"
+	done
+}
+
+# Print $1 consecutive IPv6 group addresses, one per line, starting at
+# ff0e::0. The last group of the address is printed in hexadecimal.
+ipv6_grps_get()
+{
+	local max_grps=$1
+	local i
+
+	for ((i = 0; i < max_grps; i++)); do
+		printf 'ff0e::%x\n' "$i"
+	done
+}
+
+# Large scale MDB dump test.
+#
+# $1 - namespace to run in
+# $2 - local address of the VXLAN devices created by the test
+# $3 - prefix of the remote addresses; the remote index is appended to it
+# $4 - name of a function that prints a list of groups given their number
+dump_common()
+{
+	local ns1=$1; shift
+	local local_addr=$1; shift
+	local remote_prefix=$1; shift
+	local fn=$1; shift
+	local max_vxlan_devs=2
+	local max_remotes=64
+	local max_grps=256
+	local num_entries
+	local batch_file
+	local grps
+	local grp
+	local i j
+
+	# The kernel maintains various markers for the MDB dump. Add a test for
+	# large scale MDB dump to make sure that all the configured entries are
+	# dumped and that the markers are used correctly.
+
+	# Create net devices.
+	for i in $(seq 1 $max_vxlan_devs); do
+		ip -n $ns1 link add name vx-test${i} up type vxlan \
+			local $local_addr dstport 4789 external vnifilter
+	done
+
+	# The list of groups is the same for every device and remote, so
+	# generate it once instead of once per inner loop.
+	grps=$($fn $max_grps)
+
+	# Create batch file with MDB entries. The file is opened once for the
+	# whole loop rather than once per entry.
+	batch_file=$(mktemp)
+	for i in $(seq 1 $max_vxlan_devs); do
+		for j in $(seq 1 $max_remotes); do
+			for grp in $grps; do
+				echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}"
+			done
+		done
+	done > "$batch_file"
+
+	# Program the batch file and check for expected number of entries.
+	bridge -n $ns1 -b "$batch_file"
+	for i in $(seq 1 $max_vxlan_devs); do
+		num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep -c "permanent")
+		[[ $num_entries -eq $((max_grps * max_remotes)) ]]
+		log_test $? 0 "Large scale dump - VXLAN device #$i"
+	done
+
+	# Remove the devices created above so that a later run in the same
+	# namespace starts from a clean state.
+	for i in $(seq 1 $max_vxlan_devs); do
+		ip -n $ns1 link del dev vx-test${i}
+	done
+
+	rm -f "$batch_file"
+}
+
+# Large scale MDB dump: IPv4 groups, IPv4 remotes.
+dump_ipv4_ipv4()
+{
+	local ns1=ns1_v4 fn=ipv4_grps_get
+	local local_addr=192.0.2.1 remote_prefix=198.51.100.
+
+	printf '\n%s\n%s\n' \
+		"Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay" \
+		"-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+# Large scale MDB dump: IPv6 groups, IPv4 remotes.
+dump_ipv6_ipv4()
+{
+	local ns1=ns1_v4 fn=ipv6_grps_get
+	local local_addr=192.0.2.1 remote_prefix=198.51.100.
+
+	printf '\n%s\n%s\n' \
+		"Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay" \
+		"-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+# Large scale MDB dump: IPv4 groups, IPv6 remotes.
+dump_ipv4_ipv6()
+{
+	local ns1=ns1_v6 fn=ipv4_grps_get
+	local local_addr=2001:db8:1::1 remote_prefix=2001:db8:1000::
+
+	printf '\n%s\n%s\n' \
+		"Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay" \
+		"-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+# Large scale MDB dump: IPv6 groups, IPv6 remotes.
+dump_ipv6_ipv6()
+{
+	local ns1=ns1_v6 fn=ipv6_grps_get
+	local local_addr=2001:db8:1::1 remote_prefix=2001:db8:1000::
+
+	printf '\n%s\n%s\n' \
+		"Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay" \
+		"-----------------------------------------------------------------"
+
+	dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+################################################################################
+# Tests - Data path
+
+encap_params_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local enc_ethtype=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # Test that packets forwarded by the VXLAN MDB are encapsulated with
+ # the correct parameters. Transmit packets from the first namespace and
+ # check that they hit the corresponding filters on the ingress of the
+ # second namespace.
+
+ run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact"
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ # Check destination IP.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - no match"
+
+ run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+ # Check destination port.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - no match"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - no match"
+
+ run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+ # Check default VNI.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - no match"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - no match"
+
+ run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+}
+
+# Encapsulation parameters check: IPv4 group, IPv4 VTEPs.
+encap_params_ipv4_ipv4()
+{
+	local ns1=ns1_v4 ns2=ns2_v4
+	local vtep1_ip=198.51.100.100 vtep2_ip=198.51.100.200
+	local plen=32 enc_ethtype="ip"
+	local grp=239.1.1.1 src=192.0.2.129
+
+	printf '\n%s\n%s\n' \
+		"Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay" \
+		"------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn"
+}
+
+# Encapsulation parameters check: IPv6 group, IPv4 VTEPs.
+encap_params_ipv6_ipv4()
+{
+	local ns1=ns1_v4 ns2=ns2_v4
+	local vtep1_ip=198.51.100.100 vtep2_ip=198.51.100.200
+	local plen=32 enc_ethtype="ip"
+	local grp=ff0e::1 src=2001:db8:100::1
+
+	printf '\n%s\n%s\n' \
+		"Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay" \
+		"------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn -6"
+}
+
+# Encapsulation parameters check: IPv4 group, IPv6 VTEPs.
+encap_params_ipv4_ipv6()
+{
+	local ns1=ns1_v6 ns2=ns2_v6
+	local vtep1_ip=2001:db8:1000::1 vtep2_ip=2001:db8:2000::1
+	local plen=128 enc_ethtype="ipv6"
+	local grp=239.1.1.1 src=192.0.2.129
+
+	printf '\n%s\n%s\n' \
+		"Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay" \
+		"------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn"
+}
+
+# Encapsulation parameters check: IPv6 group, IPv6 VTEPs.
+encap_params_ipv6_ipv6()
+{
+	local ns1=ns1_v6 ns2=ns2_v6
+	local vtep1_ip=2001:db8:1000::1 vtep2_ip=2001:db8:2000::1
+	local plen=128 enc_ethtype="ipv6"
+	local grp=ff0e::1 src=2001:db8:100::1
+
+	printf '\n%s\n%s\n' \
+		"Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay" \
+		"------------------------------------------------------------------"
+
+	encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+		$grp $src "mausezahn -6"
+}
+
+starg_exclude_ir_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) EXCLUDE MDB entry with one source and two remote
+ # VTEPs. Make sure that the source in the source list is not forwarded
+ # and that a source not in the list is forwarded. Remove one of the
+ # VTEPs from the entry and make sure that packets are only forwarded to
+ # the remaining VTEP.
+
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source - second VTEP"
+
+ # Remove second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_exclude_ir_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_ir_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+# Data path test for a (*, G) INCLUDE MDB entry that is replicated to two
+# remote VTEPs.
+#
+# Arguments:
+#   $1 ns1         - namespace where the MDB entries are installed and from
+#                    which the test packets are sent
+#   $2 ns2         - namespace where the VTEP addresses are configured on lo
+#                    and where tc filters on vx0 match the received packets
+#   $3 vtep1_ip    - address of the first remote VTEP (filter handle 101)
+#   $4 vtep2_ip    - address of the second remote VTEP (filter handle 102)
+#   $5 plen        - prefix length used when configuring the VTEP addresses
+#   $6 grp         - overlay multicast group of the MDB entry
+#   $7 valid_src   - source placed in the entry's source list
+#   $8 invalid_src - source that is not in the source list
+#   $9 mz          - packet generator command (the visible callers pass
+#                    "mausezahn" or "mausezahn -6")
+starg_include_ir_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) INCLUDE MDB entry with one source and two remote
+ # VTEPs. Make sure that the source in the source list is forwarded and
+ # that a source not in the list is not forwarded. Remove one of the
+ # VTEPs from the entry and make sure that packets are only forwarded to
+ # the remaining VTEP.
+
+ # Receiver side: own both VTEP addresses and attach a clsact qdisc so
+ # that ingress filters can be installed on vx0.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ # One filter per VTEP, keyed on the outer destination address.
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ # Sender side: the same INCLUDE entry with both VTEPs as remotes.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against cumulative per-filter
+ # counters - confirm against the helper's definition.
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source - second VTEP"
+
+ # Remove second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ # Both expected values are unchanged from the previous step.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+ # Only the expected value for the first VTEP grows.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_include_ir_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_ir_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+# Data path test for a (*, G) EXCLUDE MDB entry whose remote is an underlay
+# multicast group.
+#
+# Arguments:
+#   $1 ns1         - namespace where the MDB entry is installed and from
+#                    which the test packets are sent
+#   $2 ns2         - namespace that joins the underlay group on veth0 and
+#                    where a tc filter on vx0 matches the received packets
+#   $3 mcast_grp   - underlay multicast group used as the entry's destination
+#   $4 plen        - prefix length used when configuring $mcast_grp on veth0
+#   $5 grp         - overlay multicast group of the MDB entry
+#   $6 valid_src   - source that is not in the entry's source list
+#   $7 invalid_src - source placed in the entry's source list
+#   $8 mz          - packet generator command (the visible callers pass
+#                    "mausezahn" or "mausezahn -6")
+starg_exclude_p2mp_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) EXCLUDE MDB entry with one source and one multicast
+ # group to which packets are sent. Make sure that the source in the
+ # source list is not forwarded and that a source not in the list is
+ # forwarded.
+
+ # Receiver side: configure the underlay group address on veth0 with the
+ # autojoin flag.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+ # Match on packets whose outer destination is the underlay group.
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against a cumulative filter
+ # counter - confirm against the helper's definition.
+
+ # Check that invalid source is not forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+ # Remove the VTEP from the multicast group.
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+ # The expected value is unchanged from the previous step.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_exclude_p2mp_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_p2mp_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+# Data path test for a (*, G) INCLUDE MDB entry whose remote is an underlay
+# multicast group.
+#
+# Arguments:
+#   $1 ns1         - namespace where the MDB entry is installed and from
+#                    which the test packets are sent
+#   $2 ns2         - namespace that joins the underlay group on veth0 and
+#                    where a tc filter on vx0 matches the received packets
+#   $3 mcast_grp   - underlay multicast group used as the entry's destination
+#   $4 plen        - prefix length used when configuring $mcast_grp on veth0
+#   $5 grp         - overlay multicast group of the MDB entry
+#   $6 valid_src   - source placed in the entry's source list
+#   $7 invalid_src - source that is not in the source list
+#   $8 mz          - packet generator command (the visible callers pass
+#                    "mausezahn" or "mausezahn -6")
+starg_include_p2mp_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) INCLUDE MDB entry with one source and one multicast
+ # group to which packets are sent. Make sure that the source in the
+ # source list is forwarded and that a source not in the list is not
+ # forwarded.
+
+ # Receiver side: configure the underlay group address on veth0 with the
+ # autojoin flag.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+ # Match on packets whose outer destination is the underlay group.
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against a cumulative filter
+ # counter - confirm against the helper's definition.
+
+ # Check that invalid source is not forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+ # Remove the VTEP from the multicast group.
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+ # The expected value is unchanged from the previous step.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_include_p2mp_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local ns2=ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_p2mp_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local ns2=ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+# Data path test for egress VNI translation: traffic sent over VLAN 10 on the
+# first VTEP is expected on br0.4000 of the second VTEP as long as VLAN 4000
+# is the PVID of its vx0.
+#
+# Arguments:
+#   $1 ns1       - namespace where the MDB entry is installed and from which
+#                  the test packets are sent
+#   $2 ns2       - namespace whose vx0 VLAN membership is changed and where a
+#                  tc filter on br0.4000 matches the received packets
+#   $3 mcast_grp - underlay multicast group used as the entry's destination
+#   $4 plen      - prefix length used when configuring $mcast_grp on veth0
+#   $5 proto     - protocol given to the tc filter (the visible callers pass
+#                  "ipv4" or "ipv6")
+#   $6 grp       - overlay multicast group
+#   $7 src       - overlay source address
+#   $8 mz        - packet generator command (the visible callers pass
+#                  "mausezahn" or "mausezahn -6")
+egress_vni_translation_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # When P2MP tunnels are used with optimized inter-subnet multicast
+ # (OISM) [1], the ingress VTEP does not perform VNI translation and
+ # uses the VNI of the source broadcast domain (BD). If the egress VTEP
+ # is a member in the source BD, then no VNI translation is needed.
+ # Otherwise, the egress VTEP needs to translate the VNI to the
+ # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI.
+ #
+ # In this test, remove the VTEP in the second namespace from VLAN 10
+ # (VNI 10010) and make sure that a packet sent from this VLAN on the
+ # first VTEP is received by the SVI corresponding to the L3VNI (14000 /
+ # VLAN 4000) on the second VTEP.
+ #
+ # The second VTEP will be able to decapsulate the packet with VNI 10010
+ # because this VNI is configured on its shared VXLAN device. Later,
+ # when ingressing the bridge, the VNI to VLAN lookup will fail because
+ # the VTEP is not a member in VLAN 10, which will cause the packet to
+ # be tagged with VLAN 4000 since it is configured as PVID.
+ #
+ # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast
+
+ # Receiver side: join the underlay group and match the overlay (S, G)
+ # on the SVI of VLAN 4000.
+ run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+ run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0"
+
+ # Remove the second VTEP from VLAN 10.
+ run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against a cumulative filter
+ # counter - confirm against the helper's definition.
+
+ # Make sure that packets sent from the first VTEP over VLAN 10 are
+ # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
+ # the second VTEP, since it is configured as PVID.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - PVID configured"
+
+ # Remove PVID flag from VLAN 4000 on the second VTEP and make sure
+ # packets are no longer received by the SVI interface.
+ # The VLAN is re-added without the "pvid" keyword; the expected value
+ # is unchanged from the previous step.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - no PVID configured"
+
+ # Reconfigure the PVID and make sure packets are received again.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
+ log_test $? 0 "Egress VNI translation - PVID reconfigured"
+}
+
+# Run the egress VNI translation test with an IPv4 overlay over an IPv4
+# underlay.
+egress_vni_translation_ipv4_ipv4()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v4 ns2=ns2_v4
+ # Underlay multicast group and its prefix length.
+ local mcast_grp=238.1.1.1 plen=32
+ # Overlay protocol, group and source.
+ local proto=ipv4 grp=239.1.1.1 src=192.0.2.129
+
+ printf '\n%s\n%s\n' \
+ "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay" \
+ "----------------------------------------------------------------"
+
+ egress_vni_translation_common "$ns1" "$ns2" "$mcast_grp" "$plen" \
+ "$proto" "$grp" "$src" "mausezahn"
+}
+
+# Run the egress VNI translation test with an IPv6 overlay over an IPv4
+# underlay.
+egress_vni_translation_ipv6_ipv4()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v4 ns2=ns2_v4
+ # Underlay multicast group and its prefix length.
+ local mcast_grp=238.1.1.1 plen=32
+ # Overlay protocol, group and source.
+ local proto=ipv6 grp=ff0e::1 src=2001:db8:100::1
+
+ printf '\n%s\n%s\n' \
+ "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay" \
+ "----------------------------------------------------------------"
+
+ egress_vni_translation_common "$ns1" "$ns2" "$mcast_grp" "$plen" \
+ "$proto" "$grp" "$src" "mausezahn -6"
+}
+
+# Run the egress VNI translation test with an IPv4 overlay over an IPv6
+# underlay.
+egress_vni_translation_ipv4_ipv6()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v6 ns2=ns2_v6
+ # Underlay multicast group and its prefix length.
+ local mcast_grp=ff0e::2 plen=128
+ # Overlay protocol, group and source.
+ local proto=ipv4 grp=239.1.1.1 src=192.0.2.129
+
+ printf '\n%s\n%s\n' \
+ "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay" \
+ "----------------------------------------------------------------"
+
+ egress_vni_translation_common "$ns1" "$ns2" "$mcast_grp" "$plen" \
+ "$proto" "$grp" "$src" "mausezahn"
+}
+
+# Run the egress VNI translation test with an IPv6 overlay over an IPv6
+# underlay.
+egress_vni_translation_ipv6_ipv6()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v6 ns2=ns2_v6
+ # Underlay multicast group and its prefix length.
+ local mcast_grp=ff0e::2 plen=128
+ # Overlay protocol, group and source.
+ local proto=ipv6 grp=ff0e::1 src=2001:db8:100::1
+
+ printf '\n%s\n%s\n' \
+ "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay" \
+ "----------------------------------------------------------------"
+
+ egress_vni_translation_common "$ns1" "$ns2" "$mcast_grp" "$plen" \
+ "$proto" "$grp" "$src" "mausezahn -6"
+}
+
+# Data path test for the all-zeros (catchall) MDB entries 0.0.0.0 and ::.
+#
+# Arguments:
+#   $1 ns1      - namespace where the MDB entries are installed and from
+#                 which the test packets are sent
+#   $2 ns2      - namespace where the VTEP addresses are configured on lo and
+#                 where tc filters on vx0 match the received packets
+#   $3 vtep1_ip - first remote of the 0.0.0.0 entry, also the remote of the
+#                 regular IPv4 entry (filter handle 101)
+#   $4 vtep2_ip - second remote of the 0.0.0.0 entry (filter handle 102)
+#   $5 vtep3_ip - first remote of the :: entry, also the remote of the
+#                 regular IPv6 entry (filter handle 103)
+#   $6 vtep4_ip - second remote of the :: entry (filter handle 104)
+#   $7 plen     - prefix length used when configuring the VTEP addresses
+#
+# The overlay groups and sources are fixed inside the function, and
+# "mausezahn" / "mausezahn -6" are invoked directly rather than passed in.
+all_zeros_mdb_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local vtep3_ip=$1; shift
+ local vtep4_ip=$1; shift
+ local plen=$1; shift
+ local ipv4_grp=239.1.1.1
+ local ipv4_unreg_grp=239.2.2.2
+ local ipv4_ll_grp=224.0.0.100
+ local ipv4_src=192.0.2.129
+ local ipv6_grp=ff0e::1
+ local ipv6_unreg_grp=ff0e::2
+ local ipv6_ll_grp=ff02::1
+ local ipv6_src=2001:db8:100::1
+
+ # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
+ # and make sure they only forward unregistered IP multicast traffic
+ # which is not link-local. Also make sure that each entry only forwards
+ # traffic from the matching address family.
+
+ # Associate two different VTEPs with each all-zeros MDB entry: two with
+ # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::).
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010"
+
+ # Associate one VTEP from each set with a regular MDB entry: One with
+ # an IPv4 entry and another with an IPv6 one.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010"
+
+ # Add filters to match on decapsulated traffic in the second namespace.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass"
+
+ # Configure the VTEP addresses in the second namespace to enable
+ # decapsulation.
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against cumulative per-filter
+ # counters - confirm against the helper's definition.
+
+ # Send registered IPv4 multicast and make sure it only arrives to the
+ # first VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Registered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Registered IPv4 multicast - second VTEP"
+
+ # Send unregistered IPv4 multicast that is not link-local and make sure
+ # it arrives to the first and second VTEPs.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Unregistered IPv4 multicast - second VTEP"
+
+ # Send IPv4 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Link-local IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Link-local IPv4 multicast - second VTEP"
+
+ # Send registered IPv4 multicast using a unicast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP"
+
+ # Send registered IPv4 multicast using a broadcast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP"
+
+ # Make sure IPv4 traffic did not reach the VTEPs associated with
+ # IPv6 entries.
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 0
+ log_test $? 0 "IPv4 traffic - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+ log_test $? 0 "IPv4 traffic - fourth VTEP"
+
+ # Reset IPv4 filters before testing IPv6 traffic.
+ # NOTE(review): the final checks of handles 101 and 102 expect 0, which
+ # presumes that replacing a filter zeroes its counter - confirm.
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ # Send registered IPv6 multicast and make sure it only arrives to the
+ # third VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 1
+ log_test $? 0 "Registered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+ log_test $? 0 "Registered IPv6 multicast - fourth VTEP"
+
+ # Send unregistered IPv6 multicast that is not link-local and make sure
+ # it arrives to the third and fourth VTEPs.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP"
+
+ # Send IPv6 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Link-local IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Link-local IPv6 multicast - fourth VTEP"
+
+ # Send registered IPv6 multicast using a unicast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP"
+
+ # Send registered IPv6 multicast using a broadcast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP"
+
+ # Make sure IPv6 traffic did not reach the VTEPs associated with
+ # IPv4 entries.
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "IPv6 traffic - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "IPv6 traffic - second VTEP"
+}
+
+# Run the all-zeros MDB entry test over an IPv4 underlay.
+all_zeros_mdb_ipv4()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v4 ns2=ns2_v4
+ # The four remote VTEP addresses and their prefix length.
+ local vtep1_ip=198.51.100.101 vtep2_ip=198.51.100.102
+ local vtep3_ip=198.51.100.103 vtep4_ip=198.51.100.104
+ local plen=32
+
+ printf '\n%s\n%s\n' \
+ "Data path: All-zeros MDB entry - IPv4 underlay" \
+ "----------------------------------------------"
+
+ all_zeros_mdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" \
+ "$vtep3_ip" "$vtep4_ip" "$plen"
+}
+
+# Run the all-zeros MDB entry test over an IPv6 underlay.
+all_zeros_mdb_ipv6()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v6 ns2=ns2_v6
+ # The four remote VTEP addresses and their prefix length.
+ local vtep1_ip=2001:db8:1000::1 vtep2_ip=2001:db8:2000::1
+ local vtep3_ip=2001:db8:3000::1 vtep4_ip=2001:db8:4000::1
+ local plen=128
+
+ printf '\n%s\n%s\n' \
+ "Data path: All-zeros MDB entry - IPv6 underlay" \
+ "----------------------------------------------"
+
+ all_zeros_mdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" \
+ "$vtep3_ip" "$vtep4_ip" "$plen"
+}
+
+# Data path test for the interaction between an MDB entry and an all-zeros
+# FDB entry on the same VXLAN device.
+#
+# Arguments:
+#   $1 ns1      - namespace where the MDB and FDB entries are installed and
+#                 from which the test packets are sent
+#   $2 ns2      - namespace where the VTEP addresses are configured on lo and
+#                 where tc filters on vx0 match the received packets
+#   $3 vtep1_ip - remote of the MDB entry (filter handle 101)
+#   $4 vtep2_ip - remote of the FDB entry (filter handle 102)
+#   $5 plen     - prefix length used when configuring the VTEP addresses
+#   $6 proto    - protocol given to the tc filters (the visible callers pass
+#                 "ipv4" or "ipv6")
+#   $7 grp      - overlay multicast group of the MDB entry
+#   $8 src      - overlay source address
+#   $9 mz       - packet generator command (the visible callers pass
+#                 "mausezahn" or "mausezahn -6")
+mdb_fdb_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # Install an MDB entry and an FDB entry and make sure that the FDB
+ # entry only forwards traffic that was not forwarded by the MDB.
+
+ # Associate the MDB entry with one VTEP and the FDB entry with another
+ # VTEP.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010"
+
+ # Add filters to match on decapsulated traffic in the second namespace.
+ # Unlike a plain enc_dst_ip match, these also require UDP destination
+ # port 54321, the port used by every packet sent below.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass"
+
+ # Configure the VTEP addresses in the second namespace to enable
+ # decapsulation.
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ # NOTE(review): the expected values passed to tc_check_packets below
+ # only make sense if they are compared against cumulative per-filter
+ # counters - confirm against the helper's definition.
+
+ # Send IP multicast traffic and make sure it is forwarded by the MDB
+ # and only arrives to the first VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "IP multicast - second VTEP"
+
+ # Send broadcast traffic and make sure it is forwarded by the FDB and
+ # only arrives to the second VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Broadcast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Broadcast - second VTEP"
+
+ # Remove the MDB entry and make sure that IP multicast is now forwarded
+ # by the FDB to the second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 2
+ log_test $? 0 "IP multicast after removal - second VTEP"
+}
+
+# Run the MDB with FDB test with an IPv4 overlay over an IPv4 underlay.
+mdb_fdb_ipv4_ipv4()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v4 ns2=ns2_v4
+ # Remote VTEP addresses and their prefix length.
+ local vtep1_ip=198.51.100.100 vtep2_ip=198.51.100.200 plen=32
+ # Overlay protocol, group and source.
+ local proto=ipv4 grp=239.1.1.1 src=192.0.2.129
+
+ printf '\n%s\n%s\n' \
+ "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" \
+ "------------------------------------------------------"
+
+ mdb_fdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" "$plen" "$proto" \
+ "$grp" "$src" "mausezahn"
+}
+
+# Run the MDB with FDB test with an IPv6 overlay over an IPv4 underlay.
+mdb_fdb_ipv6_ipv4()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v4 ns2=ns2_v4
+ # Remote VTEP addresses and their prefix length.
+ local vtep1_ip=198.51.100.100 vtep2_ip=198.51.100.200 plen=32
+ # Overlay protocol, group and source.
+ local proto=ipv6 grp=ff0e::1 src=2001:db8:100::1
+
+ printf '\n%s\n%s\n' \
+ "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" \
+ "------------------------------------------------------"
+
+ mdb_fdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" "$plen" "$proto" \
+ "$grp" "$src" "mausezahn -6"
+}
+
+# Run the MDB with FDB test with an IPv4 overlay over an IPv6 underlay.
+mdb_fdb_ipv4_ipv6()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v6 ns2=ns2_v6
+ # Remote VTEP addresses and their prefix length.
+ local vtep1_ip=2001:db8:1000::1 vtep2_ip=2001:db8:2000::1 plen=128
+ # Overlay protocol, group and source.
+ local proto=ipv4 grp=239.1.1.1 src=192.0.2.129
+
+ printf '\n%s\n%s\n' \
+ "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" \
+ "------------------------------------------------------"
+
+ mdb_fdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" "$plen" "$proto" \
+ "$grp" "$src" "mausezahn"
+}
+
+# Run the MDB with FDB test with an IPv6 overlay over an IPv6 underlay.
+mdb_fdb_ipv6_ipv6()
+{
+ # Namespace pair handed to the common test.
+ local ns1=ns1_v6 ns2=ns2_v6
+ # Remote VTEP addresses and their prefix length.
+ local vtep1_ip=2001:db8:1000::1 vtep2_ip=2001:db8:2000::1 plen=128
+ # Overlay protocol, group and source.
+ local proto=ipv6 grp=ff0e::1 src=2001:db8:100::1
+
+ printf '\n%s\n%s\n' \
+ "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" \
+ "------------------------------------------------------"
+
+ mdb_fdb_common "$ns1" "$ns2" "$vtep1_ip" "$vtep2_ip" "$plen" "$proto" \
+ "$grp" "$src" "mausezahn -6"
+}
+
+# Endlessly delete and re-add the MDB entry for group $3 with remote $2 in
+# namespace $1. Never returns on its own; all command output is discarded.
+mdb_grp1_loop()
+{
+ local ns1=$1 vtep1_ip=$2 grp1=$3
+
+ while :; do
+ bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010
+ done >/dev/null 2>&1
+}
+
+# Endlessly churn the MDB entry for group $4 in namespace $1: the remote $2 is
+# deleted and re-added, and the remote $3 is replaced, on every iteration.
+# Never returns on its own; all command output is discarded.
+mdb_grp2_loop()
+{
+ local ns1=$1 vtep1_ip=$2 vtep2_ip=$3 grp2=$4
+
+ while :; do
+ bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010
+ done >/dev/null 2>&1
+}
+
+# Stress the data path by sending traffic while the MDB entries that forward
+# it are deleted, re-added and replaced in background loops.
+#
+# Arguments:
+#   $1 ns1      - namespace where the MDB entries are changed and from which
+#                 the two packet streams are sent
+#   $2 vtep1_ip - remote that is deleted and re-added for both groups
+#   $3 vtep2_ip - remote that is replaced in a loop for the second group
+#   $4 grp1     - overlay group of the first MDB entry
+#   $5 grp2     - overlay group of the second MDB entry
+#   $6 src      - overlay source address of both streams
+#   $7 mz       - packet generator command (the visible callers pass
+#                 "mausezahn" or "mausezahn -6")
+mdb_torture_common()
+{
+ local ns1=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local grp1=$1; shift
+ local grp2=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+
+ # Continuously send two streams that are forwarded by two different MDB
+ # entries. The first entry will be added and deleted in a loop. This
+ # allows us to test that the data path does not use freed MDB entry
+ # memory. The second entry will have two remotes, one that is added and
+ # deleted in a loop and another that is replaced in a loop. This allows
+ # us to test that the data path does not use freed remote entry memory.
+ # The test is considered successful if nothing crashed.
+
+ # Create the MDB entries that will be continuously deleted / replaced.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010"
+
+ # Start the two churn loops and the two packet streams in the
+ # background, remembering each PID so they can be stopped later.
+ mdb_grp1_loop $ns1 $vtep1_ip $grp1 &
+ pid1=$!
+ mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
+ pid2=$!
+ ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid3=$!
+ ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid4=$!
+
+ # Let everything run for 30 seconds, then kill all four jobs and reap
+ # them; stderr of wait is discarded.
+ sleep 30
+ kill -9 $pid1 $pid2 $pid3 $pid4
+ wait $pid1 $pid2 $pid3 $pid4 2>/dev/null
+
+ # Reaching this point is the pass criterion, so the result passed to
+ # log_test is a constant.
+ log_test 0 0 "Torture test"
+}
+
+mdb_torture_ipv4_ipv4()
+{
+ local ns1=ns1_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=239.1.1.1
+ local grp2=239.2.2.2
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn"
+}
+
+mdb_torture_ipv6_ipv4()
+{
+ local ns1=ns1_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=ff0e::1
+ local grp2=ff0e::2
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn -6"
+}
+
+mdb_torture_ipv4_ipv6()
+{
+ local ns1=ns1_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=239.1.1.1
+ local grp2=239.2.2.2
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn"
+}
+
+mdb_torture_ipv6_ipv6()
+{
+ local ns1=ns1_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=ff0e::1
+ local grp2=ff0e::2
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn -6"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -c Control path tests only
+ -d Data path tests only
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:cdpPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ c) TESTS=${CONTROL_PATH_TESTS};;
+ d) TESTS=${DATA_PATH_TESTS};;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge mdb help 2>&1 | grep -q "src_vni"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
new file mode 100644
index 000000000000..16f3a83605e4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
@@ -0,0 +1,416 @@
+[
+ {
+ "id": "abdc",
+ "name": "Reference pedit action object in filter",
+ "category": [
+ "infra",
+ "pedit"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action pedit munge offset 0 u8 clear index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action pedit index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
+ "id": "7a70",
+ "name": "Reference mpls action object in filter",
+ "category": [
+ "infra",
+ "mpls"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action mpls pop protocol ipv4 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mpls index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action mpls"
+ ]
+ },
+ {
+ "id": "d241",
+ "name": "Reference bpf action object in filter",
+ "category": [
+ "infra",
+ "bpf"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action bpf index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "383a",
+ "name": "Reference connmark action object in filter",
+ "category": [
+ "infra",
+ "connmark"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action connmark"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action connmark index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "c619",
+ "name": "Reference csum action object in filter",
+ "category": [
+ "infra",
+ "csum"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action csum ip4h index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action csum index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "a93d",
+ "name": "Reference ct action object in filter",
+ "category": [
+ "infra",
+ "ct"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action ct index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ct index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action ct"
+ ]
+ },
+ {
+ "id": "8bb5",
+ "name": "Reference ctinfo action object in filter",
+ "category": [
+ "infra",
+ "ctinfo"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action ctinfo index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ctinfo index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action ctinfo"
+ ]
+ },
+ {
+ "id": "2241",
+ "name": "Reference gact action object in filter",
+ "category": [
+ "infra",
+ "gact"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action pass index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gact index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "35e9",
+ "name": "Reference gate action object in filter",
+ "category": [
+ "infra",
+ "gate"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action gate priority 1 sched-entry close 100000000ns index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gate index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action gate"
+ ]
+ },
+ {
+ "id": "b22e",
+ "name": "Reference ife action object in filter",
+ "category": [
+ "infra",
+ "ife"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action ife encode allow mark pass index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ef74",
+ "name": "Reference mirred action object in filter",
+ "category": [
+ "infra",
+ "mirred"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action mirred egress mirror index 1 dev lo"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mirred index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2c81",
+ "name": "Reference nat action object in filter",
+ "category": [
+ "infra",
+ "nat"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action nat ingress 192.168.1.1 200.200.200.1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action nat index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "ac9d",
+ "name": "Reference police action object in filter",
+ "category": [
+ "infra",
+ "police"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action police rate 1kbit burst 10k index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action police index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "68be",
+ "name": "Reference sample action object in filter",
+ "category": [
+ "infra",
+ "sample"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action sample rate 10 group 1 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action sample index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "cf01",
+ "name": "Reference skbedit action object in filter",
+ "category": [
+ "infra",
+ "skbedit"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action skbedit mark 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbedit index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "c109",
+ "name": "Reference skbmod action object in filter",
+ "category": [
+ "infra",
+ "skbmod"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbmod index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "4abc",
+ "name": "Reference tunnel_key action object in filter",
+ "category": [
+ "infra",
+ "tunnel_key"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action tunnel_key index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "dadd",
+ "name": "Reference vlan action object in filter",
+ "category": [
+ "infra",
+ "vlan"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action vlan pop pipe index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action vlan index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$IP link del dev $DUMMY type dummy",
+ "$TC actions flush action vlan"
+ ]
+ }
+]