summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 12:22:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 12:22:58 -0700
commit7e062cda7d90543ac8c7700fc7c5527d0c0f22ad (patch)
tree2f1602595d9416be41cc2e88a659ba4c145734b9 /net
parent5d1772b1739b085721431eef0c0400f3aff01abf (diff)
parent57d7becda9c9e612e6b00676f2eecfac3e719e88 (diff)
Merge tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core ---- - Support TCPv6 segmentation offload with super-segments larger than 64k bytes using the IPv6 Jumbogram extension header (AKA BIG TCP). - Generalize skb freeing deferral to per-cpu lists, instead of per-socket lists. - Add a netdev statistic for packets dropped due to L2 address mismatch (rx_otherhost_dropped). - Continue work annotating skb drop reasons. - Accept alternative netdev names (ALT_IFNAME) in more netlink requests. - Add VLAN support for AF_PACKET SOCK_RAW GSO. - Allow receiving skb mark from the socket as a cmsg. - Enable memcg accounting for veth queues, sysctl tables and IPv6. BPF --- - Add libbpf support for User Statically-Defined Tracing (USDTs). - Speed up symbol resolution for kprobes multi-link attachments. - Support storing typed pointers to referenced and unreferenced objects in BPF maps. - Add support for BPF link iterator. - Introduce access to remote CPU map elements in BPF per-cpu map. - Allow middle-of-the-road settings for the kernel.unprivileged_bpf_disabled sysctl. - Implement basic types of dynamic pointers e.g. to allow for dynamically sized ringbuf reservations without extra memory copies. Protocols --------- - Retire port only listening_hash table, add a second bind table hashed by port and address. Avoid linear list walk when binding to very popular ports (e.g. 443). - Add bridge FDB bulk flush filtering support allowing user space to remove all FDB entries matching a condition. - Introduce accept_unsolicited_na sysctl for IPv6 to implement router-side changes for RFC9131. - Support for MPTCP path manager in user space. - Add MPTCP support for fallback to regular TCP for connections that have never connected additional subflows or transmitted out-of-sequence data (partial support for RFC8684 fallback). - Avoid races in MPTCP-level window tracking, stabilize and improve throughput. - Support lockless operation of GRE tunnels with seq numbers enabled. - WiFi support for host based BSS color collision detection. - Add support for SO_TXTIME/SCM_TXTIME on CAN sockets. - Support transmission w/o flow control in CAN ISOTP (ISO 15765-2). - Support zero-copy Tx with TLS 1.2 crypto offload (sendfile). - Allow matching on the number of VLAN tags via tc-flower. - Add tracepoint for tcp_set_ca_state(). Driver API ---------- - Improve error reporting from classifier and action offload. - Add support for listing line cards in switches (devlink). - Add helpers for reporting page pool statistics with ethtool -S. - Add support for reading clock cycles when using PTP virtual clocks, instead of having the driver convert to time before reporting. This makes it possible to report time from different vclocks. - Support configuring low-latency Tx descriptor push via ethtool. - Separate Clause 22 and Clause 45 MDIO accesses more explicitly. New hardware / drivers ---------------------- - Ethernet: - Marvell's Octeon NIC PCI Endpoint support (octeon_ep) - Sunplus SP7021 SoC (sp7021_emac) - Add support for Renesas RZ/V2M (in ravb) - Add support for MediaTek mt7986 switches (in mtk_eth_soc) - Ethernet PHYs: - ADIN1100 industrial PHYs (w/ 10BASE-T1L and SQI reporting) - TI DP83TD510 PHY - Microchip LAN8742/LAN88xx PHYs - WiFi: - Driver for pureLiFi X, XL, XC devices (plfxlc) - Driver for Silicon Labs devices (wfx) - Support for WCN6750 (in ath11k) - Support Realtek 8852ce devices (in rtw89) - Mobile: - MediaTek T700 modems (Intel 5G 5000 M.2 cards) - CAN: - ctucanfd: add support for CTU CAN FD open-source IP core from Czech Technical University in Prague Drivers ------- - Delete a number of old drivers still using virt_to_bus(). - Ethernet NICs: - intel: support TSO on tunnels MPLS - broadcom: support multi-buffer XDP - nfp: support VF rate limiting - sfc: use hardware tx timestamps for more than PTP - mlx5: multi-port eswitch support - hyper-v: add support for XDP_REDIRECT - atlantic: XDP support (including multi-buffer) - macb: improve real-time perf by deferring Tx processing to NAPI - High-speed Ethernet switches: - mlxsw: implement basic line card information querying - prestera: add support for traffic policing on ingress and egress - Embedded Ethernet switches: - lan966x: add support for packet DMA (FDMA) - lan966x: add support for PTP programmable pins - ti: cpsw_new: enable bc/mc storm prevention - Qualcomm 802.11ax WiFi (ath11k): - Wake-on-WLAN support for QCA6390 and WCN6855 - device recovery (firmware restart) support - support setting Specific Absorption Rate (SAR) for WCN6855 - read country code from SMBIOS for WCN6855/QCA6390 - enable keep-alive during WoWLAN suspend - implement remain-on-channel support - MediaTek WiFi (mt76): - support Wireless Ethernet Dispatch offloading packet movement between the Ethernet switch and WiFi interfaces - non-standard VHT MCS10-11 support - mt7921 AP mode support - mt7921 IPv6 NS offload support - Ethernet PHYs: - micrel: ksz9031/ksz9131: cabletest support - lan87xx: SQI support for T1 PHYs - lan937x: add interrupt support for link detection" * tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1809 commits) ptp: ocp: Add firmware header checks ptp: ocp: fix PPS source selector debugfs reporting ptp: ocp: add .init function for sma_op vector ptp: ocp: vectorize the sma accessor functions ptp: ocp: constify selectors ptp: ocp: parameterize input/output sma selectors ptp: ocp: revise firmware display ptp: ocp: add Celestica timecard PCI ids ptp: ocp: Remove #ifdefs around PCI IDs ptp: ocp: 32-bit fixups for pci start address Revert "net/smc: fix listen processing for SMC-Rv2" ath6kl: Use cc-disable-warning to disable -Wdangling-pointer selftests/bpf: Dynptr tests bpf: Add dynptr data slices bpf: Add bpf_dynptr_read and bpf_dynptr_write bpf: Dynptr support for ring buffers bpf: Add bpf_dynptr_from_mem for local dynptrs bpf: Add verifier support for dynptrs bpf: Suppress 'passing zero to PTR_ERR' warning bpf: Introduce bpf_arch_text_invalidate for bpf_prog_pack ...
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/Kconfig.debug7
-rw-r--r--net/appletalk/ddp.c3
-rw-r--r--net/atm/common.c4
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/ax25/ax25_dev.c22
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c4
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/translation-table.c12
-rw-r--r--net/bluetooth/af_bluetooth.c7
-rw-r--r--net/bluetooth/eir.c31
-rw-r--r--net/bluetooth/eir.h4
-rw-r--r--net/bluetooth/hci_conn.c7
-rw-r--r--net/bluetooth/hci_core.c2
-rw-r--r--net/bluetooth/hci_event.c35
-rw-r--r--net/bluetooth/hci_request.c4
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hci_sync.c90
-rw-r--r--net/bluetooth/mgmt.c18
-rw-r--r--net/bluetooth/mgmt_util.c2
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c24
-rw-r--r--net/bpf/test_run.c86
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_fdb.c160
-rw-r--r--net/bridge/br_if.c12
-rw-r--r--net/bridge/br_mdb.c12
-rw-r--r--net/bridge/br_netlink.c9
-rw-r--r--net/bridge/br_private.h21
-rw-r--r--net/bridge/br_switchdev.c3
-rw-r--r--net/bridge/br_sysfs_br.c6
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/bcm.c7
-rw-r--r--net/can/isotp.c130
-rw-r--r--net/can/j1939/socket.c4
-rw-r--r--net/can/raw.c20
-rw-r--r--net/core/bpf_sk_storage.c11
-rw-r--r--net/core/datagram.c7
-rw-r--r--net/core/datagram.h15
-rw-r--r--net/core/dev.c232
-rw-r--r--net/core/dev.h112
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/devlink.c653
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/filter.c37
-rw-r--r--net/core/flow_dissector.c20
-rw-r--r--net/core/gro.c8
-rw-r--r--net/core/link_watch.c1
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-procfs.c2
-rw-r--r--net/core/net-sysfs.c22
-rw-r--r--net/core/page_pool.c83
-rw-r--r--net/core/rtnetlink.c449
-rw-r--r--net/core/skbuff.c67
-rw-r--r--net/core/skmsg.c22
-rw-r--r--net/core/sock.c126
-rw-r--r--net/core/sock_map.c10
-rw-r--r--net/core/sysctl_net_core.c29
-rw-r--r--net/dccp/dccp.h4
-rw-r--r--net/dccp/ipv4.c7
-rw-r--r--net/dccp/ipv6.c6
-rw-r--r--net/dccp/proto.c40
-rw-r--r--net/decnet/dn_route.c2
-rw-r--r--net/dsa/dsa.c49
-rw-r--r--net/dsa/dsa2.c25
-rw-r--r--net/dsa/dsa_priv.h29
-rw-r--r--net/dsa/port.c136
-rw-r--r--net/dsa/slave.c67
-rw-r--r--net/dsa/switch.c198
-rw-r--r--net/dsa/tag_8021q.c10
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ethtool/common.c3
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rings.c54
-rw-r--r--net/ieee802154/socket.c12
-rw-r--r--net/ipv4/Kconfig1
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/arp.c7
-rw-r--r--net/ipv4/datagram.c7
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c6
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c12
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/icmp.c77
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c245
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/inet_hashtables.c329
-rw-r--r--net/ipv4/ip_forward.c13
-rw-r--r--net/ipv4/ip_gre.c50
-rw-r--r--net/ipv4/ip_input.c1
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter.c3
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/ping.c40
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/route.c51
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c67
-rw-r--r--net/ipv4/tcp_bbr.c22
-rw-r--r--net/ipv4/tcp_bic.c14
-rw-r--r--net/ipv4/tcp_bpf.c15
-rw-r--r--net/ipv4/tcp_cdg.c30
-rw-r--r--net/ipv4/tcp_cong.c30
-rw-r--r--net/ipv4/tcp_cubic.c26
-rw-r--r--net/ipv4/tcp_dctcp.c11
-rw-r--r--net/ipv4/tcp_highspeed.c18
-rw-r--r--net/ipv4/tcp_htcp.c10
-rw-r--r--net/ipv4/tcp_hybla.c18
-rw-r--r--net/ipv4/tcp_illinois.c12
-rw-r--r--net/ipv4/tcp_input.c177
-rw-r--r--net/ipv4/tcp_ipv4.c30
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_metrics.c12
-rw-r--r--net/ipv4/tcp_nv.c24
-rw-r--r--net/ipv4/tcp_output.c46
-rw-r--r--net/ipv4/tcp_rate.c2
-rw-r--r--net/ipv4/tcp_recovery.c15
-rw-r--r--net/ipv4/tcp_scalable.c4
-rw-r--r--net/ipv4/tcp_vegas.c21
-rw-r--r--net/ipv4/tcp_veno.c24
-rw-r--r--net/ipv4/tcp_westwood.c3
-rw-r--r--net/ipv4/tcp_yeah.c30
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/udp_bpf.c17
-rw-r--r--net/ipv4/udp_impl.h4
-rw-r--r--net/ipv6/addrconf.c51
-rw-r--r--net/ipv6/af_inet6.c7
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c6
-rw-r--r--net/ipv6/exthdrs.c44
-rw-r--r--net/ipv6/icmp.c31
-rw-r--r--net/ipv6/inet6_hashtables.c11
-rw-r--r--net/ipv6/ip6_gre.c34
-rw-r--r--net/ipv6/ip6_input.c41
-rw-r--r--net/ipv6/ip6_offload.c56
-rw-r--r--net/ipv6/ip6_output.c56
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ndisc.c20
-rw-r--r--net/ipv6/netfilter.c3
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c4
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/ipv6/sysctl_net_ipv6.c6
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/ipv6/udp.c23
-rw-r--r--net/ipv6/udp_impl.h4
-rw-r--r--net/iucv/af_iucv.c3
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c12
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/mac80211/agg-rx.c12
-rw-r--r--net/mac80211/agg-tx.c6
-rw-r--r--net/mac80211/airtime.c4
-rw-r--r--net/mac80211/cfg.c81
-rw-r--r--net/mac80211/chan.c8
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_netdev.c2
-rw-r--r--net/mac80211/debugfs_sta.c12
-rw-r--r--net/mac80211/eht.c6
-rw-r--r--net/mac80211/ethtool.c4
-rw-r--r--net/mac80211/he.c8
-rw-r--r--net/mac80211/ht.c8
-rw-r--r--net/mac80211/ibss.c26
-rw-r--r--net/mac80211/ieee80211_i.h12
-rw-r--r--net/mac80211/key.c9
-rw-r--r--net/mac80211/main.c4
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_plink.c24
-rw-r--r--net/mac80211/mlme.c135
-rw-r--r--net/mac80211/ocb.c2
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/rate.c8
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c177
-rw-r--r--net/mac80211/rc80211_minstrel_ht.h2
-rw-r--r--net/mac80211/rx.c131
-rw-r--r--net/mac80211/s1g.c4
-rw-r--r--net/mac80211/scan.c20
-rw-r--r--net/mac80211/sta_info.c110
-rw-r--r--net/mac80211/sta_info.h155
-rw-r--r--net/mac80211/status.c130
-rw-r--r--net/mac80211/tdls.c26
-rw-r--r--net/mac80211/trace.h4
-rw-r--r--net/mac80211/tx.c28
-rw-r--r--net/mac80211/util.c40
-rw-r--r--net/mac80211/vht.c78
-rw-r--r--net/mac80211/wpa.c103
-rw-r--r--net/mac802154/cfg.c1
-rw-r--r--net/mac802154/ieee802154_i.h2
-rw-r--r--net/mac802154/main.c54
-rw-r--r--net/mac802154/util.c22
-rw-r--r--net/mctp/af_mctp.c4
-rw-r--r--net/mctp/test/route-test.c8
-rw-r--r--net/mpls/af_mpls.c3
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/bpf.c21
-rw-r--r--net/mptcp/ctrl.c21
-rw-r--r--net/mptcp/mib.c5
-rw-r--r--net/mptcp/mib.h7
-rw-r--r--net/mptcp/mptcp_diag.c105
-rw-r--r--net/mptcp/options.c69
-rw-r--r--net/mptcp/pm.c108
-rw-r--r--net/mptcp/pm_netlink.c266
-rw-r--r--net/mptcp/pm_userspace.c429
-rw-r--r--net/mptcp/protocol.c123
-rw-r--r--net/mptcp/protocol.h101
-rw-r--r--net/mptcp/sockopt.c21
-rw-r--r--net/mptcp/subflow.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_conncount.c11
-rw-r--r--net/netfilter/nf_conntrack_bpf.c22
-rw-r--r--net/netfilter/nf_conntrack_core.c304
-rw-r--r--net/netfilter/nf_conntrack_ecache.c178
-rw-r--r--net/netfilter/nf_conntrack_extend.c32
-rw-r--r--net/netfilter/nf_conntrack_helper.c5
-rw-r--r--net/netfilter/nf_conntrack_netlink.c152
-rw-r--r--net/netfilter/nf_conntrack_proto.c10
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c52
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_conntrack_timeout.c7
-rw-r--r--net/netfilter/nf_log_syslog.c136
-rw-r--r--net/netfilter/nf_nat_masquerade.c5
-rw-r--r--net/netfilter/nf_tables_api.c6
-rw-r--r--net/netfilter/nfnetlink.c40
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c61
-rw-r--r--net/netfilter/nft_bitwise.c13
-rw-r--r--net/netfilter/nft_fib.c4
-rw-r--r--net/netfilter/nft_flow_offload.c8
-rw-r--r--net/netlink/af_netlink.c3
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/core.c1
-rw-r--r--net/nfc/llcp_sock.c3
-rw-r--r--net/nfc/rawsock.c3
-rw-r--r--net/packet/af_packet.c22
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c7
-rw-r--r--net/qrtr/af_qrtr.c3
-rw-r--r--net/rose/af_rose.c3
-rw-r--r--net/rose/rose_route.c25
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h38
-rw-r--r--net/rxrpc/call_accept.c10
-rw-r--r--net/rxrpc/call_event.c7
-rw-r--r--net/rxrpc/call_object.c62
-rw-r--r--net/rxrpc/conn_client.c30
-rw-r--r--net/rxrpc/conn_object.c51
-rw-r--r--net/rxrpc/conn_service.c8
-rw-r--r--net/rxrpc/input.c62
-rw-r--r--net/rxrpc/local_object.c68
-rw-r--r--net/rxrpc/net_ns.c7
-rw-r--r--net/rxrpc/output.c20
-rw-r--r--net/rxrpc/peer_object.c40
-rw-r--r--net/rxrpc/proc.c85
-rw-r--r--net/rxrpc/recvmsg.c8
-rw-r--r--net/rxrpc/sendmsg.c6
-rw-r--r--net/rxrpc/skbuff.c1
-rw-r--r--net/rxrpc/sysctl.c4
-rw-r--r--net/sched/act_api.c4
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_ct.c3
-rw-r--r--net/sched/act_gact.c13
-rw-r--r--net/sched/act_gate.c3
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_mpls.c10
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c20
-rw-r--r--net/sched/act_sample.c3
-rw-r--r--net/sched/act_skbedit.c65
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c4
-rw-r--r--net/sched/cls_api.c22
-rw-r--r--net/sched/cls_flower.c104
-rw-r--r--net/sched/cls_matchall.c19
-rw-r--r--net/sched/em_meta.c7
-rw-r--r--net/sched/sch_generic.c12
-rw-r--r--net/sctp/input.c4
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/output.c3
-rw-r--r--net/sctp/socket.c18
-rw-r--r--net/sctp/stream_sched.c9
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/smc/af_smc.c52
-rw-r--r--net/smc/smc_ib.c1
-rw-r--r--net/smc/smc_tx.c17
-rw-r--r--net/smc/smc_wr.c5
-rw-r--r--net/socket.c75
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/tls/tls_device.c59
-rw-r--r--net/tls/tls_main.c55
-rw-r--r--net/tls/tls_sw.c491
-rw-r--r--net/unix/af_unix.c11
-rw-r--r--net/unix/unix_bpf.c5
-rw-r--r--net/vmw_vsock/virtio_transport.c197
-rw-r--r--net/vmw_vsock/vmci_transport.c5
-rw-r--r--net/wireless/chan.c93
-rw-r--r--net/wireless/core.h14
-rw-r--r--net/wireless/ibss.c4
-rw-r--r--net/wireless/nl80211.c417
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/x25/af_x25.c3
-rw-r--r--net/x25/x25_proc.c3
-rw-r--r--net/xdp/xsk.c4
-rw-r--r--net/xdp/xsk_queue.h4
-rw-r--r--net/xdp/xskmap.c6
-rw-r--r--net/xfrm/espintcp.c4
-rw-r--r--net/xfrm/xfrm_device.c15
-rw-r--r--net/xfrm/xfrm_state.c4
-rw-r--r--net/xfrm/xfrm_user.c5
319 files changed, 7705 insertions, 4092 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 788076b002b3..e40aa3e3641c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -319,8 +319,7 @@ static void vlan_transfer_features(struct net_device *dev,
{
struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
- netif_set_gso_max_size(vlandev, dev->gso_max_size);
- netif_set_gso_max_segs(vlandev, dev->gso_max_segs);
+ netif_inherit_tso_max(vlandev, dev);
if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e5d23e75572a..839f2020b015 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -573,8 +573,7 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_ALL_FCOE;
dev->features |= dev->hw_features | NETIF_F_LLTX;
- netif_set_gso_max_size(dev, real_dev->gso_max_size);
- netif_set_gso_max_segs(dev, real_dev->gso_max_segs);
+ netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
diff --git a/net/Kconfig.debug b/net/Kconfig.debug
index 2f50611df858..a5781cf63b16 100644
--- a/net/Kconfig.debug
+++ b/net/Kconfig.debug
@@ -17,3 +17,10 @@ config NET_NS_REFCNT_TRACKER
help
Enable debugging feature to track netns references.
This adds memory and cpu costs.
+
+config DEBUG_NET
+ bool "Add generic networking debug"
+ depends on DEBUG_KERNEL
+ help
+ Enable extra sanity checks in networking.
+ This is mostly used by fuzzers, but is safe to select.
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index bf5736c1d458..a06f4d4a6f47 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1753,8 +1753,7 @@ static int atalk_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int err = 0;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (!skb)
diff --git a/net/atm/common.c b/net/atm/common.c
index 1cfa9bf1d187..f7019df41c3e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -540,7 +540,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
!test_bit(ATM_VF_READY, &vcc->flags))
return 0;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -553,7 +553,7 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
error = skb_copy_datagram_msg(skb, 0, msg, copied);
if (error)
return error;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
pr_debug("%d -= %d\n", atomic_read(&sk->sk_rmem_alloc),
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 363d47f94532..116481e4da82 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1669,8 +1669,7 @@ static int ax25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
/* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index d2a244e1c260..b80fccbac62a 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -115,23 +115,13 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
- spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_put(ax25_dev);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- ax25_dev_put(ax25_dev);
- return;
+ goto unlock_put;
}
while (s != NULL && s->next != NULL) {
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
- spin_unlock_bh(&ax25_dev_lock);
- ax25_dev_put(ax25_dev);
- dev->ax25_ptr = NULL;
- dev_put_track(dev, &ax25_dev->dev_tracker);
- ax25_dev_put(ax25_dev);
- return;
+ goto unlock_put;
}
s = s->next;
@@ -139,6 +129,14 @@ void ax25_dev_device_down(struct net_device *dev)
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
ax25_dev_put(ax25_dev);
+ return;
+
+unlock_put:
+ spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
+ dev->ax25_ptr = NULL;
+ dev_put_track(dev, &ax25_dev->dev_tracker);
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 7f8a14d99cdb..37ce6cfb3520 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -65,7 +65,7 @@ batadv_bla_send_announce(struct batadv_priv *bat_priv,
*/
static inline u32 batadv_choose_claim(const void *data, u32 size)
{
- struct batadv_bla_claim *claim = (struct batadv_bla_claim *)data;
+ const struct batadv_bla_claim *claim = data;
u32 hash = 0;
hash = jhash(&claim->addr, sizeof(claim->addr), hash);
@@ -86,7 +86,7 @@ static inline u32 batadv_choose_backbone_gw(const void *data, u32 size)
const struct batadv_bla_backbone_gw *gw;
u32 hash = 0;
- gw = (struct batadv_bla_backbone_gw *)data;
+ gw = data;
hash = jhash(&gw->orig, sizeof(gw->orig), hash);
hash = jhash(&gw->vid, sizeof(gw->vid), hash);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 83fb51b6e299..b8f8da7ee3de 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,9 +307,11 @@ static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
if (!net_device)
return false;
+#if IS_ENABLED(CONFIG_CFG80211)
/* cfg80211 drivers have to set ieee80211_ptr */
if (net_device->ieee80211_ptr)
return true;
+#endif
return false;
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f3be82999f1f..23f3d53f4b51 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.1"
+#define BATADV_SOURCE_VERSION "2022.2"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 8478034d3abf..01d30c1e412c 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -103,10 +103,10 @@ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2)
*/
static inline u32 batadv_choose_tt(const void *data, u32 size)
{
- struct batadv_tt_common_entry *tt;
+ const struct batadv_tt_common_entry *tt;
u32 hash = 0;
- tt = (struct batadv_tt_common_entry *)data;
+ tt = data;
hash = jhash(&tt->addr, ETH_ALEN, hash);
hash = jhash(&tt->vid, sizeof(tt->vid), hash);
@@ -2766,7 +2766,7 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
u32 i;
tt_tot = batadv_tt_entries(tt_len);
- tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff;
+ tt_change = tvlv_buff;
if (!valid_cb)
return;
@@ -3994,7 +3994,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
num_vlan = ntohs(tt_data->num_vlan);
@@ -4037,7 +4037,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
if (tvlv_value_len < sizeof(*tt_data))
return NET_RX_SUCCESS;
- tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
+ tt_data = tvlv_value;
tvlv_value_len -= sizeof(*tt_data);
tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data);
@@ -4129,7 +4129,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
goto out;
batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
- roaming_adv = (struct batadv_tvlv_roam_adv *)tvlv_value;
+ roaming_adv = tvlv_value;
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Received ROAMING_ADV from %pM (client %pM)\n",
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a0cb2e3da8d4..b506409bb498 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -251,7 +251,6 @@ EXPORT_SYMBOL(bt_accept_dequeue);
int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
size_t copied;
@@ -263,7 +262,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
@@ -281,7 +280,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_reset_transport_header(skb);
err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err == 0) {
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name && bt_sk(sk)->skb_msg_name)
bt_sk(sk)->skb_msg_name(skb, msg->msg_name,
@@ -385,7 +384,7 @@ int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
copied += chunk;
size -= chunk;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (!(flags & MSG_PEEK)) {
int skb_len = skb_headlen(skb);
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 7e930f77ecab..7d77fb00c2bf 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -55,6 +55,19 @@ u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
return eir_append_le16(ptr, ad_len, EIR_APPEARANCE, hdev->appearance);
}
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len)
+{
+ eir[eir_len++] = sizeof(u8) + sizeof(uuid) + data_len;
+ eir[eir_len++] = EIR_SERVICE_DATA;
+ put_unaligned_le16(uuid, &eir[eir_len]);
+ eir_len += sizeof(uuid);
+ memcpy(&eir[eir_len], data, data_len);
+ eir_len += data_len;
+
+ return eir_len;
+}
+
static u8 *create_uuid16_list(struct hci_dev *hdev, u8 *data, ptrdiff_t len)
{
u8 *ptr = data, *uuids_start = NULL;
@@ -333,3 +346,21 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
return scan_rsp_len;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
+{
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ u16 value = get_unaligned_le16(eir);
+
+ if (uuid == value) {
+ if (len)
+ *len -= 2;
+ return &eir[2];
+ }
+
+ eir += *len;
+ eir_len -= *len;
+ }
+
+ return NULL;
+}
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 43f1945bffc5..62f2374078f2 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -14,6 +14,8 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_append_local_name(struct hci_dev *hdev, u8 *eir, u8 ad_len);
u8 eir_append_appearance(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+u8 eir_append_service_data(u8 *eir, u16 eir_len, u16 uuid, u8 *data,
+ u8 data_len);
static inline u16 eir_precalc_len(u8 data_len)
{
@@ -92,3 +94,5 @@ static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type,
return NULL;
}
+
+void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index fe803bee419a..ac06c9724c7f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -481,7 +481,7 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle)
bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
{
- if (enhanced_sco_capable(conn->hdev))
+ if (enhanced_sync_conn_capable(conn->hdev))
return hci_enhanced_setup_sync_conn(conn, handle);
return hci_setup_sync_conn(conn, handle);
@@ -943,10 +943,11 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_err(hdev, "request failed to create LE connection: err %d", err);
- if (!conn)
+ /* Check if connection is still pending */
+ if (conn != hci_lookup_le_connect(hdev))
goto done;
- hci_le_conn_failed(conn, err);
+ hci_conn_failed(conn, err);
done:
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 45c2dd2e1590..5abb2ca5b129 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2675,8 +2675,6 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
- cancel_work_sync(&hdev->power_on);
-
hci_cmd_sync_clear(hdev);
if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 66451661283c..af17dfb20e01 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1835,7 +1835,9 @@ static u8 hci_cc_le_clear_accept_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_accept_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1855,8 +1857,10 @@ static u8 hci_cc_le_add_to_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1876,8 +1880,10 @@ static u8 hci_cc_le_del_from_accept_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1949,9 +1955,11 @@ static u8 hci_cc_le_add_to_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_add_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type, sent->peer_irk,
sent->local_irk);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1971,8 +1979,10 @@ static u8 hci_cc_le_del_from_resolv_list(struct hci_dev *hdev, void *data,
if (!sent)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_del_with_irk(&hdev->le_resolv_list, &sent->bdaddr,
sent->bdaddr_type);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -1987,7 +1997,9 @@ static u8 hci_cc_le_clear_resolv_list(struct hci_dev *hdev, void *data,
if (rp->status)
return rp->status;
+ hci_dev_lock(hdev);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
+ hci_dev_unlock(hdev);
return rp->status;
}
@@ -3225,10 +3237,12 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
return;
}
+ hci_dev_lock(hdev);
+
if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Require HCI_CONNECTABLE or an accept list entry to accept the
@@ -3240,13 +3254,11 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
!hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
- return;
+ goto unlock;
}
/* Connection accepted */
- hci_dev_lock(hdev);
-
ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
if (ie)
memcpy(ie->data.dev_class, ev->dev_class, 3);
@@ -3258,8 +3270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
HCI_ROLE_SLAVE);
if (!conn) {
bt_dev_err(hdev, "no memory for new connection");
- hci_dev_unlock(hdev);
- return;
+ goto unlock;
}
}
@@ -3299,6 +3310,10 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data,
conn->state = BT_CONNECT2;
hci_connect_cfm(conn, 0);
}
+
+ return;
+unlock:
+ hci_dev_unlock(hdev);
}
static u8 hci_to_mgmt_reason(u8 err)
@@ -5617,10 +5632,12 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
status = HCI_ERROR_INVALID_PARAMETERS;
}
- if (status) {
- hci_conn_failed(conn, status);
+ /* All connection failure handling is taken care of by the
+ * hci_conn_failed function which is triggered by the HCI
+ * request completion callbacks used for connecting.
+ */
+ if (status)
goto unlock;
- }
if (conn->dst_type == ADDR_LE_DEV_PUBLIC)
addr_type = BDADDR_LE_PUBLIC;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 42c8047a9897..635cc5fb451e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -261,7 +261,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
if (skb_queue_empty(&req->cmd_q))
bt_cb(skb)->hci.req_flags |= HCI_REQ_START;
- bt_cb(skb)->hci.req_event = event;
+ hci_skb_event(skb) = event;
skb_queue_tail(&req->cmd_q, skb);
}
@@ -2260,6 +2260,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
+ hci_dev_lock(hdev);
if (hci_is_adv_monitoring(hdev)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
@@ -2276,6 +2277,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
*/
filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
}
+ hci_dev_unlock(hdev);
hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
hdev->le_scan_window_discovery, own_addr_type,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 33b3c0ffc339..189e3115c8c6 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1453,7 +1453,6 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
@@ -1470,7 +1469,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (sk->sk_state == BT_CLOSED)
return 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 13600bf120b0..4d2203c5f1bb 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1664,20 +1664,19 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
struct hci_cp_le_add_to_accept_list cp;
int err;
+ /* During suspend, only wakeable devices can be in acceptlist */
+ if (hdev->suspended &&
+ !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
+ return 0;
+
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
return -ENOSPC;
/* Accept list can not be used with RPAs */
if (!use_ll_privacy(hdev) &&
- hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
+ hci_find_irk_by_addr(hdev, &params->addr, params->addr_type))
return -EINVAL;
- }
-
- /* During suspend, only wakeable devices can be in acceptlist */
- if (hdev->suspended &&
- !test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, params->flags))
- return 0;
/* Attempt to program the device in the resolving list first to avoid
* having to rollback in case it fails since the resolving list is
@@ -3825,6 +3824,30 @@ static int hci_init_sync(struct hci_dev *hdev)
return 0;
}
+#define HCI_QUIRK_BROKEN(_quirk, _desc) { HCI_QUIRK_BROKEN_##_quirk, _desc }
+
+static const struct {
+ unsigned long quirk;
+ const char *desc;
+} hci_broken_table[] = {
+ HCI_QUIRK_BROKEN(LOCAL_COMMANDS,
+ "HCI Read Local Supported Commands not supported"),
+ HCI_QUIRK_BROKEN(STORED_LINK_KEY,
+ "HCI Delete Stored Link Key command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(ERR_DATA_REPORTING,
+ "HCI Read Default Erroneous Data Reporting command is "
+ "advertised, but not supported."),
+ HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER,
+ "HCI Read Transmit Power Level command is advertised, "
+ "but not supported."),
+ HCI_QUIRK_BROKEN(FILTER_CLEAR_ALL,
+ "HCI Set Event Filter command not supported."),
+ HCI_QUIRK_BROKEN(ENHANCED_SETUP_SYNC_CONN,
+ "HCI Enhanced Setup Synchronous Connection command is "
+ "advertised, but not supported.")
+};
+
int hci_dev_open_sync(struct hci_dev *hdev)
{
int ret = 0;
@@ -3886,12 +3909,19 @@ int hci_dev_open_sync(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_SETUP) ||
test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) {
bool invalid_bdaddr;
+ size_t i;
hci_sock_dev_event(hdev, HCI_DEV_SETUP);
if (hdev->setup)
ret = hdev->setup(hdev);
+ for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) {
+ if (test_bit(hci_broken_table[i].quirk, &hdev->quirks))
+ bt_dev_warn(hdev, "%s",
+ hci_broken_table[i].desc);
+ }
+
/* The transport driver can set the quirk to mark the
* BD_ADDR invalid before creating the HCI device or in
* its setup callback.
@@ -4058,6 +4088,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
+ cancel_work_sync(&hdev->power_on);
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
@@ -4881,10 +4912,28 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
return 0;
}
+/* This function disables scan (BR and LE) and mark it as paused */
+static int hci_pause_scan_sync(struct hci_dev *hdev)
+{
+ if (hdev->scanning_paused)
+ return 0;
+
+ /* Disable page scan if enabled */
+ if (test_bit(HCI_PSCAN, &hdev->flags))
+ hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
+
+ hci_scan_disable_sync(hdev);
+
+ hdev->scanning_paused = true;
+
+ return 0;
+}
+
/* This function performs the HCI suspend procedures in the follow order:
*
* Pause discovery (active scanning/inquiry)
* Pause Directed Advertising/Advertising
+ * Pause Scanning (passive scanning in case discovery was not active)
* Disconnect all connections
* Set suspend_status to BT_SUSPEND_DISCONNECT if hdev cannot wakeup
* otherwise:
@@ -4910,15 +4959,11 @@ int hci_suspend_sync(struct hci_dev *hdev)
/* Pause other advertisements */
hci_pause_advertising_sync(hdev);
- /* Disable page scan if enabled */
- if (test_bit(HCI_PSCAN, &hdev->flags))
- hci_write_scan_enable_sync(hdev, SCAN_DISABLED);
-
/* Suspend monitor filters */
hci_suspend_monitor_sync(hdev);
/* Prevent disconnects from causing scanning to be re-enabled */
- hdev->scanning_paused = true;
+ hci_pause_scan_sync(hdev);
/* Soft disconnect everything (power off) */
err = hci_disconnect_all_sync(hdev, HCI_ERROR_REMOTE_POWER_OFF);
@@ -4989,6 +5034,22 @@ static void hci_resume_monitor_sync(struct hci_dev *hdev)
}
}
+/* This function resume scan and reset paused flag */
+static int hci_resume_scan_sync(struct hci_dev *hdev)
+{
+ if (!hdev->scanning_paused)
+ return 0;
+
+ hci_update_scan_sync(hdev);
+
+ /* Reset passive scanning to normal */
+ hci_update_passive_scan_sync(hdev);
+
+ hdev->scanning_paused = false;
+
+ return 0;
+}
+
/* This function performs the HCI suspend procedures in the follow order:
*
* Restore event mask
@@ -5011,10 +5072,9 @@ int hci_resume_sync(struct hci_dev *hdev)
/* Clear any event filters and restore scan state */
hci_clear_event_filter_sync(hdev);
- hci_update_scan_sync(hdev);
- /* Reset passive scanning to normal */
- hci_update_passive_scan_sync(hdev);
+ /* Resume scanning */
+ hci_resume_scan_sync(hdev);
/* Resume monitor filters */
hci_resume_monitor_sync(hdev);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d2d390534e54..74937a834648 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4529,6 +4529,23 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
le_addr_type(cp->addr.type));
if (params) {
+ DECLARE_BITMAP(flags, __HCI_CONN_NUM_FLAGS);
+
+ bitmap_from_u64(flags, current_flags);
+
+ /* Devices using RPAs can only be programmed in the
+ * acceptlist LL Privacy has been enable otherwise they
+ * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP.
+ */
+ if (test_bit(HCI_CONN_FLAG_REMOTE_WAKEUP, flags) &&
+ !use_ll_privacy(hdev) &&
+ hci_find_irk_by_addr(hdev, &params->addr,
+ params->addr_type)) {
+ bt_dev_warn(hdev,
+ "Cannot set wakeable for RPA");
+ goto unlock;
+ }
+
bitmap_from_u64(params->flags, current_flags);
status = MGMT_STATUS_SUCCESS;
@@ -4545,6 +4562,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
}
}
+unlock:
hci_dev_unlock(hdev);
done:
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 37eef2ce55ae..b69cfed62088 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -297,7 +297,7 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
- list_add(&cmd->list, &hdev->mgmt_pending);
+ list_add_tail(&cmd->list, &hdev->mgmt_pending);
return cmd;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8eabf41b2993..1111da4e2f2b 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -574,19 +574,24 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
+ lock_sock(sk);
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
+ if (!hdev) {
+ err = -EHOSTUNREACH;
+ goto done;
+ }
hci_dev_lock(hdev);
- lock_sock(sk);
-
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
@@ -885,7 +890,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
err = -EBADFD;
break;
}
- if (enhanced_sco_capable(hdev) &&
+ if (enhanced_sync_conn_capable(hdev) &&
voice.setting == BT_VOICE_TRANSPARENT)
sco_pi(sk)->codec.id = BT_CODEC_TRANSPARENT;
hci_dev_put(hdev);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index d0e54e30658a..e78dadfc5829 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
args->args[3], args->args[4]);
}
+extern const struct bpf_link_ops bpf_struct_ops_link_lops;
+
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_progs *tprogs;
+ struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
int prog_ret;
@@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (IS_ERR(args))
return PTR_ERR(args);
- tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
- if (!tprogs) {
+ tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
+ if (!tlinks) {
err = -ENOMEM;
goto out;
}
@@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
set_vm_flush_reset_perms(image);
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /* prog doesn't take the ownership of the reference from caller */
+ bpf_prog_inc(prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog);
+
op_idx = prog->expected_attach_type;
- err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
image, image + PAGE_SIZE);
if (err < 0)
@@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
out:
kfree(args);
bpf_jit_free_exec(image);
- kfree(tprogs);
+ if (link)
+ bpf_link_put(&link->link);
+ kfree(tlinks);
return err;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index af709c182674..56f059b3c242 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -551,8 +551,13 @@ struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
return sk;
}
+struct prog_test_member1 {
+ int a;
+};
+
struct prog_test_member {
- u64 c;
+ struct prog_test_member1 m;
+ int c;
};
struct prog_test_ref_kfunc {
@@ -560,31 +565,58 @@ struct prog_test_ref_kfunc {
int b;
struct prog_test_member memb;
struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
};
static struct prog_test_ref_kfunc prog_test_struct = {
.a = 42,
.b = 108,
.next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
};
noinline struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
{
- /* randomly return NULL */
- if (get_jiffies_64() % 2)
- return NULL;
+ refcount_inc(&prog_test_struct.cnt);
return &prog_test_struct;
}
+noinline struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
+ if (!p)
+ return;
+
+ refcount_dec(&p->cnt);
}
noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
+noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+noinline struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
+{
+ struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
+
+ if (!p)
+ return NULL;
+ refcount_inc(&p->cnt);
+ return p;
+}
+
struct prog_test_pass1 {
int x0;
struct {
@@ -668,8 +700,11 @@ BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
BTF_ID(func, bpf_kfunc_call_test_pass_ctx)
BTF_ID(func, bpf_kfunc_call_test_pass1)
BTF_ID(func, bpf_kfunc_call_test_pass2)
@@ -683,17 +718,26 @@ BTF_SET_END(test_sk_check_kfunc_ids)
BTF_SET_START(test_sk_acquire_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
BTF_SET_END(test_sk_acquire_kfunc_ids)
BTF_SET_START(test_sk_release_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_release)
BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb1_release)
BTF_SET_END(test_sk_release_kfunc_ids)
BTF_SET_START(test_sk_ret_null_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test_acquire)
+BTF_ID(func, bpf_kfunc_call_memb_acquire)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
BTF_SET_END(test_sk_ret_null_kfunc_ids)
+BTF_SET_START(test_sk_kptr_acquire_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test_kptr_get)
+BTF_SET_END(test_sk_kptr_acquire_kfunc_ids)
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
u32 size, u32 headroom, u32 tailroom)
{
@@ -968,7 +1012,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
cb->pkt_len = skb->len;
} else {
if (__skb->wire_len < skb->len ||
- __skb->wire_len > GSO_MAX_SIZE)
+ __skb->wire_len > GSO_LEGACY_MAX_SIZE)
return -EINVAL;
cb->pkt_len = __skb->wire_len;
}
@@ -1580,14 +1624,36 @@ out:
static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
.owner = THIS_MODULE,
- .check_set = &test_sk_check_kfunc_ids,
- .acquire_set = &test_sk_acquire_kfunc_ids,
- .release_set = &test_sk_release_kfunc_ids,
- .ret_null_set = &test_sk_ret_null_kfunc_ids,
+ .check_set = &test_sk_check_kfunc_ids,
+ .acquire_set = &test_sk_acquire_kfunc_ids,
+ .release_set = &test_sk_release_kfunc_ids,
+ .ret_null_set = &test_sk_ret_null_kfunc_ids,
+ .kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids
};
+BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(struct, prog_test_member)
+BTF_ID(func, bpf_kfunc_call_memb_release)
+
static int __init bpf_prog_test_run_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = {
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[0],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1]
+ },
+ {
+ .btf_id = bpf_prog_test_dtor_kfunc_ids[2],
+ .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3],
+ },
+ };
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
+ return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
+ ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
+ THIS_MODULE);
}
late_initcall(bpf_prog_test_run_init);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8d6bab244c4a..58a4f70e01e3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -465,6 +465,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fix_features = br_fix_features,
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
+ .ndo_fdb_del_bulk = br_fdb_delete_bulk,
.ndo_fdb_dump = br_fdb_dump,
.ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6ccda68bd473..e7f4fccb6adb 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -558,18 +558,161 @@ void br_fdb_cleanup(struct work_struct *work)
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
}
-/* Completely flush all dynamic entries in forwarding database.*/
-void br_fdb_flush(struct net_bridge *br)
+static bool __fdb_flush_matches(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *f,
+ const struct net_bridge_fdb_flush_desc *desc)
+{
+ const struct net_bridge_port *dst = READ_ONCE(f->dst);
+ int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex;
+
+ if (desc->vlan_id && desc->vlan_id != f->key.vlan_id)
+ return false;
+ if (desc->port_ifindex && desc->port_ifindex != port_ifidx)
+ return false;
+ if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
+ return false;
+
+ return true;
+}
+
+/* Flush forwarding database entries matching the description */
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc)
{
struct net_bridge_fdb_entry *f;
- struct hlist_node *tmp;
- spin_lock_bh(&br->hash_lock);
- hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
- if (!test_bit(BR_FDB_STATIC, &f->flags))
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
+ if (!__fdb_flush_matches(br, f, desc))
+ continue;
+
+ spin_lock_bh(&br->hash_lock);
+ if (!hlist_unhashed(&f->fdb_node))
fdb_delete(br, f, true);
+ spin_unlock_bh(&br->hash_lock);
}
- spin_unlock_bh(&br->hash_lock);
+ rcu_read_unlock();
+}
+
+static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state)
+{
+ unsigned long flags = 0;
+
+ if (ndm_state & NUD_PERMANENT)
+ __set_bit(BR_FDB_LOCAL, &flags);
+ if (ndm_state & NUD_NOARP)
+ __set_bit(BR_FDB_STATIC, &flags);
+
+ return flags;
+}
+
+static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags)
+{
+ unsigned long flags = 0;
+
+ if (ndm_flags & NTF_USE)
+ __set_bit(BR_FDB_ADDED_BY_USER, &flags);
+ if (ndm_flags & NTF_EXT_LEARNED)
+ __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags);
+ if (ndm_flags & NTF_OFFLOADED)
+ __set_bit(BR_FDB_OFFLOADED, &flags);
+ if (ndm_flags & NTF_STICKY)
+ __set_bit(BR_FDB_STICKY, &flags);
+
+ return flags;
+}
+
+static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
+ int ifindex,
+ struct netlink_ext_ack *extack)
+{
+ const struct net_device *dev;
+
+ dev = __dev_get_by_index(dev_net(br->dev), ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex");
+ return -ENODEV;
+ }
+ if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_master(dev) && dev != br->dev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flush bridge device does not match target bridge device");
+ return -EINVAL;
+ }
+ if (netif_is_bridge_port(dev)) {
+ struct net_bridge_port *p = br_port_get_rtnl(dev);
+
+ if (p->br != br) {
+ NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+ struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+ struct net_bridge_port *p = NULL;
+ struct net_bridge *br;
+
+ if (netif_is_bridge_master(dev)) {
+ br = netdev_priv(dev);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br = p->br;
+ }
+
+ if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
+ return -EINVAL;
+ }
+ if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) {
+ NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
+ return -EINVAL;
+ }
+
+ desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state);
+ desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags);
+ if (tb[NDA_NDM_STATE_MASK]) {
+ u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
+
+ desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask);
+ }
+ if (tb[NDA_NDM_FLAGS_MASK]) {
+ u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
+
+ desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
+ }
+ if (tb[NDA_IFINDEX]) {
+ int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+
+ err = __fdb_flush_validate_ifindex(br, ifidx, extack);
+ if (err)
+ return err;
+ desc.port_ifindex = ifidx;
+ } else if (p) {
+ /* flush was invoked with port device and NTF_MASTER */
+ desc.port_ifindex = p->dev->ifindex;
+ }
+
+ br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n",
+ desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask);
+
+ br_fdb_flush(br, &desc);
+
+ return 0;
}
/* Flush all entries referring to a specific port.
@@ -1110,7 +1253,8 @@ static int __br_fdb_delete(struct net_bridge *br,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p = NULL;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 55f47cadb114..47fcbade7389 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -517,16 +517,16 @@ void br_mtu_auto_adjust(struct net_bridge *br)
static void br_set_gso_limits(struct net_bridge *br)
{
- unsigned int gso_max_size = GSO_MAX_SIZE;
- u16 gso_max_segs = GSO_MAX_SEGS;
+ unsigned int tso_max_size = TSO_MAX_SIZE;
const struct net_bridge_port *p;
+ u16 tso_max_segs = TSO_MAX_SEGS;
list_for_each_entry(p, &br->port_list, list) {
- gso_max_size = min(gso_max_size, p->dev->gso_max_size);
- gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
+ tso_max_size = min(tso_max_size, p->dev->tso_max_size);
+ tso_max_segs = min(tso_max_segs, p->dev->tso_max_segs);
}
- netif_set_gso_max_size(br->dev, gso_max_size);
- netif_set_gso_max_segs(br->dev, gso_max_segs);
+ netif_set_tso_max_size(br->dev, tso_max_size);
+ netif_set_tso_max_segs(br->dev, tso_max_segs);
}
/*
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 4556d913955b..fdcc641fc89a 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -251,14 +251,16 @@ static int __mdb_fill_info(struct sk_buff *skb,
__mdb_entry_fill_flags(&e, flags);
e.ifindex = ifindex;
e.vid = mp->addr.vid;
- if (mp->addr.proto == htons(ETH_P_IP))
+ if (mp->addr.proto == htons(ETH_P_IP)) {
e.addr.u.ip4 = mp->addr.dst.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- else if (mp->addr.proto == htons(ETH_P_IPV6))
+ } else if (mp->addr.proto == htons(ETH_P_IPV6)) {
e.addr.u.ip6 = mp->addr.dst.ip6;
#endif
- else
+ } else {
ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
+ e.state = MDB_PG_FLAGS_PERMANENT;
+ }
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
MDBA_MDB_ENTRY_INFO);
@@ -873,8 +875,8 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EINVAL;
/* host join errors which can happen before creating the group */
- if (!port) {
- /* don't allow any flags for host-joined groups */
+ if (!port && !br_group_is_l2(&group)) {
+ /* don't allow any flags for host-joined IP groups */
if (entry->state) {
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups");
return -EINVAL;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 200ad05b296f..bb01776d2d88 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1326,8 +1326,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br_recalculate_fwd_mask(br);
}
- if (data[IFLA_BR_FDB_FLUSH])
- br_fdb_flush(br);
+ if (data[IFLA_BR_FDB_FLUSH]) {
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BR_FDB_STATIC
+ };
+
+ br_fdb_flush(br, &desc);
+ }
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (data[IFLA_BR_MCAST_ROUTER]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 18ccc3d5d296..06e5f6faa431 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -274,6 +274,13 @@ struct net_bridge_fdb_entry {
struct rcu_head rcu;
};
+struct net_bridge_fdb_flush_desc {
+ unsigned long flags;
+ unsigned long flags_mask;
+ int port_ifindex;
+ u16 vlan_id;
+};
+
#define MDB_PG_FLAGS_PERMANENT BIT(0)
#define MDB_PG_FLAGS_OFFLOAD BIT(1)
#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
@@ -755,11 +762,17 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
#endif
/* br_fdb.c */
+#define FDB_FLUSH_IGNORED_NDM_FLAGS (NTF_MASTER | NTF_SELF)
+#define FDB_FLUSH_ALLOWED_NDM_STATES (NUD_PERMANENT | NUD_NOARP)
+#define FDB_FLUSH_ALLOWED_NDM_FLAGS (NTF_USE | NTF_EXT_LEARNED | \
+ NTF_STICKY | NTF_OFFLOADED)
+
int br_fdb_init(void);
void br_fdb_fini(void);
int br_fdb_hash_init(struct net_bridge *br);
void br_fdb_hash_fini(struct net_bridge *br);
-void br_fdb_flush(struct net_bridge *br);
+void br_fdb_flush(struct net_bridge *br,
+ const struct net_bridge_fdb_flush_desc *desc);
void br_fdb_find_delete_local(struct net_bridge *br,
const struct net_bridge_port *p,
const unsigned char *addr, u16 vid);
@@ -780,7 +793,11 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, unsigned long flags);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr, u16 vid);
+ struct net_device *dev, const unsigned char *addr, u16 vid,
+ struct netlink_ext_ack *extack);
+int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev, u16 vid,
+ struct netlink_ext_ack *extack);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags,
struct netlink_ext_ack *extack);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 18affda2b522..8f3d76c751dd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -72,7 +72,8 @@ bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
- BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED)
+ BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_PORT_LOCKED | \
+ BR_HAIRPIN_MODE | BR_ISOLATED | BR_MULTICAST_TO_UNICAST)
int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 3f7ca88c2aa3..612e367fff20 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -344,7 +344,11 @@ static DEVICE_ATTR_RW(group_addr);
static int set_flush(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br_fdb_flush(br);
+ struct net_bridge_fdb_flush_desc desc = {
+ .flags_mask = BR_FDB_STATIC
+ };
+
+ br_fdb_flush(br, &desc);
return 0;
}
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 2b8892d502f7..251e666ba9a2 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -282,7 +282,7 @@ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m,
if (flags & MSG_OOB)
goto read_error;
- skb = skb_recv_datagram(sk, flags, 0 , &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
goto read_error;
copylen = skb->len;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 95d209b52e6a..65ee1b784a30 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1632,12 +1632,9 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int error = 0;
- int noblock;
int err;
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &error);
+ skb = skb_recv_datagram(sk, flags, &error);
if (!skb)
return error;
@@ -1650,7 +1647,7 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(BCM_MIN_NAMELEN);
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 1e7c6a460ef9..43a27d19cdac 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -104,6 +104,7 @@ MODULE_ALIAS("can-proto-6");
#define FC_CONTENT_SZ 3 /* flow control content size in byte (FS/BS/STmin) */
#define ISOTP_CHECK_PADDING (CAN_ISOTP_CHK_PAD_LEN | CAN_ISOTP_CHK_PAD_DATA)
+#define ISOTP_ALL_BC_FLAGS (CAN_ISOTP_SF_BROADCAST | CAN_ISOTP_CF_BROADCAST)
/* Flow Status given in FC frame */
#define ISOTP_FC_CTS 0 /* clear to send */
@@ -159,6 +160,23 @@ static inline struct isotp_sock *isotp_sk(const struct sock *sk)
return (struct isotp_sock *)sk;
}
+static u32 isotp_bc_flags(struct isotp_sock *so)
+{
+ return so->opt.flags & ISOTP_ALL_BC_FLAGS;
+}
+
+static bool isotp_register_rxid(struct isotp_sock *so)
+{
+ /* no broadcast modes => register rx_id for FC frame reception */
+ return (isotp_bc_flags(so) == 0);
+}
+
+static bool isotp_register_txecho(struct isotp_sock *so)
+{
+ /* all modes but SF_BROADCAST register for tx echo skbs */
+ return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
+}
+
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -803,7 +821,6 @@ static void isotp_create_fframe(struct canfd_frame *cf, struct isotp_sock *so,
cf->data[i] = so->tx.buf[so->tx.idx++];
so->tx.sn = 1;
- so->tx.state = ISOTP_WAIT_FIRST_FC;
}
static void isotp_rcv_echo(struct sk_buff *skb, void *data)
@@ -936,7 +953,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
/* does the given data fit into a single frame for SF_BROADCAST? */
- if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
+ if ((isotp_bc_flags(so) == CAN_ISOTP_SF_BROADCAST) &&
(size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
err = -EINVAL;
goto err_out_drop;
@@ -1000,12 +1017,41 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/* don't enable wait queue for a single frame transmission */
wait_tx_done = 0;
} else {
- /* send first frame and wait for FC */
+ /* send first frame */
isotp_create_fframe(cf, so, ae);
- /* start timeout for FC */
- hrtimer_sec = 1;
+ if (isotp_bc_flags(so) == CAN_ISOTP_CF_BROADCAST) {
+ /* set timer for FC-less operation (STmin = 0) */
+ if (so->opt.flags & CAN_ISOTP_FORCE_TXSTMIN)
+ so->tx_gap = ktime_set(0, so->force_tx_stmin);
+ else
+ so->tx_gap = ktime_set(0, so->frame_txtime);
+
+ /* disable wait for FCs due to activated block size */
+ so->txfc.bs = 0;
+
+ /* cfecho should have been zero'ed by init */
+ if (so->cfecho)
+ pr_notice_once("can-isotp: no fc cfecho %08X\n",
+ so->cfecho);
+
+ /* set consecutive frame echo tag */
+ so->cfecho = *(u32 *)cf->data;
+
+ /* switch directly to ISOTP_SENDING state */
+ so->tx.state = ISOTP_SENDING;
+
+ /* start timeout for unlikely lost echo skb */
+ hrtimer_sec = 2;
+ } else {
+ /* standard flow control check */
+ so->tx.state = ISOTP_WAIT_FIRST_FC;
+
+ /* start timeout for FC */
+ hrtimer_sec = 1;
+ }
+
hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0),
HRTIMER_MODE_REL_SOFT);
}
@@ -1025,6 +1071,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (hrtimer_sec)
hrtimer_cancel(&so->txtimer);
+ /* reset consecutive frame echo tag */
+ so->cfecho = 0;
+
goto err_out_drop;
}
@@ -1055,7 +1104,6 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
struct isotp_sock *so = isotp_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
int ret = 0;
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
@@ -1064,8 +1112,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (!so->bound)
return -EADDRNOTAVAIL;
- flags &= ~MSG_DONTWAIT;
- skb = skb_recv_datagram(sk, flags, noblock, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
@@ -1122,15 +1169,17 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
+ if (so->bound && isotp_register_txecho(so)) {
if (so->ifindex) {
struct net_device *dev;
dev = dev_get_by_index(net, so->ifindex);
if (dev) {
- can_rx_unregister(net, dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (isotp_register_rxid(so))
+ can_rx_unregister(net, dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
can_rx_unregister(net, dev, so->txid,
SINGLE_MASK(so->txid),
isotp_rcv_echo, sk);
@@ -1163,26 +1212,35 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct net *net = sock_net(sk);
int ifindex;
struct net_device *dev;
- canid_t tx_id, rx_id;
+ canid_t tx_id = addr->can_addr.tp.tx_id;
+ canid_t rx_id = addr->can_addr.tp.rx_id;
int err = 0;
int notify_enetdown = 0;
- int do_rx_reg = 1;
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
- /* sanitize tx/rx CAN identifiers */
- tx_id = addr->can_addr.tp.tx_id;
+ /* sanitize tx CAN identifier */
if (tx_id & CAN_EFF_FLAG)
tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
else
tx_id &= CAN_SFF_MASK;
- rx_id = addr->can_addr.tp.rx_id;
- if (rx_id & CAN_EFF_FLAG)
- rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
- else
- rx_id &= CAN_SFF_MASK;
+ /* give feedback on wrong CAN-ID value */
+ if (tx_id != addr->can_addr.tp.tx_id)
+ return -EINVAL;
+
+ /* sanitize rx CAN identifier (if needed) */
+ if (isotp_register_rxid(so)) {
+ if (rx_id & CAN_EFF_FLAG)
+ rx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
+ else
+ rx_id &= CAN_SFF_MASK;
+
+ /* give feedback on wrong CAN-ID value */
+ if (rx_id != addr->can_addr.tp.rx_id)
+ return -EINVAL;
+ }
if (!addr->can_ifindex)
return -ENODEV;
@@ -1194,12 +1252,8 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out;
}
- /* do not register frame reception for functional addressing */
- if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
- do_rx_reg = 0;
-
- /* do not validate rx address for functional addressing */
- if (do_rx_reg && rx_id == tx_id) {
+ /* ensure different CAN IDs when the rx_id is to be registered */
+ if (isotp_register_rxid(so) && rx_id == tx_id) {
err = -EADDRNOTAVAIL;
goto out;
}
@@ -1224,10 +1278,11 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
ifindex = dev->ifindex;
- if (do_rx_reg) {
+ if (isotp_register_rxid(so))
can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
+ if (isotp_register_txecho(so)) {
/* no consecutive frame echo skb in flight */
so->cfecho = 0;
@@ -1296,6 +1351,15 @@ static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
if (!(so->opt.flags & CAN_ISOTP_RX_EXT_ADDR))
so->opt.rx_ext_address = so->opt.ext_address;
+ /* these broadcast flags are not allowed together */
+ if (isotp_bc_flags(so) == ISOTP_ALL_BC_FLAGS) {
+ /* CAN_ISOTP_SF_BROADCAST is prioritized */
+ so->opt.flags &= ~CAN_ISOTP_CF_BROADCAST;
+
+ /* give user feedback on wrong config attempt */
+ ret = -EINVAL;
+ }
+
/* check for frame_txtime changes (0 => no changes) */
if (so->opt.frame_txtime) {
if (so->opt.frame_txtime == CAN_ISOTP_FRAME_TXTIME_ZERO)
@@ -1446,10 +1510,12 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
- can_rx_unregister(dev_net(dev), dev, so->rxid,
- SINGLE_MASK(so->rxid),
- isotp_rcv, sk);
+ if (so->bound && isotp_register_txecho(so)) {
+ if (isotp_register_rxid(so))
+ can_rx_unregister(dev_net(dev), dev, so->rxid,
+ SINGLE_MASK(so->rxid),
+ isotp_rcv, sk);
+
can_rx_unregister(dev_net(dev), dev, so->txid,
SINGLE_MASK(so->txid),
isotp_rcv_echo, sk);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6dff4510687a..f5ecfdcf57b2 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -802,7 +802,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
SCM_J1939_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, 0, &ret);
+ skb = skb_recv_datagram(sk, flags, &ret);
if (!skb)
return ret;
@@ -841,7 +841,7 @@ static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
paddr->can_addr.j1939.pgn = skcb->addr.pgn;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
msg->msg_flags |= skcb->msg_flags;
skb_free_datagram(sk, skb);
diff --git a/net/can/raw.c b/net/can/raw.c
index 7105fa4824e4..d1bd9cc51ebe 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -772,6 +772,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct sockcm_cookie sockc;
struct sk_buff *skb;
struct net_device *dev;
int ifindex;
@@ -817,11 +818,18 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err < 0)
goto free_skb;
- skb_setup_tx_timestamp(skb, sk->sk_tsflags);
+ sockcm_init(&sockc, sk);
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err))
+ goto free_skb;
+ }
skb->dev = dev;
- skb->sk = sk;
skb->priority = sk->sk_priority;
+ skb->tstamp = sockc.transmit_time;
+
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
err = can_send(skb, ro->loopback);
@@ -846,16 +854,12 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int err = 0;
- int noblock;
-
- noblock = flags & MSG_DONTWAIT;
- flags &= ~MSG_DONTWAIT;
if (flags & MSG_ERRQUEUE)
return sock_recv_errqueue(sk, msg, size,
SOL_CAN_RAW, SCM_CAN_RAW_ERRQUEUE);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
return err;
@@ -870,7 +874,7 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return err;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
__sockaddr_check_size(RAW_MIN_NAMELEN);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index e3ac36380520..a25ec93729b9 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);
return 0;
}
@@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk)
* sk_storage.
*/
bpf_selem_unlink_map(selem);
- free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
- selem, true);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(
+ sk_storage, selem, true, false);
}
raw_spin_unlock_bh(&sk_storage->lock);
rcu_read_unlock();
@@ -338,7 +338,7 @@ bpf_sk_storage_ptr(void *owner)
return &sk->sk_bpf_storage;
}
-static int sk_storage_map_btf_id;
+BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops sk_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -349,8 +349,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_name = "bpf_local_storage_map",
- .map_btf_id = &sk_storage_map_btf_id,
+ .map_btf_id = &sk_storage_map_btf_ids[0],
.map_local_storage_charge = bpf_sk_storage_charge,
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
.map_owner_storage_ptr = bpf_sk_storage_ptr,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661..50f4faeea76c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -62,8 +62,6 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
-#include "datagram.h"
-
/*
* Is a socket 'connection oriented' ?
*/
@@ -310,12 +308,11 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
EXPORT_SYMBOL(__skb_recv_datagram);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
- int noblock, int *err)
+ int *err)
{
int off = 0;
- return __skb_recv_datagram(sk, &sk->sk_receive_queue,
- flags | (noblock ? MSG_DONTWAIT : 0),
+ return __skb_recv_datagram(sk, &sk->sk_receive_queue, flags,
&off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/datagram.h b/net/core/datagram.h
deleted file mode 100644
index bcfb75bfa3b2..000000000000
--- a/net/core/datagram.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _NET_CORE_DATAGRAM_H_
-#define _NET_CORE_DATAGRAM_H_
-
-#include <linux/types.h>
-
-struct sock;
-struct sk_buff;
-struct iov_iter;
-
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
-
-#endif /* _NET_CORE_DATAGRAM_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 191ec76d4c3b..08ce317fcec8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
+#include "dev.h"
#include "net-sysfs.h"
@@ -701,6 +702,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
if (WARN_ON_ONCE(last_dev == ctx.dev))
return -1;
}
+
+ if (!ctx.dev)
+ return ret;
+
path = dev_fwd_path(stack);
if (!path)
return -1;
@@ -2988,6 +2993,52 @@ undo_rx:
EXPORT_SYMBOL(netif_set_real_num_queues);
/**
+ * netif_set_tso_max_size() - set the max size of TSO frames supported
+ * @dev: netdev to update
+ * @size: max skb->len of a TSO frame
+ *
+ * Set the limit on the size of TSO super-frames the device can handle.
+ * Unless explicitly set the stack will assume the value of
+ * %GSO_LEGACY_MAX_SIZE.
+ */
+void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
+{
+ dev->tso_max_size = min(GSO_MAX_SIZE, size);
+ if (size < READ_ONCE(dev->gso_max_size))
+ netif_set_gso_max_size(dev, size);
+}
+EXPORT_SYMBOL(netif_set_tso_max_size);
+
+/**
+ * netif_set_tso_max_segs() - set the max number of segs supported for TSO
+ * @dev: netdev to update
+ * @segs: max number of TCP segments
+ *
+ * Set the limit on the number of TCP segments the device can generate from
+ * a single TSO super-frame.
+ * Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS.
+ */
+void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs)
+{
+ dev->tso_max_segs = segs;
+ if (segs < READ_ONCE(dev->gso_max_segs))
+ netif_set_gso_max_segs(dev, segs);
+}
+EXPORT_SYMBOL(netif_set_tso_max_segs);
+
+/**
+ * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper
+ * @to: netdev to update
+ * @from: netdev from which to copy the limits
+ */
+void netif_inherit_tso_max(struct net_device *to, const struct net_device *from)
+{
+ netif_set_tso_max_size(to, from->tso_max_size);
+ netif_set_tso_max_segs(to, from->tso_max_segs);
+}
+EXPORT_SYMBOL(netif_inherit_tso_max);
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
* Default value is the number of physical cores if there are only 1 or 2, or
@@ -3215,12 +3266,18 @@ int skb_checksum_help(struct sk_buff *skb)
}
offset = skb_checksum_start_offset(skb);
- BUG_ON(offset >= skb_headlen(skb));
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
-
+ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
if (ret)
goto out;
@@ -3919,6 +3976,25 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
}
+
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
#endif /* CONFIG_NET_EGRESS */
#ifdef CONFIG_XPS
@@ -4061,35 +4137,30 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
}
/**
- * __dev_queue_xmit - transmit a buffer
- * @skb: buffer to transmit
- * @sb_dev: suboordinate device used for L2 forwarding offload
- *
- * Queue a buffer for transmission to a network device. The caller must
- * have set the device and priority and built the buffer before calling
- * this function. The function can be called from an interrupt.
+ * __dev_queue_xmit() - transmit a buffer
+ * @skb: buffer to transmit
+ * @sb_dev: suboordinate device used for L2 forwarding offload
*
- * A negative errno code is returned on a failure. A success does not
- * guarantee the frame will be transmitted as it may be dropped due
- * to congestion or traffic shaping.
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling
+ * this function. The function can be called from an interrupt.
*
- * -----------------------------------------------------------------------------------
- * I notice this method can also return errors from the queue disciplines,
- * including NET_XMIT_DROP, which is a positive value. So, errors can also
- * be positive.
+ * When calling this method, interrupts MUST be enabled. This is because
+ * the BH enable code must have IRQs enabled so that it will not deadlock.
*
- * Regardless of the return value, the skb is consumed, so it is currently
- * difficult to retry a send to this method. (You can bump the ref count
- * before sending to hold a reference for retry if you are careful.)
+ * Regardless of the return value, the skb is consumed, so it is currently
+ * difficult to retry a send to this method. (You can bump the ref count
+ * before sending to hold a reference for retry if you are careful.)
*
- * When calling this method, interrupts MUST be enabled. This is because
- * the BH enable code must have IRQs enabled so that it will not deadlock.
- * --BLG
+ * Return:
+ * * 0 - buffer successfully transmitted
+ * * positive qdisc return code - NET_XMIT_DROP etc.
+ * * negative errno - other errors
*/
-static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
+ struct netdev_queue *txq = NULL;
struct Qdisc *q;
int rc = -ENOMEM;
bool again = false;
@@ -4117,11 +4188,17 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (!skb)
goto out;
}
+
+ netdev_xmit_skip_txqueue(false);
+
nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
nf_skip_egress(skb, false);
+
+ if (netdev_xmit_txqueue_skipped())
+ txq = netdev_tx_queue_mapping(dev, skb);
}
#endif
/* If device/qdisc don't need skb->dst, release it right now while
@@ -4132,7 +4209,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
else
skb_dst_force(skb);
- txq = netdev_core_pick_tx(dev, skb, sb_dev);
+ if (!txq)
+ txq = netdev_core_pick_tx(dev, skb, sb_dev);
+
q = rcu_dereference_bh(txq->qdisc);
trace_net_dev_queue(skb);
@@ -4201,18 +4280,7 @@ out:
rcu_read_unlock_bh();
return rc;
}
-
-int dev_queue_xmit(struct sk_buff *skb)
-{
- return __dev_queue_xmit(skb, NULL);
-}
-EXPORT_SYMBOL(dev_queue_xmit);
-
-int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
-{
- return __dev_queue_xmit(skb, sb_dev);
-}
-EXPORT_SYMBOL(dev_queue_xmit_accel);
+EXPORT_SYMBOL(__dev_queue_xmit);
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
@@ -4259,6 +4327,7 @@ int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
int netdev_budget __read_mostly = 300;
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
@@ -4510,6 +4579,15 @@ static void rps_trigger_softirq(void *data)
#endif /* CONFIG_RPS */
+/* Called from hardirq (IPI) context */
+static void trigger_rx_softirq(void *data)
+{
+ struct softnet_data *sd = data;
+
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
+}
+
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
@@ -5367,13 +5445,11 @@ check_vlan_id:
*ppt_prev = pt_prev;
} else {
drop:
- if (!deliver_exact) {
+ if (!deliver_exact)
dev_core_stats_rx_dropped_inc(skb->dev);
- kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT);
- } else {
+ else
dev_core_stats_rx_nohandler_inc(skb->dev);
- kfree_skb(skb);
- }
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -6275,8 +6351,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
-void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
{
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6309,7 +6385,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
}
-EXPORT_SYMBOL(netif_napi_add);
+EXPORT_SYMBOL(netif_napi_add_weight);
void napi_disable(struct napi_struct *n)
{
@@ -6538,6 +6614,28 @@ static int napi_threaded_poll(void *data)
return 0;
}
+static void skb_defer_free_flush(struct softnet_data *sd)
+{
+ struct sk_buff *skb, *next;
+ unsigned long flags;
+
+ /* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
+ if (!READ_ONCE(sd->defer_list))
+ return;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ skb = sd->defer_list;
+ sd->defer_list = NULL;
+ sd->defer_count = 0;
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ while (skb != NULL) {
+ next = skb->next;
+ napi_consume_skb(skb, 1);
+ skb = next;
+ }
+}
+
static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -6554,9 +6652,11 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
for (;;) {
struct napi_struct *n;
+ skb_defer_free_flush(sd);
+
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto end;
break;
}
@@ -6583,6 +6683,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
+end:;
}
struct netdev_adjacent {
@@ -8638,7 +8739,6 @@ void dev_set_group(struct net_device *dev, int new_group)
{
dev->group = new_group;
}
-EXPORT_SYMBOL(dev_set_group);
/**
* dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
@@ -8753,7 +8853,6 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
return -ENODEV;
return ops->ndo_change_carrier(dev, new_carrier);
}
-EXPORT_SYMBOL(dev_change_carrier);
/**
* dev_get_phys_port_id - Get device physical port ID
@@ -8771,7 +8870,6 @@ int dev_get_phys_port_id(struct net_device *dev,
return -EOPNOTSUPP;
return ops->ndo_get_phys_port_id(dev, ppid);
}
-EXPORT_SYMBOL(dev_get_phys_port_id);
/**
* dev_get_phys_port_name - Get device physical port name
@@ -8794,7 +8892,6 @@ int dev_get_phys_port_name(struct net_device *dev,
}
return devlink_compat_phys_port_name_get(dev, name, len);
}
-EXPORT_SYMBOL(dev_get_phys_port_name);
/**
* dev_get_port_parent_id - Get the device's port parent identifier
@@ -8876,7 +8973,6 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
dev->proto_down = proto_down;
return 0;
}
-EXPORT_SYMBOL(dev_change_proto_down);
/**
* dev_change_proto_down_reason - proto down reason
@@ -8901,7 +8997,6 @@ void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
}
}
}
-EXPORT_SYMBOL(dev_change_proto_down_reason);
struct bpf_xdp_link {
struct bpf_link link;
@@ -9428,7 +9523,7 @@ static int dev_new_index(struct net *net)
}
/* Delayed registration/unregisteration */
-static LIST_HEAD(net_todo_list);
+LIST_HEAD(net_todo_list);
DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
@@ -9835,22 +9930,14 @@ void netif_tx_stop_all_queues(struct net_device *dev)
EXPORT_SYMBOL(netif_tx_stop_all_queues);
/**
- * register_netdevice - register a network device
- * @dev: device to register
- *
- * Take a completed network device structure and add it to the kernel
- * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
- * chain. 0 is returned on success. A negative errno code is returned
- * on a failure to set up the device, or if the name is a duplicate.
+ * register_netdevice() - register a network device
+ * @dev: device to register
*
- * Callers must hold the rtnl semaphore. You may want
- * register_netdev() instead of this.
- *
- * BUGS:
- * The locking appears insufficient to guarantee two parallel registers
- * will not get the same name.
+ * Take a prepared network device structure and make it externally accessible.
+ * A %NETDEV_REGISTER message is sent to the netdev notifier chain.
+ * Callers must hold the rtnl lock - you may want register_netdev()
+ * instead of this.
*/
-
int register_netdevice(struct net_device *dev)
{
int ret;
@@ -10352,6 +10439,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
}
}
return storage;
@@ -10510,9 +10598,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gro_max_size = GRO_MAX_SIZE;
+ dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
+ dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP
@@ -11294,6 +11384,8 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
+ INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
+ spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
sd->backlog.poll = process_backlog;
diff --git a/net/core/dev.h b/net/core/dev.h
new file mode 100644
index 000000000000..cbb8a925175a
--- /dev/null
+++ b/net/core/dev.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_DEV_H
+#define _NET_CORE_DEV_H
+
+#include <linux/types.h>
+
+struct net;
+struct net_device;
+struct netdev_bpf;
+struct netdev_phys_item_id;
+struct netlink_ext_ack;
+
+/* Random bits of netdevice that don't need to be exposed */
+#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
+struct sd_flow_limit {
+ u64 count;
+ unsigned int num_buckets;
+ unsigned int history_head;
+ u16 history[FLOW_LIMIT_HISTORY];
+ u8 buckets[];
+};
+
+extern int netdev_flow_limit_table_len;
+
+#ifdef CONFIG_PROC_FS
+int __init dev_proc_init(void);
+#else
+#define dev_proc_init() 0
+#endif
+
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_run_queue(void);
+
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
+void dev_addr_check(struct net_device *dev);
+
+/* sysctls not referred to from outside net/core/ */
+extern int netdev_budget;
+extern unsigned int netdev_budget_usecs;
+extern unsigned int sysctl_skb_defer_max;
+extern int netdev_tstamp_prequeue;
+extern int netdev_unregister_timeout_secs;
+extern int weight_p;
+extern int dev_weight_rx_bias;
+extern int dev_weight_tx_bias;
+
+/* rtnl helpers */
+extern struct list_head net_todo_list;
+void netdev_run_todo(void);
+
+/* netdev management, shared between various uAPI entry points */
+struct netdev_name_node {
+ struct hlist_node hlist;
+ struct list_head list;
+ struct net_device *dev;
+ const char *name;
+};
+
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_change_name(struct net_device *dev, const char *newname);
+
+int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
+
+int dev_validate_mtu(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+ struct netlink_ext_ack *extack);
+
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+ char *name, size_t len);
+
+int dev_change_proto_down(struct net_device *dev, bool proto_down);
+void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
+ u32 value);
+
+typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
+int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
+ int fd, int expected_fd, u32 flags);
+
+int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len);
+void dev_set_group(struct net_device *dev, int new_group);
+int dev_change_carrier(struct net_device *dev, bool new_carrier);
+
+void __dev_set_rx_mode(struct net_device *dev);
+
+static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_size, size);
+}
+
+static inline void netif_set_gso_max_segs(struct net_device *dev,
+ unsigned int segs)
+{
+ /* dev->gso_max_segs is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_segs, segs);
+}
+
+static inline void netif_set_gro_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_max_size, size);
+}
+
+#endif
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index bead38ca50bd..baa63dee2829 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -12,6 +12,8 @@
#include <linux/export.h>
#include <linux/list.h>
+#include "dev.h"
+
/*
* General list handling functions
*/
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 1b807d119da5..4f6be442ae7e 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -10,6 +10,8 @@
#include <net/dsa.h>
#include <net/wext.h>
+#include "dev.h"
+
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
diff --git a/net/core/devlink.c b/net/core/devlink.c
index aeca13b6e57b..5cc88490f18f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -54,6 +54,8 @@ struct devlink {
struct list_head trap_list;
struct list_head trap_group_list;
struct list_head trap_policer_list;
+ struct list_head linecard_list;
+ struct mutex linecards_lock; /* protects linecard_list */
const struct devlink_ops *ops;
u64 features;
struct xarray snapshot_ids;
@@ -70,6 +72,23 @@ struct devlink {
char priv[] __aligned(NETDEV_ALIGN);
};
+struct devlink_linecard_ops;
+struct devlink_linecard_type;
+
+struct devlink_linecard {
+ struct list_head list;
+ struct devlink *devlink;
+ unsigned int index;
+ refcount_t refcount;
+ const struct devlink_linecard_ops *ops;
+ void *priv;
+ enum devlink_linecard_state state;
+ struct mutex state_lock; /* Protects state */
+ const char *type;
+ struct devlink_linecard_type *types;
+ unsigned int types_count;
+};
+
/**
* struct devlink_resource - devlink resource
* @name: name of the resource
@@ -397,6 +416,58 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
return ERR_PTR(-EINVAL);
}
+static struct devlink_linecard *
+devlink_linecard_get_by_index(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ struct devlink_linecard *devlink_linecard;
+
+ list_for_each_entry(devlink_linecard, &devlink->linecard_list, list) {
+ if (devlink_linecard->index == linecard_index)
+ return devlink_linecard;
+ }
+ return NULL;
+}
+
+static bool devlink_linecard_index_exists(struct devlink *devlink,
+ unsigned int linecard_index)
+{
+ return devlink_linecard_get_by_index(devlink, linecard_index);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ if (attrs[DEVLINK_ATTR_LINECARD_INDEX]) {
+ u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
+ struct devlink_linecard *linecard;
+
+ mutex_lock(&devlink->linecards_lock);
+ linecard = devlink_linecard_get_by_index(devlink, linecard_index);
+ if (linecard)
+ refcount_inc(&linecard->refcount);
+ mutex_unlock(&devlink->linecards_lock);
+ if (!linecard)
+ return ERR_PTR(-ENODEV);
+ return linecard;
+ }
+ return ERR_PTR(-EINVAL);
+}
+
+static struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_linecard_get_from_attrs(devlink, info->attrs);
+}
+
+static void devlink_linecard_put(struct devlink_linecard *linecard)
+{
+ if (refcount_dec_and_test(&linecard->refcount)) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ }
+}
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -617,16 +688,18 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
+#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(5)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
@@ -669,6 +742,13 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
goto unlock;
}
info->user_ptr[1] = rate_node;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard)) {
+ err = PTR_ERR(linecard);
+ goto unlock;
+ }
+ info->user_ptr[1] = linecard;
}
return 0;
@@ -683,9 +763,14 @@ unlock:
static void devlink_nl_post_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
{
+ struct devlink_linecard *linecard;
struct devlink *devlink;
devlink = info->user_ptr[0];
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = info->user_ptr[1];
+ devlink_linecard_put(linecard);
+ }
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
devlink_put(devlink);
@@ -1158,6 +1243,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
goto nla_put_failure;
if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack))
goto nla_put_failure;
+ if (devlink_port->linecard &&
+ nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
+ devlink_port->linecard->index))
+ goto nla_put_failure;
genlmsg_end(msg, hdr);
return 0;
@@ -1964,6 +2053,322 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
return err;
}
+struct devlink_linecard_type {
+ const char *type;
+ const void *priv;
+};
+
+static int devlink_nl_linecard_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct devlink_linecard *linecard,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_linecard_type *linecard_type;
+ struct nlattr *attr;
+ void *hdr;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_LINECARD_STATE, linecard->state))
+ goto nla_put_failure;
+ if (linecard->type &&
+ nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE, linecard->type))
+ goto nla_put_failure;
+
+ if (linecard->types_count) {
+ attr = nla_nest_start(msg,
+ DEVLINK_ATTR_LINECARD_SUPPORTED_TYPES);
+ if (!attr)
+ goto nla_put_failure;
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (nla_put_string(msg, DEVLINK_ATTR_LINECARD_TYPE,
+ linecard_type->type)) {
+ nla_nest_cancel(msg, attr);
+ goto nla_put_failure;
+ }
+ }
+ nla_nest_end(msg, attr);
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void devlink_linecard_notify(struct devlink_linecard *linecard,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
+ cmd != DEVLINK_CMD_LINECARD_DEL);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_linecard_fill(msg, devlink, linecard, cmd, 0, 0, 0,
+ NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct devlink *devlink = linecard->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_linecard *linecard;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err;
+
+ mutex_lock(&devlink_mutex);
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
+ mutex_lock(&devlink->linecards_lock);
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_put(devlink);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->linecards_lock);
+retry:
+ devlink_put(devlink);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+
+ cb->args[0] = idx;
+ return msg->len;
+}
+
+static struct devlink_linecard_type *
+devlink_linecard_type_lookup(struct devlink_linecard *linecard,
+ const char *type)
+{
+ struct devlink_linecard_type *linecard_type;
+ int i;
+
+ for (i = 0; i < linecard->types_count; i++) {
+ linecard_type = &linecard->types[i];
+ if (!strcmp(type, linecard_type->type))
+ return linecard_type;
+ }
+ return NULL;
+}
+
+static int devlink_linecard_type_set(struct devlink_linecard *linecard,
+ const char *type,
+ struct netlink_ext_ack *extack)
+{
+ const struct devlink_linecard_ops *ops = linecard->ops;
+ struct devlink_linecard_type *linecard_type;
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+
+ linecard_type = devlink_linecard_type_lookup(linecard, type);
+ if (!linecard_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
+ linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned");
+ err = -EBUSY;
+ /* Check if the line card is provisioned in the same
+ * way the user asks. In case it is, make the operation
+ * to return success.
+ */
+ if (ops->same_provision &&
+ ops->same_provision(linecard, linecard->priv,
+ linecard_type->type,
+ linecard_type->priv))
+ err = 0;
+ goto out;
+ }
+
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING;
+ linecard->type = linecard_type->type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = ops->provision(linecard, linecard->priv, linecard_type->type,
+ linecard_type->priv, extack);
+ if (err) {
+ /* Provisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&linecard->state_lock);
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ err = -EBUSY;
+ goto out;
+ }
+ if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ err = 0;
+ goto out;
+ }
+
+ if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
+ NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned");
+ err = 0;
+ goto out;
+ }
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONING;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ err = linecard->ops->unprovision(linecard, linecard->priv,
+ extack);
+ if (err) {
+ /* Unprovisioning failed. Assume the linecard is unprovisioned
+ * for future operations.
+ */
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+ }
+ return err;
+
+out:
+ mutex_unlock(&linecard->state_lock);
+ return err;
+}
+
+static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_linecard *linecard = info->user_ptr[1];
+ struct netlink_ext_ack *extack = info->extack;
+ int err;
+
+ if (info->attrs[DEVLINK_ATTR_LINECARD_TYPE]) {
+ const char *type;
+
+ type = nla_data(info->attrs[DEVLINK_ATTR_LINECARD_TYPE]);
+ if (strcmp(type, "")) {
+ err = devlink_linecard_type_set(linecard, type, extack);
+ if (err)
+ return err;
+ } else {
+ err = devlink_linecard_type_unset(linecard, extack);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -8589,6 +8994,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -8665,6 +9072,19 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
+ .cmd = DEVLINK_CMD_LINECARD_GET,
+ .doit = devlink_nl_cmd_linecard_get_doit,
+ .dumpit = devlink_nl_cmd_linecard_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .doit = devlink_nl_cmd_linecard_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
+ },
+ {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
@@ -9043,6 +9463,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->port_list);
INIT_LIST_HEAD(&devlink->rate_list);
+ INIT_LIST_HEAD(&devlink->linecard_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -9054,6 +9475,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
INIT_LIST_HEAD(&devlink->trap_policer_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
+ mutex_init(&devlink->linecards_lock);
refcount_set(&devlink->refcount, 1);
init_completion(&devlink->comp);
@@ -9080,10 +9502,14 @@ static void devlink_notify_register(struct devlink *devlink)
struct devlink_param_item *param_item;
struct devlink_trap_item *trap_item;
struct devlink_port *devlink_port;
+ struct devlink_linecard *linecard;
struct devlink_rate *rate_node;
struct devlink_region *region;
devlink_notify(devlink, DEVLINK_CMD_NEW);
+ list_for_each_entry(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+
list_for_each_entry(devlink_port, &devlink->port_list, list)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
@@ -9191,6 +9617,7 @@ void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ mutex_destroy(&devlink->linecards_lock);
mutex_destroy(&devlink->reporters_lock);
mutex_destroy(&devlink->lock);
WARN_ON(!list_empty(&devlink->trap_policer_list));
@@ -9203,6 +9630,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
@@ -9681,6 +10109,21 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+/**
+ * devlink_port_linecard_set - Link port with a linecard
+ *
+ * @devlink_port: devlink port
+ * @linecard: devlink linecard
+ */
+void devlink_port_linecard_set(struct devlink_port *devlink_port,
+ struct devlink_linecard *linecard)
+{
+ if (WARN_ON(devlink_port->devlink))
+ return;
+ devlink_port->linecard = linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_port_linecard_set);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
@@ -9692,7 +10135,12 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
+ if (devlink_port->linecard)
+ n = snprintf(name, len, "l%u",
+ devlink_port->linecard->index);
+ if (n < len)
+ n += snprintf(name + n, len - n, "p%u",
+ attrs->phys.port_number);
if (n < len && attrs->split)
n += snprintf(name + n, len - n, "s%u",
attrs->phys.split_subport_number);
@@ -9747,6 +10195,207 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
return 0;
}
+static int devlink_linecard_types_init(struct devlink_linecard *linecard)
+{
+ struct devlink_linecard_type *linecard_type;
+ unsigned int count;
+ int i;
+
+ count = linecard->ops->types_count(linecard, linecard->priv);
+ linecard->types = kmalloc_array(count, sizeof(*linecard_type),
+ GFP_KERNEL);
+ if (!linecard->types)
+ return -ENOMEM;
+ linecard->types_count = count;
+
+ for (i = 0; i < count; i++) {
+ linecard_type = &linecard->types[i];
+ linecard->ops->types_get(linecard, linecard->priv, i,
+ &linecard_type->type,
+ &linecard_type->priv);
+ }
+ return 0;
+}
+
+static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
+{
+ kfree(linecard->types);
+}
+
+/**
+ * devlink_linecard_create - Create devlink linecard
+ *
+ * @devlink: devlink
+ * @linecard_index: driver-specific numerical identifier of the linecard
+ * @ops: linecards ops
+ * @priv: user priv pointer
+ *
+ * Create devlink linecard instance with provided linecard index.
+ * Caller can use any indexing, even hw-related one.
+ *
+ * Return: Line card structure or an ERR_PTR() encoded error code.
+ */
+struct devlink_linecard *
+devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
+{
+ struct devlink_linecard *linecard;
+ int err;
+
+ if (WARN_ON(!ops || !ops->provision || !ops->unprovision ||
+ !ops->types_count || !ops->types_get))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&devlink->linecards_lock);
+ if (devlink_linecard_index_exists(devlink, linecard_index)) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-EEXIST);
+ }
+
+ linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
+ if (!linecard) {
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ linecard->devlink = devlink;
+ linecard->index = linecard_index;
+ linecard->ops = ops;
+ linecard->priv = priv;
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ mutex_init(&linecard->state_lock);
+
+ err = devlink_linecard_types_init(linecard);
+ if (err) {
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&linecard->list, &devlink->linecard_list);
+ refcount_set(&linecard->refcount, 1);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ return linecard;
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_create);
+
+/**
+ * devlink_linecard_destroy - Destroy devlink linecard
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_destroy(struct devlink_linecard *linecard)
+{
+ struct devlink *devlink = linecard->devlink;
+
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
+ mutex_lock(&devlink->linecards_lock);
+ list_del(&linecard->list);
+ devlink_linecard_types_fini(linecard);
+ mutex_unlock(&devlink->linecards_lock);
+ devlink_linecard_put(linecard);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
+
+/**
+ * devlink_linecard_provision_set - Set provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ * @type: linecard type
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_set(struct devlink_linecard *linecard,
+ const char *type)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->type && strcmp(linecard->type, type));
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ linecard->type = type;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
+
+/**
+ * devlink_linecard_provision_clear - Clear provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the unprovision() op call or
+ * as a result of the unprovision() op call asynchronously.
+ */
+void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
+ linecard->type = NULL;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear);
+
+/**
+ * devlink_linecard_provision_fail - Fail provisioning on linecard
+ *
+ * @linecard: devlink linecard
+ *
+ * This is either called directly from the provision() op call or
+ * as a result of the provision() op call asynchronously.
+ */
+void devlink_linecard_provision_fail(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_provision_fail);
+
+/**
+ * devlink_linecard_activate - Set linecard active
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_activate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ WARN_ON(linecard->state != DEVLINK_LINECARD_STATE_PROVISIONED);
+ linecard->state = DEVLINK_LINECARD_STATE_ACTIVE;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_activate);
+
+/**
+ * devlink_linecard_deactivate - Set linecard inactive
+ *
+ * @linecard: devlink linecard
+ */
+void devlink_linecard_deactivate(struct devlink_linecard *linecard)
+{
+ mutex_lock(&linecard->state_lock);
+ switch (linecard->state) {
+ case DEVLINK_LINECARD_STATE_ACTIVE:
+ linecard->state = DEVLINK_LINECARD_STATE_PROVISIONED;
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
+ break;
+ case DEVLINK_LINECARD_STATE_UNPROVISIONING:
+ /* Line card is being deactivated as part
+ * of unprovisioning flow.
+ */
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ mutex_unlock(&linecard->state_lock);
+}
+EXPORT_SYMBOL_GPL(devlink_linecard_deactivate);
+
int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b89e3e95bffc..41cac0e4834e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -517,7 +517,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- if ((unsigned int)reason >= SKB_DROP_REASON_MAX)
+ if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
reason = SKB_DROP_REASON_NOT_SPECIFIED;
cb = NET_DM_SKB_CB(nskb);
cb->reason = reason;
diff --git a/net/core/filter.c b/net/core/filter.c
index 64470a727ef7..5af58eb48587 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -78,6 +78,7 @@
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
+#include <net/mptcp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -1687,7 +1688,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1722,7 +1723,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -4498,6 +4499,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
err = -EINVAL;
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
goto set_compat;
@@ -4523,10 +4525,14 @@ set_compat:
if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6));
+ memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
+ sizeof(to->local_ipv6));
to->tunnel_label = be32_to_cpu(info->key.label);
} else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
+ to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
+ memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3);
to->tunnel_label = 0;
}
@@ -4597,6 +4603,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
+ case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, tunnel_ext):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
@@ -4639,10 +4646,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6));
+ memcpy(&info->key.u.ipv6.src, from->local_ipv6,
+ sizeof(from->local_ipv6));
info->key.label = cpu_to_be32(from->tunnel_label) &
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
}
return 0;
@@ -5173,7 +5183,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (val <= 0 || tp->data_segs_out > tp->syn_data)
ret = -EINVAL;
else
- tp->snd_cwnd = val;
+ tcp_snd_cwnd_set(tp, val);
break;
case TCP_BPF_SNDCWND_CLAMP:
if (val <= 0) {
@@ -6621,7 +6631,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.func = bpf_sk_release,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
};
BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -7099,7 +7109,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
*/
switch (((struct iphdr *)iph)->version) {
case 4:
- if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+ if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
@@ -11272,6 +11282,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
};
+BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
+{
+ BTF_TYPE_EMIT(struct mptcp_sock);
+ return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
+}
+
+const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
+ .func = bpf_skc_to_mptcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
+};
+
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -11314,6 +11338,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_unix_sock:
func = &bpf_skc_to_unix_sock_proto;
break;
+ case BPF_FUNC_skc_to_mptcp_sock:
+ func = &bpf_skc_to_mptcp_sock_proto;
+ break;
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6f7ec72016dc..6aee04f75e3e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1035,6 +1035,16 @@ bool __skb_flow_dissect(const struct net *net,
memcpy(key_eth_addrs, eth, sizeof(*key_eth_addrs));
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) {
+ struct flow_dissector_key_num_of_vlans *key_num_of_vlans;
+
+ key_num_of_vlans = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_num_of_vlans->num_of_vlans = 0;
+ }
+
proto_again:
fdret = FLOW_DISSECT_RET_CONTINUE;
@@ -1158,6 +1168,16 @@ proto_again:
nhoff += sizeof(*vlan);
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) {
+ struct flow_dissector_key_num_of_vlans *key_nvs;
+
+ key_nvs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS,
+ target_container);
+ key_nvs->num_of_vlans++;
+ }
+
if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
diff --git a/net/core/gro.c b/net/core/gro.c
index 78110edf5d4b..b4190eb08467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -167,6 +167,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
+ if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
+ if (p->protocol != htons(ETH_P_IPV6) ||
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
+ ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ p->encapsulation)
+ return -E2BIG;
+ }
+
lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 95098d1a49bd..a244d3bade7d 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/types.h>
+#include "dev.h"
enum lw_bits {
LW_URGENT = 0,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f64ebd050f6c..47b6c1f0fdbb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3728,7 +3728,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 88cc0ad7d386..1ec23bf8b05c 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -4,6 +4,8 @@
#include <linux/seq_file.h>
#include <net/wext.h>
+#include "dev.h"
+
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9cbc1c8289bc..e319e242dddf 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -24,6 +24,7 @@
#include <linux/of_net.h>
#include <linux/cpu.h>
+#include "dev.h"
#include "net-sysfs.h"
#ifdef CONFIG_SYSFS
@@ -745,7 +746,6 @@ static const struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
NULL
};
@@ -754,7 +754,19 @@ static const struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
+
+static bool wireless_group_needed(struct net_device *ndev)
+{
+#if IS_ENABLED(CONFIG_CFG80211)
+ if (ndev->ieee80211_ptr)
+ return true;
#endif
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
+ if (ndev->wireless_handlers)
+ return true;
+#endif
+ return false;
+}
#else /* CONFIG_SYSFS */
#define net_class_groups NULL
@@ -1995,14 +2007,8 @@ int netdev_register_kobject(struct net_device *ndev)
*groups++ = &netstat_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
- if (ndev->ieee80211_ptr)
- *groups++ = &wireless_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
- else if (ndev->wireless_handlers)
+ if (wireless_group_needed(ndev))
*groups++ = &wireless_group;
-#endif
-#endif
#endif /* CONFIG_SYSFS */
error = device_add(dev);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1943c0f0307d..f18e6e771993 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -18,6 +18,7 @@
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
#include <linux/poison.h>
+#include <linux/ethtool.h>
#include <trace/events/page_pool.h>
@@ -36,6 +37,26 @@
this_cpu_inc(s->__stat); \
} while (0)
+#define recycle_stat_add(pool, __stat, val) \
+ do { \
+ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
+ this_cpu_add(s->__stat, val); \
+ } while (0)
+
+static const char pp_stats[][ETH_GSTRING_LEN] = {
+ "rx_pp_alloc_fast",
+ "rx_pp_alloc_slow",
+ "rx_pp_alloc_slow_ho",
+ "rx_pp_alloc_empty",
+ "rx_pp_alloc_refill",
+ "rx_pp_alloc_waive",
+ "rx_pp_recycle_cached",
+ "rx_pp_recycle_cache_full",
+ "rx_pp_recycle_ring",
+ "rx_pp_recycle_ring_full",
+ "rx_pp_recycle_released_ref",
+};
+
bool page_pool_get_stats(struct page_pool *pool,
struct page_pool_stats *stats)
{
@@ -44,7 +65,13 @@ bool page_pool_get_stats(struct page_pool *pool,
if (!stats)
return false;
- memcpy(&stats->alloc_stats, &pool->alloc_stats, sizeof(pool->alloc_stats));
+ /* The caller is responsible to initialize stats. */
+ stats->alloc_stats.fast += pool->alloc_stats.fast;
+ stats->alloc_stats.slow += pool->alloc_stats.slow;
+ stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
+ stats->alloc_stats.empty += pool->alloc_stats.empty;
+ stats->alloc_stats.refill += pool->alloc_stats.refill;
+ stats->alloc_stats.waive += pool->alloc_stats.waive;
for_each_possible_cpu(cpu) {
const struct page_pool_recycle_stats *pcpu =
@@ -60,9 +87,50 @@ bool page_pool_get_stats(struct page_pool *pool,
return true;
}
EXPORT_SYMBOL(page_pool_get_stats);
+
+u8 *page_pool_ethtool_stats_get_strings(u8 *data)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
+ memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ }
+
+ return data;
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);
+
+int page_pool_ethtool_stats_get_count(void)
+{
+ return ARRAY_SIZE(pp_stats);
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
+
+u64 *page_pool_ethtool_stats_get(u64 *data, void *stats)
+{
+ struct page_pool_stats *pool_stats = stats;
+
+ *data++ = pool_stats->alloc_stats.fast;
+ *data++ = pool_stats->alloc_stats.slow;
+ *data++ = pool_stats->alloc_stats.slow_high_order;
+ *data++ = pool_stats->alloc_stats.empty;
+ *data++ = pool_stats->alloc_stats.refill;
+ *data++ = pool_stats->alloc_stats.waive;
+ *data++ = pool_stats->recycle_stats.cached;
+ *data++ = pool_stats->recycle_stats.cache_full;
+ *data++ = pool_stats->recycle_stats.ring;
+ *data++ = pool_stats->recycle_stats.ring_full;
+ *data++ = pool_stats->recycle_stats.released_refcnt;
+
+ return data;
+}
+EXPORT_SYMBOL(page_pool_ethtool_stats_get);
+
#else
#define alloc_stat_inc(pool, __stat)
#define recycle_stat_inc(pool, __stat)
+#define recycle_stat_add(pool, __stat, val)
#endif
static int page_pool_init(struct page_pool *pool,
@@ -566,9 +634,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
/* Bulk producer into ptr_ring page_pool cache */
page_pool_ring_lock(pool);
for (i = 0; i < bulk_len; i++) {
- if (__ptr_ring_produce(&pool->ring, data[i]))
- break; /* ring full */
+ if (__ptr_ring_produce(&pool->ring, data[i])) {
+ /* ring full */
+ recycle_stat_inc(pool, ring_full);
+ break;
+ }
}
+ recycle_stat_add(pool, ring, i);
page_pool_ring_unlock(pool);
/* Hopefully all pages was return into ptr_ring */
@@ -632,8 +704,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
if (page && *offset + size > max_size) {
page = page_pool_drain_frag(pool, page);
- if (page)
+ if (page) {
+ alloc_stat_inc(pool, fast);
goto frag_reset;
+ }
}
if (!page) {
@@ -655,6 +729,7 @@ frag_reset:
pool->frag_users++;
pool->frag_offset = *offset + size;
+ alloc_stat_inc(pool, fast);
return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d1381ea6d52e..ac45328607f7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -54,6 +54,8 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include "dev.h"
+
#define RTNL_MAX_TYPE 50
#define RTNL_SLAVE_MAX_TYPE 40
@@ -95,6 +97,39 @@ void __rtnl_unlock(void)
defer_kfree_skb_list = NULL;
+ /* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
+ * is used. In some places, e.g. in cfg80211, we have code that will do
+ * something like
+ * rtnl_lock()
+ * wiphy_lock()
+ * ...
+ * rtnl_unlock()
+ *
+ * and because netdev_run_todo() acquires the RTNL for items on the list
+ * we could cause a situation such as this:
+ * Thread 1 Thread 2
+ * rtnl_lock()
+ * unregister_netdevice()
+ * __rtnl_unlock()
+ * rtnl_lock()
+ * wiphy_lock()
+ * rtnl_unlock()
+ * netdev_run_todo()
+ * __rtnl_unlock()
+ *
+ * // list not empty now
+ * // because of thread 2
+ * rtnl_lock()
+ * while (!list_empty(...))
+ * rtnl_lock()
+ * wiphy_lock()
+ * **** DEADLOCK ****
+ *
+ * However, usage of __rtnl_unlock() is rare, and so we can ensure that
+ * it's not used in cases where something is added to do the list.
+ */
+ WARN_ON(!list_empty(&net_todo_list));
+
mutex_unlock(&rtnl_mutex);
while (head) {
@@ -214,6 +249,8 @@ static int rtnl_register_internal(struct module *owner,
if (dumpit)
link->dumpit = dumpit;
+ WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL &&
+ (flags & RTNL_FLAG_BULK_DEL_SUPPORTED));
link->flags |= flags;
/* publish protocol:msgtype */
@@ -1027,6 +1064,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1732,6 +1771,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
@@ -1885,6 +1926,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
[IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
+ [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2269,6 +2312,19 @@ invalid_attr:
return -EINVAL;
}
+static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_set_vf_rate)
+ return -EOPNOTSUPP;
+ if (max_tx_rate && max_tx_rate < min_tx_rate)
+ return -EINVAL;
+
+ return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate);
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
@@ -2304,14 +2360,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
}
}
- if (tb[IFLA_GRO_MAX_SIZE]) {
- u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);
-
- if (gro_max_size > GRO_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "too big gro_max_size");
- return -EINVAL;
- }
- }
return 0;
}
@@ -2406,11 +2454,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (err < 0)
return err;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate, ivt->rate);
if (err < 0)
return err;
}
@@ -2420,11 +2465,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (ivt->vf >= INT_MAX)
return -EINVAL;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
+
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate, ivt->max_tx_rate);
if (err < 0)
return err;
}
@@ -2607,17 +2650,23 @@ static int do_set_proto_down(struct net_device *dev,
static int do_setlink(const struct sk_buff *skb,
struct net_device *dev, struct ifinfomsg *ifm,
struct netlink_ext_ack *extack,
- struct nlattr **tb, char *ifname, int status)
+ struct nlattr **tb, int status)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ char ifname[IFNAMSIZ];
int err;
err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
+ if (tb[IFLA_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ else
+ ifname[0] = '\0';
+
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
- const char *pat = ifname && ifname[0] ? ifname : NULL;
+ const char *pat = ifname[0] ? ifname : NULL;
struct net *net;
int new_ifindex;
@@ -2760,7 +2809,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SIZE]) {
u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
- if (max_size > GSO_MAX_SIZE) {
+ if (max_size > dev->tso_max_size) {
err = -EINVAL;
goto errout;
}
@@ -2774,7 +2823,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SEGS]) {
u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
- if (max_segs > GSO_MAX_SEGS) {
+ if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) {
err = -EINVAL;
goto errout;
}
@@ -2973,21 +3022,16 @@ errout:
}
static struct net_device *rtnl_dev_get(struct net *net,
- struct nlattr *ifname_attr,
- struct nlattr *altifname_attr,
- char *ifname)
-{
- char buffer[ALTIFNAMSIZ];
-
- if (!ifname) {
- ifname = buffer;
- if (ifname_attr)
- nla_strscpy(ifname, ifname_attr, IFNAMSIZ);
- else if (altifname_attr)
- nla_strscpy(ifname, altifname_attr, ALTIFNAMSIZ);
- else
- return NULL;
- }
+ struct nlattr *tb[])
+{
+ char ifname[ALTIFNAMSIZ];
+
+ if (tb[IFLA_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ else if (tb[IFLA_ALT_IFNAME])
+ nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ);
+ else
+ return NULL;
return __dev_get_by_name(net, ifname);
}
@@ -3000,7 +3044,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev;
int err;
struct nlattr *tb[IFLA_MAX+1];
- char ifname[IFNAMSIZ];
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
ifla_policy, extack);
@@ -3011,17 +3054,12 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
- if (tb[IFLA_IFNAME])
- nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- else
- ifname[0] = '\0';
-
err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
+ dev = rtnl_dev_get(net, tb);
else
goto errout;
@@ -3030,7 +3068,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = do_setlink(skb, dev, ifm, extack, tb, ifname, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, 0);
errout:
return err;
}
@@ -3119,15 +3157,14 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(net, tb);
else if (tb[IFLA_GROUP])
err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
else
goto out;
if (!dev) {
- if (tb[IFLA_IFNAME] || ifm->ifi_index > 0)
+ if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME] || ifm->ifi_index > 0)
err = -ENODEV;
goto out;
@@ -3262,7 +3299,7 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
- err = do_setlink(skb, dev, ifm, extack, tb, NULL, 0);
+ err = do_setlink(skb, dev, ifm, extack, tb, 0);
if (err < 0)
return err;
}
@@ -3271,24 +3308,118 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attr, struct netlink_ext_ack *extack)
+static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
+ const struct rtnl_link_ops *ops,
+ struct nlattr **tb, struct nlattr **data,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
unsigned char name_assign_type = NET_NAME_USER;
+ struct net *net = sock_net(skb->sk);
+ struct net *dest_net, *link_net;
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ int err;
+
+ if (!ops->alloc && !ops->setup)
+ return -EOPNOTSUPP;
+
+ if (tb[IFLA_IFNAME]) {
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ } else {
+ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
+
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ err = -EINVAL;
+ goto out;
+ }
+ err = -EPERM;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+ } else {
+ link_net = NULL;
+ }
+
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb, extack);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
+ err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ else
+ err = register_netdevice(dev);
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
+
+ err = rtnl_configure_link(dev, ifm);
+ if (err < 0)
+ goto out_unregister;
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
+ if (err < 0)
+ goto out_unregister;
+ }
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto out_unregister;
+ }
+out:
+ if (link_net)
+ put_net(link_net);
+ put_net(dest_net);
+ return err;
+out_unregister:
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
+ }
+ goto out;
+}
+
+struct rtnl_newlink_tbs {
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct nlattr *attr[RTNL_MAX_TYPE + 1];
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+};
+
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtnl_newlink_tbs *tbs,
+ struct netlink_ext_ack *extack)
+{
struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+ struct nlattr ** const tb = tbs->tb;
const struct rtnl_link_ops *m_ops;
struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- struct nlattr *tb[IFLA_MAX + 1];
- struct net *dest_net, *link_net;
struct nlattr **slave_data;
char kind[MODULE_NAME_LEN];
struct net_device *dev;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr **data;
+ bool link_specified;
int err;
#ifdef CONFIG_MODULES
@@ -3303,18 +3434,17 @@ replay:
if (err < 0)
return err;
- if (tb[IFLA_IFNAME])
- nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- else
- ifname[0] = '\0';
-
ifm = nlmsg_data(nlh);
- if (ifm->ifi_index > 0)
+ if (ifm->ifi_index > 0) {
+ link_specified = true;
dev = __dev_get_by_index(net, ifm->ifi_index);
- else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, NULL, tb[IFLA_ALT_IFNAME], ifname);
- else
+ } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
+ link_specified = true;
+ dev = rtnl_dev_get(net, tb);
+ } else {
+ link_specified = false;
dev = NULL;
+ }
master_dev = NULL;
m_ops = NULL;
@@ -3351,12 +3481,12 @@ replay:
return -EINVAL;
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested_deprecated(attr, ops->maxtype,
+ err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
if (err < 0)
return err;
- data = attr;
+ data = tbs->attr;
}
if (ops->validate) {
err = ops->validate(tb, data, extack);
@@ -3372,14 +3502,14 @@ replay:
if (m_ops->slave_maxtype &&
linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested_deprecated(slave_attr,
+ err = nla_parse_nested_deprecated(tbs->slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
m_ops->slave_policy,
extack);
if (err < 0)
return err;
- slave_data = slave_attr;
+ slave_data = tbs->slave_attr;
}
}
@@ -3413,11 +3543,16 @@ replay:
status |= DO_SETLINK_NOTIFY;
}
- return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
+ return do_setlink(skb, dev, ifm, extack, tb, status);
}
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ /* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
+ * or it's for a group
+ */
+ if (link_specified)
+ return -ENODEV;
+ if (tb[IFLA_GROUP])
return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, extack, tb);
@@ -3442,94 +3577,21 @@ replay:
return -EOPNOTSUPP;
}
- if (!ops->alloc && !ops->setup)
- return -EOPNOTSUPP;
-
- if (!ifname[0]) {
- snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- name_assign_type = NET_NAME_ENUM;
- }
-
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
-
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
-
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- } else {
- link_net = NULL;
- }
-
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
- name_assign_type, ops, tb, extack);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
- goto out;
- }
-
- dev->ifindex = ifm->ifi_index;
-
- if (ops->newlink)
- err = ops->newlink(link_net ? : net, dev, tb, data, extack);
- else
- err = register_netdevice(dev);
- if (err < 0) {
- free_netdev(dev);
- goto out;
- }
-
- err = rtnl_configure_link(dev, ifm);
- if (err < 0)
- goto out_unregister;
- if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
- if (err < 0)
- goto out_unregister;
- }
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
- if (err)
- goto out_unregister;
- }
-out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
- return err;
-out_unregister:
- if (ops->newlink) {
- LIST_HEAD(list_kill);
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- } else {
- unregister_netdevice(dev);
- }
- goto out;
+ return rtnl_newlink_create(skb, ifm, ops, tb, data, extack);
}
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr **attr;
+ struct rtnl_newlink_tbs *tbs;
int ret;
- attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
- if (!attr)
+ tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
+ if (!tbs)
return -ENOMEM;
- ret = __rtnl_newlink(skb, nlh, attr, extack);
- kfree(attr);
+ ret = __rtnl_newlink(skb, nlh, tbs, extack);
+ kfree(tbs);
return ret;
}
@@ -3617,8 +3679,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(tgt_net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(tgt_net, tb);
else
goto out;
@@ -3713,8 +3774,7 @@ static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
if (ifm->ifi_index > 0)
dev = __dev_get_by_index(net, ifm->ifi_index);
else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
- dev = rtnl_dev_get(net, tb[IFLA_IFNAME],
- tb[IFLA_ALT_IFNAME], NULL);
+ dev = rtnl_dev_get(net, tb);
else
return -EINVAL;
@@ -4132,22 +4192,36 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_del);
+static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = {
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
+ [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
+};
+
static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK);
struct net *net = sock_net(skb->sk);
+ const struct net_device_ops *ops;
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
- __u8 *addr;
+ __u8 *addr = NULL;
int err;
u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
- err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL,
- extack);
+ if (!del_bulk) {
+ err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
+ NULL, extack);
+ } else {
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
+ fdb_del_bulk_policy, extack);
+ }
if (err < 0)
return err;
@@ -4163,9 +4237,12 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENODEV;
}
- if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- NL_SET_ERR_MSG(extack, "invalid address");
- return -EINVAL;
+ if (!del_bulk) {
+ if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "invalid address");
+ return -EINVAL;
+ }
+ addr = nla_data(tb[NDA_LLADDR]);
}
if (dev->type != ARPHRD_ETHER) {
@@ -4173,8 +4250,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- addr = nla_data(tb[NDA_LLADDR]);
-
err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -4185,10 +4260,16 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
netif_is_bridge_port(dev)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
- const struct net_device_ops *ops = br_dev->netdev_ops;
- if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ ops = br_dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ } else {
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (err)
goto out;
@@ -4198,15 +4279,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
- if (dev->netdev_ops->ndo_fdb_del)
- err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
- vid);
- else
- err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ ops = dev->netdev_ops;
+ if (!del_bulk) {
+ if (ops->ndo_fdb_del)
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
+ else
+ err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
+ } else {
+ /* in case err was cleared by NTF_MASTER call */
+ err = -EOPNOTSUPP;
+ if (ops->ndo_fdb_del_bulk)
+ err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
+ extack);
+ }
if (!err) {
- rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
- ndm->ndm_state);
+ if (!del_bulk)
+ rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH,
+ ndm->ndm_state);
ndm->ndm_flags &= ~NTF_SELF;
}
}
@@ -5896,11 +5986,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct rtnl_link *link;
+ enum rtnl_kinds kind;
struct module *owner;
int err = -EOPNOTSUPP;
rtnl_doit_func doit;
unsigned int flags;
- int kind;
int family;
int type;
@@ -5915,13 +6005,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
- kind = type&3;
+ kind = rtnl_msgtype_kind(type);
- if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
+ if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
return -EPERM;
rcu_read_lock();
- if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
+ if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
u32 min_dump_alloc = 0;
@@ -5977,6 +6067,12 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
flags = link->flags;
+ if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) &&
+ !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) {
+ NL_SET_ERR_MSG(extack, "Bulk delete is not supported");
+ goto err_unlock;
+ }
+
if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
doit = link->doit;
rcu_read_unlock();
@@ -6105,7 +6201,8 @@ void __init rtnetlink_init(void)
rtnl_register(PF_UNSPEC, RTM_DELLINKPROP, rtnl_dellinkprop, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL,
+ RTNL_FLAG_BULK_DEL_SUPPORTED);
rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c90c74de90d5..5b3559cb1d82 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,7 +80,7 @@
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
-#include "datagram.h"
+#include "dev.h"
#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -204,7 +204,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
skb_set_end_offset(skb, size);
skb->mac_header = (typeof(skb->mac_header))~0U;
skb->transport_header = (typeof(skb->transport_header))~0U;
-
+ skb->alloc_cpu = raw_smp_processor_id();
/* make sure we initialize shinfo sequentially */
shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
@@ -772,6 +772,8 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
if (!skb_unref(skb))
return;
+ DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+
trace_kfree_skb(skb, __builtin_return_address(0), reason);
__kfree_skb(skb);
}
@@ -1037,6 +1039,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif
+ CHECK_SKB_FIELD(alloc_cpu);
#ifdef CONFIG_XPS
CHECK_SKB_FIELD(sender_cpu);
#endif
@@ -1165,7 +1168,7 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
-struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
+static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
struct ubuf_info *uarg;
struct sk_buff *skb;
@@ -1196,7 +1199,6 @@ struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return uarg;
}
-EXPORT_SYMBOL_GPL(msg_zerocopy_alloc);
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
{
@@ -1339,18 +1341,11 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
-int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
-{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
-}
-EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
-
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
{
struct ubuf_info *orig_uarg = skb_zcopy(skb);
- struct iov_iter orig_iter = msg->msg_iter;
int err, orig_len = skb->len;
/* An skb can only point to one uarg. This edge case happens when
@@ -1364,7 +1359,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct sock *save_sk = skb->sk;
/* Streams do not free skb on error. Reset to prev state. */
- msg->msg_iter = orig_iter;
+ iov_iter_revert(&msg->msg_iter, skb->len - orig_len);
skb->sk = sk;
___pskb_trim(skb, orig_len);
skb->sk = save_sk;
@@ -5603,7 +5598,7 @@ err_free:
}
EXPORT_SYMBOL(skb_vlan_untag);
-int skb_ensure_writable(struct sk_buff *skb, int write_len)
+int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
{
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
@@ -6488,3 +6483,49 @@ free_now:
}
EXPORT_SYMBOL(__skb_ext_put);
#endif /* CONFIG_SKB_EXTENSIONS */
+
+/**
+ * skb_attempt_defer_free - queue skb for remote freeing
+ * @skb: buffer
+ *
+ * Put @skb in a per-cpu list, using the cpu which
+ * allocated the skb/pages to reduce false sharing
+ * and memory zone spinlock contention.
+ */
+void skb_attempt_defer_free(struct sk_buff *skb)
+{
+ int cpu = skb->alloc_cpu;
+ struct softnet_data *sd;
+ unsigned long flags;
+ unsigned int defer_max;
+ bool kick;
+
+ if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
+ !cpu_online(cpu) ||
+ cpu == raw_smp_processor_id()) {
+nodefer: __kfree_skb(skb);
+ return;
+ }
+
+ sd = &per_cpu(softnet_data, cpu);
+ defer_max = READ_ONCE(sysctl_skb_defer_max);
+ if (READ_ONCE(sd->defer_count) >= defer_max)
+ goto nodefer;
+
+ spin_lock_irqsave(&sd->defer_lock, flags);
+ /* Send an IPI every time queue reaches half capacity. */
+ kick = sd->defer_count == (defer_max >> 1);
+ /* Paired with the READ_ONCE() few lines above */
+ WRITE_ONCE(sd->defer_count, sd->defer_count + 1);
+
+ skb->next = sd->defer_list;
+ /* Paired with READ_ONCE() in skb_defer_free_flush() */
+ WRITE_ONCE(sd->defer_list, skb);
+ spin_unlock_irqrestore(&sd->defer_lock, flags);
+
+ /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
+ * if we are unlucky enough (this seems very unlikely).
+ */
+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
+ smp_call_function_single_async(cpu, &sd->defer_csd);
+}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index cc381165ea08..22b983ade0e7 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -524,16 +524,20 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
{
int num_sge, copied;
- /* skb linearize may fail with ENOMEM, but lets simply try again
- * later if this happens. Under memory pressure we don't want to
- * drop the skb. We need to linearize the skb so that the mapping
- * in skb_to_sgvec can not error.
- */
- if (skb_linearize(skb))
- return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
- if (unlikely(num_sge < 0))
- return num_sge;
+ if (num_sge < 0) {
+ /* skb linearize may fail with ENOMEM, but lets simply try again
+ * later if this happens. Under memory pressure we don't want to
+ * drop the skb. We need to linearize the skb so that the mapping
+ * in skb_to_sgvec can not error.
+ */
+ if (skb_linearize(skb))
+ return -EAGAIN;
+
+ num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
+ if (unlikely(num_sge < 0))
+ return num_sge;
+ }
copied = len;
msg->sg.start = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 1180a0cb0110..2ff40dd0a7a6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -141,9 +141,14 @@
#include <linux/ethtool.h>
+#include "dev.h"
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+static void sock_def_write_space_wfree(struct sock *sk);
+static void sock_def_write_space(struct sock *sk);
+
/**
* sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through
@@ -503,17 +508,35 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
+ enum skb_drop_reason *reason)
{
+ enum skb_drop_reason drop_reason;
int err;
err = sk_filter(sk, skb);
- if (err)
- return err;
-
- return __sock_queue_rcv_skb(sk, skb);
+ if (err) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ goto out;
+ }
+ err = __sock_queue_rcv_skb(sk, skb);
+ switch (err) {
+ case -ENOMEM:
+ drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
+ break;
+ case -ENOBUFS:
+ drop_reason = SKB_DROP_REASON_PROTO_MEM;
+ break;
+ default:
+ drop_reason = SKB_NOT_DROPPED_YET;
+ break;
+ }
+out:
+ if (reason)
+ *reason = drop_reason;
+ return err;
}
-EXPORT_SYMBOL(sock_queue_rcv_skb);
+EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
@@ -612,7 +635,9 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
if (ifindex < 0)
goto out;
- sk->sk_bound_dev_if = ifindex;
+ /* Paired with all READ_ONCE() done locklessly. */
+ WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
+
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
@@ -690,10 +715,11 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
- if (sk->sk_bound_dev_if == 0) {
+ if (bound_dev_if == 0) {
len = 0;
goto zero;
}
@@ -702,7 +728,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
if (len < IFNAMSIZ)
goto out;
- ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+ ret = netdev_get_name(net, devname, bound_dev_if);
if (ret)
goto out;
@@ -1291,6 +1317,15 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
+ case SO_RCVMARK:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+
+ sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
+ break;
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
@@ -1717,6 +1752,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_mark;
break;
+ case SO_RCVMARK:
+ v.val = sock_flag(sk, SOCK_RCVMARK);
+ break;
+
case SO_RXQ_OVFL:
v.val = sock_flag(sk, SOCK_RXQ_OVFL);
break;
@@ -1825,7 +1864,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTOIFINDEX:
- v.val = sk->sk_bound_dev_if;
+ v.val = READ_ONCE(sk->sk_bound_dev_if);
break;
case SO_NETNS_COOKIE:
@@ -2062,9 +2101,6 @@ void sk_destruct(struct sock *sk)
{
bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
- WARN_ON_ONCE(!llist_empty(&sk->defer_list));
- sk_defer_free_flush(sk);
-
if (rcu_access_pointer(sk->sk_reuseport_cb)) {
reuseport_detach_sock(sk);
use_call_rcu = true;
@@ -2260,6 +2296,19 @@ void sk_free_unlock_clone(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+static void sk_trim_gso_size(struct sock *sk)
+{
+ if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ sk_is_tcp(sk) &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return;
+#endif
+ sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+}
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
@@ -2279,6 +2328,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+ sk_trim_gso_size(sk);
sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
@@ -2300,8 +2350,20 @@ void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
unsigned int len = skb->truesize;
+ bool free;
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ if (sock_flag(sk, SOCK_RCU_FREE) &&
+ sk->sk_write_space == sock_def_write_space) {
+ rcu_read_lock();
+ free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
+ sock_def_write_space_wfree(sk);
+ rcu_read_unlock();
+ if (unlikely(free))
+ __sk_free(sk);
+ return;
+ }
+
/*
* Keep a reference on sk_wmem_alloc, this will be released
* after sk_write_space() call
@@ -2611,13 +2673,6 @@ failure:
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
-struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode)
-{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
-}
-EXPORT_SYMBOL(sock_alloc_send_skb);
-
int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
struct sockcm_cookie *sockc)
{
@@ -3174,20 +3229,42 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
+ if (sock_writeable(sk)) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- if (sock_writeable(sk))
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
+/* An optimised version of sock_def_write_space(), should only be called
+ * for SOCK_RCU_FREE sockets under RCU read section and after putting
+ * ->sk_wmem_alloc.
+ */
+static void sock_def_write_space_wfree(struct sock *sk)
+{
+ /* Do not wake up a writer until he can make "significant"
+ * progress. --DaveM
+ */
+ if (sock_writeable(sk)) {
+ struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+ /* rely on refcount_sub from sock_wfree() */
+ smp_mb__after_atomic();
+ if (wq && waitqueue_active(&wq->wait))
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
+
+ /* Should agree with poll, otherwise some programs break */
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ }
+}
+
static void sock_def_destruct(struct sock *sk)
{
}
@@ -3486,8 +3563,7 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int addr_len = 0;
int err;
- err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 2d213c4011db..81d4b4756a02 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -793,7 +793,7 @@ static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
.seq_priv_size = sizeof(struct sock_map_seq_info),
};
-static int sock_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_map_btf_ids, struct, bpf_stab)
const struct bpf_map_ops sock_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
@@ -805,8 +805,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_stab",
- .map_btf_id = &sock_map_btf_id,
+ .map_btf_id = &sock_map_btf_ids[0],
.iter_seq_info = &sock_map_iter_seq_info,
};
@@ -1385,7 +1384,7 @@ static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
.seq_priv_size = sizeof(struct sock_hash_seq_info),
};
-static int sock_hash_map_btf_id;
+BTF_ID_LIST_SINGLE(sock_hash_map_btf_ids, struct, bpf_shtab)
const struct bpf_map_ops sock_hash_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
@@ -1397,8 +1396,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "bpf_shtab",
- .map_btf_id = &sock_hash_map_btf_id,
+ .map_btf_id = &sock_hash_map_btf_ids[0],
.iter_seq_info = &sock_hash_iter_seq_info,
};
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7123fe7feeac..71a13596ea2b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,13 +23,12 @@
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
-static int two = 2;
-static int three = 3;
+#include "dev.h"
+
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
-static long long_one __maybe_unused = 1;
static long long_max __maybe_unused = LONG_MAX;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -265,6 +264,8 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret, jit_enable = *(int *)table->data;
+ int min = *(int *)table->extra1;
+ int max = *(int *)table->extra2;
struct ctl_table tmp = *table;
if (write && !capable(CAP_SYS_ADMIN))
@@ -282,6 +283,10 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
ret = -EPERM;
}
}
+
+ if (write && ret && min == max)
+ pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");
+
return ret;
}
@@ -388,7 +393,7 @@ static struct ctl_table net_core_table[] = {
.extra2 = SYSCTL_ONE,
# else
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
# endif
},
# ifdef CONFIG_HAVE_EBPF_JIT
@@ -399,7 +404,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dointvec_minmax_bpf_restricted,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_jit_kallsyms",
@@ -417,7 +422,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(long),
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
- .extra1 = &long_one,
+ .extra1 = SYSCTL_LONG_ONE,
.extra2 = &bpf_jit_limit_max,
},
#endif
@@ -544,7 +549,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "devconf_inherit_init_net",
@@ -553,7 +558,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "high_order_alloc_disable",
@@ -579,6 +584,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &int_3600,
},
+ {
+ .procname = "skb_defer_max",
+ .data = &sysctl_skb_defer_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
{ }
};
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 671c377f0889..7dfc00c9fb32 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -293,8 +293,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len);
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
__poll_t dccp_poll(struct file *file, struct socket *sock,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 0ea29270d7e5..da6e3b20cd75 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -76,9 +76,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_DCCP,
- orig_sport, orig_dport, sk);
+ sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport,
+ orig_dport, sk);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -629,7 +628,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
ireq->ir_mark = inet_request_mark(sk, skb);
ireq->ireq_family = AF_INET;
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
/*
* Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index fa663518fa0e..fd44638ec16b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -374,10 +374,10 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
- ireq->ir_iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
/* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
+ if (!ireq->ir_iif &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = inet6_iif(skb);
@@ -892,7 +892,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a976b4d29892..2e78458900f2 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -791,8 +791,8 @@ out_discard:
EXPORT_SYMBOL_GPL(dccp_sendmsg);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
const struct dccp_hdr *dh;
long timeo;
@@ -804,7 +804,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
@@ -1110,7 +1110,6 @@ static int __init dccp_init(void)
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
sizeof_field(struct sk_buff, cb));
- inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
goto out_fail;
@@ -1121,6 +1120,12 @@ static int __init dccp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
+ dccp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("dccp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
+ if (!dccp_hashinfo.bind2_bucket_cachep)
+ goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
@@ -1151,7 +1156,7 @@ static int __init dccp_init(void)
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind_bucket_cachep;
+ goto out_free_bind2_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
@@ -1177,14 +1182,23 @@ static int __init dccp_init(void)
goto out_free_dccp_locks;
}
+ dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
+ __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
+
+ if (!dccp_hashinfo.bhash2) {
+ DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
+ goto out_free_dccp_bhash;
+ }
+
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+ INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash;
+ goto out_free_dccp_bhash2;
rc = dccp_ackvec_init();
if (rc)
@@ -1208,30 +1222,38 @@ out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
+out_free_dccp_bhash2:
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+out_free_bind2_bucket_cachep:
+ kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
+ dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
+ int bhash_order = get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket));
+
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash,
- get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket)));
+ free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index d1d78a463a06..552a53f1d5d0 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -159,7 +159,7 @@ static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how
struct neighbour *n = rt->n;
if (n && n->dev == dev) {
- n->dev = dev_net(dev)->loopback_dev;
+ n->dev = blackhole_netdev;
dev_hold(n->dev);
dev_put(dev);
}
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 89c6c86e746f..be7b320cda76 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -7,19 +7,10 @@
#include <linux/device.h>
#include <linux/list.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/of.h>
-#include <linux/of_mdio.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
#include <linux/netdevice.h>
#include <linux/sysfs.h>
-#include <linux/phy_fixed.h>
#include <linux/ptp_classify.h>
-#include <linux/etherdevice.h>
#include "dsa_priv.h"
@@ -467,46 +458,6 @@ struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
}
EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
-int dsa_port_walk_fdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb)
-{
- struct dsa_port *dp = dsa_to_port(ds, port);
- struct dsa_mac_addr *a;
- int err = 0;
-
- mutex_lock(&dp->addr_lists_lock);
-
- list_for_each_entry(a, &dp->fdbs, list) {
- err = cb(ds, port, a->addr, a->vid, a->db);
- if (err)
- break;
- }
-
- mutex_unlock(&dp->addr_lists_lock);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(dsa_port_walk_fdbs);
-
-int dsa_port_walk_mdbs(struct dsa_switch *ds, int port, dsa_fdb_walk_cb_t cb)
-{
- struct dsa_port *dp = dsa_to_port(ds, port);
- struct dsa_mac_addr *a;
- int err = 0;
-
- mutex_lock(&dp->addr_lists_lock);
-
- list_for_each_entry(a, &dp->mdbs, list) {
- err = cb(ds, port, a->addr, a->vid, a->db);
- if (err)
- break;
- }
-
- mutex_unlock(&dp->addr_lists_lock);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(dsa_port_walk_mdbs);
-
bool dsa_db_equal(const struct dsa_db *a, const struct dsa_db *b)
{
if (a->type != b->type)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index cf933225df32..cac48a741f27 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/of.h>
+#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <net/devlink.h>
#include <net/sch_generic.h>
@@ -809,22 +810,18 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
{
const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
struct dsa_switch_tree *dst = ds->dst;
- struct dsa_port *cpu_dp;
int err;
if (tag_ops->proto == dst->default_proto)
goto connect;
- dsa_switch_for_each_cpu_port(cpu_dp, ds) {
- rtnl_lock();
- err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
- tag_ops->proto);
- rtnl_unlock();
- if (err) {
- dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
- tag_ops->name, ERR_PTR(err));
- return err;
- }
+ rtnl_lock();
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ rtnl_unlock();
+ if (err) {
+ dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
+ tag_ops->name, ERR_PTR(err));
+ return err;
}
connect:
@@ -856,6 +853,7 @@ disconnect:
static int dsa_switch_setup(struct dsa_switch *ds)
{
struct dsa_devlink_priv *dl_priv;
+ struct device_node *dn;
struct dsa_port *dp;
int err;
@@ -911,7 +909,10 @@ static int dsa_switch_setup(struct dsa_switch *ds)
dsa_slave_mii_bus_init(ds);
- err = mdiobus_register(ds->slave_mii_bus);
+ dn = of_get_child_by_name(ds->dev->of_node, "mdio");
+
+ err = of_mdiobus_register(ds->slave_mii_bus, dn);
+ of_node_put(dn);
if (err < 0)
goto free_slave_mii_bus;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 5d3f4a67dce1..d9722e49864b 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -54,18 +54,15 @@ struct dsa_notifier_ageing_time_info {
/* DSA_NOTIFIER_BRIDGE_* */
struct dsa_notifier_bridge_info {
+ const struct dsa_port *dp;
struct dsa_bridge bridge;
- int tree_index;
- int sw_index;
- int port;
bool tx_fwd_offload;
struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_FDB_* */
struct dsa_notifier_fdb_info {
- int sw_index;
- int port;
+ const struct dsa_port *dp;
const unsigned char *addr;
u16 vid;
struct dsa_db db;
@@ -81,34 +78,28 @@ struct dsa_notifier_lag_fdb_info {
/* DSA_NOTIFIER_MDB_* */
struct dsa_notifier_mdb_info {
+ const struct dsa_port *dp;
const struct switchdev_obj_port_mdb *mdb;
- int sw_index;
- int port;
struct dsa_db db;
};
/* DSA_NOTIFIER_LAG_* */
struct dsa_notifier_lag_info {
+ const struct dsa_port *dp;
struct dsa_lag lag;
- int sw_index;
- int port;
-
struct netdev_lag_upper_info *info;
};
/* DSA_NOTIFIER_VLAN_* */
struct dsa_notifier_vlan_info {
+ const struct dsa_port *dp;
const struct switchdev_obj_port_vlan *vlan;
- int sw_index;
- int port;
struct netlink_ext_ack *extack;
};
/* DSA_NOTIFIER_MTU */
struct dsa_notifier_mtu_info {
- bool targeted_match;
- int sw_index;
- int port;
+ const struct dsa_port *dp;
int mtu;
};
@@ -119,9 +110,7 @@ struct dsa_notifier_tag_proto_info {
/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
struct dsa_notifier_tag_8021q_vlan_info {
- int tree_index;
- int sw_index;
- int port;
+ const struct dsa_port *dp;
u16 vid;
};
@@ -241,8 +230,7 @@ int dsa_port_mst_enable(struct dsa_port *dp, bool on,
struct netlink_ext_ack *extack);
int dsa_port_vlan_msti(struct dsa_port *dp,
const struct switchdev_vlan_msti *msti);
-int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool targeted_match);
+int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
@@ -303,6 +291,7 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bdccb613285d..3738f2d40a0b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -242,6 +242,59 @@ void dsa_port_disable(struct dsa_port *dp)
rtnl_unlock();
}
+static void dsa_port_reset_vlan_filtering(struct dsa_port *dp,
+ struct dsa_bridge bridge)
+{
+ struct netlink_ext_ack extack = {0};
+ bool change_vlan_filtering = false;
+ struct dsa_switch *ds = dp->ds;
+ bool vlan_filtering;
+ int err;
+
+ if (ds->needs_standalone_vlan_filtering &&
+ !br_vlan_enabled(bridge.dev)) {
+ change_vlan_filtering = true;
+ vlan_filtering = true;
+ } else if (!ds->needs_standalone_vlan_filtering &&
+ br_vlan_enabled(bridge.dev)) {
+ change_vlan_filtering = true;
+ vlan_filtering = false;
+ }
+
+ /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
+ * event for changing vlan_filtering setting upon slave ports leaving
+ * it. That is a good thing, because that lets us handle it and also
+ * handle the case where the switch's vlan_filtering setting is global
+ * (not per port). When that happens, the correct moment to trigger the
+ * vlan_filtering callback is only when the last port leaves the last
+ * VLAN-aware bridge.
+ */
+ if (change_vlan_filtering && ds->vlan_filtering_is_global) {
+ dsa_switch_for_each_port(dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(dp);
+
+ if (br && br_vlan_enabled(br)) {
+ change_vlan_filtering = false;
+ break;
+ }
+ }
+ }
+
+ if (!change_vlan_filtering)
+ return;
+
+ err = dsa_port_vlan_filtering(dp, vlan_filtering, &extack);
+ if (extack._msg) {
+ dev_err(ds->dev, "port %d: %s\n", dp->index,
+ extack._msg);
+ }
+ if (err && err != -EOPNOTSUPP) {
+ dev_err(ds->dev,
+ "port %d failed to reset VLAN filtering to %d: %pe\n",
+ dp->index, vlan_filtering, ERR_PTR(err));
+ }
+}
+
static int dsa_port_inherit_brport_flags(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
@@ -313,7 +366,8 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
return 0;
}
-static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
+static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp,
+ struct dsa_bridge bridge)
{
/* Configure the port for standalone mode (no address learning,
* flood everything).
@@ -333,7 +387,7 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
*/
dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
- /* VLAN filtering is handled by dsa_switch_bridge_leave */
+ dsa_port_reset_vlan_filtering(dp, bridge);
/* Ageing time may be global to the switch chip, so don't change it
* here because we have no good reason (or value) to change it to.
@@ -405,9 +459,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_bridge_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.extack = extack,
};
struct net_device *dev = dp->slave;
@@ -477,9 +529,7 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
};
int err;
@@ -502,15 +552,14 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
"port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n",
dp->index, ERR_PTR(err));
- dsa_port_switchdev_unsync_attrs(dp);
+ dsa_port_switchdev_unsync_attrs(dp, info.bridge);
}
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo)
{
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
};
bool tx_enabled;
@@ -579,8 +628,7 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.info = uinfo,
};
struct net_device *bridge_dev;
@@ -625,8 +673,7 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev)
{
struct net_device *br = dsa_port_bridge_dev_get(dp);
struct dsa_notifier_lag_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
};
int err;
@@ -873,6 +920,14 @@ int dsa_port_bridge_flags(struct dsa_port *dp,
return 0;
}
+void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_set_host_flood)
+ ds->ops->port_set_host_flood(ds, dp->index, uc, mc);
+}
+
int dsa_port_vlan_msti(struct dsa_port *dp,
const struct switchdev_vlan_msti *msti)
{
@@ -884,13 +939,10 @@ int dsa_port_vlan_msti(struct dsa_port *dp,
return ds->ops->vlan_msti_set(ds, *dp->bridge, msti);
}
-int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool targeted_match)
+int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu)
{
struct dsa_notifier_mtu_info info = {
- .sw_index = dp->ds->index,
- .targeted_match = targeted_match,
- .port = dp->index,
+ .dp = dp,
.mtu = new_mtu,
};
@@ -901,8 +953,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
.db = {
@@ -925,8 +976,7 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
.db = {
@@ -946,8 +996,7 @@ static int dsa_port_host_fdb_add(struct dsa_port *dp,
struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
.db = db,
@@ -998,8 +1047,7 @@ static int dsa_port_host_fdb_del(struct dsa_port *dp,
struct dsa_db db)
{
struct dsa_notifier_fdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.addr = addr,
.vid = vid,
.db = db,
@@ -1094,8 +1142,7 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
.db = {
.type = DSA_DB_BRIDGE,
@@ -1113,8 +1160,7 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
.db = {
.type = DSA_DB_BRIDGE,
@@ -1133,8 +1179,7 @@ static int dsa_port_host_mdb_add(const struct dsa_port *dp,
struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
.db = db,
};
@@ -1178,8 +1223,7 @@ static int dsa_port_host_mdb_del(const struct dsa_port *dp,
struct dsa_db db)
{
struct dsa_notifier_mdb_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.mdb = mdb,
.db = db,
};
@@ -1223,8 +1267,7 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
.extack = extack,
};
@@ -1236,8 +1279,7 @@ int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
};
@@ -1249,8 +1291,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp,
struct netlink_ext_ack *extack)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
.extack = extack,
};
@@ -1270,8 +1311,7 @@ int dsa_port_host_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan)
{
struct dsa_notifier_vlan_info info = {
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vlan = vlan,
};
struct dsa_port *cpu_dp = dp->cpu_dp;
@@ -1692,9 +1732,7 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
{
struct dsa_notifier_tag_8021q_vlan_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vid = vid,
};
@@ -1707,9 +1745,7 @@ int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast)
{
struct dsa_notifier_tag_8021q_vlan_info info = {
- .tree_index = dp->ds->dst->index,
- .sw_index = dp->ds->index,
- .port = dp->index,
+ .dp = dp,
.vid = vid,
};
int err;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8022d50584db..801a5d445833 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -262,37 +262,13 @@ static int dsa_slave_close(struct net_device *dev)
return 0;
}
-/* Keep flooding enabled towards this port's CPU port as long as it serves at
- * least one port in the tree that requires it.
- */
-static void dsa_port_manage_cpu_flood(struct dsa_port *dp)
+static void dsa_slave_manage_host_flood(struct net_device *dev)
{
- struct switchdev_brport_flags flags = {
- .mask = BR_FLOOD | BR_MCAST_FLOOD,
- };
- struct dsa_switch_tree *dst = dp->ds->dst;
- struct dsa_port *cpu_dp = dp->cpu_dp;
- struct dsa_port *other_dp;
- int err;
-
- list_for_each_entry(other_dp, &dst->ports, list) {
- if (!dsa_port_is_user(other_dp))
- continue;
-
- if (other_dp->cpu_dp != cpu_dp)
- continue;
-
- if (other_dp->slave->flags & IFF_ALLMULTI)
- flags.val |= BR_MCAST_FLOOD;
- if (other_dp->slave->flags & IFF_PROMISC)
- flags.val |= BR_FLOOD | BR_MCAST_FLOOD;
- }
-
- err = dsa_port_pre_bridge_flags(dp, flags, NULL);
- if (err)
- return;
+ bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ bool uc = dev->flags & IFF_PROMISC;
- dsa_port_bridge_flags(cpu_dp, flags, NULL);
+ dsa_port_set_host_flood(dp, uc, mc);
}
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
@@ -310,7 +286,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
if (dsa_switch_supports_uc_filtering(ds) &&
dsa_switch_supports_mc_filtering(ds))
- dsa_port_manage_cpu_flood(dp);
+ dsa_slave_manage_host_flood(dev);
}
static void dsa_slave_set_rx_mode(struct net_device *dev)
@@ -1806,11 +1782,9 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
{
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- struct dsa_port *dp_iter;
- struct dsa_port *cpu_dp;
- int port = p->dp->index;
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
int largest_mtu = 0;
int new_master_mtu;
int old_master_mtu;
@@ -1821,33 +1795,28 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
if (!ds->ops->port_change_mtu)
return -EOPNOTSUPP;
- list_for_each_entry(dp_iter, &ds->dst->ports, list) {
+ dsa_tree_for_each_user_port(other_dp, ds->dst) {
int slave_mtu;
- if (!dsa_port_is_user(dp_iter))
- continue;
-
/* During probe, this function will be called for each slave
* device, while not all of them have been allocated. That's
* ok, it doesn't change what the maximum is, so ignore it.
*/
- if (!dp_iter->slave)
+ if (!other_dp->slave)
continue;
/* Pretend that we already applied the setting, which we
* actually haven't (still haven't done all integrity checks)
*/
- if (dp_iter == dp)
+ if (dp == other_dp)
slave_mtu = new_mtu;
else
- slave_mtu = dp_iter->slave->mtu;
+ slave_mtu = other_dp->slave->mtu;
if (largest_mtu < slave_mtu)
largest_mtu = slave_mtu;
}
- cpu_dp = dsa_to_port(ds, port)->cpu_dp;
-
mtu_limit = min_t(int, master->max_mtu, dev->max_mtu);
old_master_mtu = master->mtu;
new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops);
@@ -1866,15 +1835,14 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
goto out_master_failed;
/* We only need to propagate the MTU of the CPU port to
- * upstream switches, so create a non-targeted notifier which
- * updates all switches.
+ * upstream switches, so emit a notifier which updates them.
*/
- err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false);
+ err = dsa_port_mtu_change(cpu_dp, cpu_mtu);
if (err)
goto out_cpu_failed;
}
- err = dsa_port_mtu_change(dp, new_mtu, true);
+ err = ds->ops->port_change_mtu(ds, dp->index, new_mtu);
if (err)
goto out_port_failed;
@@ -1887,8 +1855,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
out_port_failed:
if (new_master_mtu != old_master_mtu)
dsa_port_mtu_change(cpu_dp, old_master_mtu -
- dsa_tag_protocol_overhead(cpu_dp->tag_ops),
- false);
+ dsa_tag_protocol_overhead(cpu_dp->tag_ops));
out_cpu_failed:
if (new_master_mtu != old_master_mtu)
dev_set_mtu(master, old_master_mtu);
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index d25cd1da3eb3..2b56218fc57c 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -49,19 +49,7 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
static bool dsa_port_mtu_match(struct dsa_port *dp,
struct dsa_notifier_mtu_info *info)
{
- if (dp->ds->index == info->sw_index && dp->index == info->port)
- return true;
-
- /* Do not propagate to other switches in the tree if the notifier was
- * targeted for a single switch.
- */
- if (info->targeted_match)
- return false;
-
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
- return true;
-
- return false;
+ return dp == info->dp || dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp);
}
static int dsa_switch_mtu(struct dsa_switch *ds,
@@ -88,25 +76,26 @@ static int dsa_switch_mtu(struct dsa_switch *ds,
static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index) {
+ if (info->dp->ds == ds) {
if (!ds->ops->port_bridge_join)
return -EOPNOTSUPP;
- err = ds->ops->port_bridge_join(ds, info->port, info->bridge,
+ err = ds->ops->port_bridge_join(ds, info->dp->index,
+ info->bridge,
&info->tx_fwd_offload,
info->extack);
if (err)
return err;
}
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join) {
- err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
- info->sw_index,
- info->port, info->bridge,
+ if (info->dp->ds != ds && ds->ops->crosschip_bridge_join) {
+ err = ds->ops->crosschip_bridge_join(ds,
+ info->dp->ds->dst->index,
+ info->dp->ds->index,
+ info->dp->index,
+ info->bridge,
info->extack);
if (err)
return err;
@@ -115,79 +104,18 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
return 0;
}
-static int dsa_switch_sync_vlan_filtering(struct dsa_switch *ds,
- struct dsa_notifier_bridge_info *info)
-{
- struct netlink_ext_ack extack = {0};
- bool change_vlan_filtering = false;
- bool vlan_filtering;
- struct dsa_port *dp;
- int err;
-
- if (ds->needs_standalone_vlan_filtering &&
- !br_vlan_enabled(info->bridge.dev)) {
- change_vlan_filtering = true;
- vlan_filtering = true;
- } else if (!ds->needs_standalone_vlan_filtering &&
- br_vlan_enabled(info->bridge.dev)) {
- change_vlan_filtering = true;
- vlan_filtering = false;
- }
-
- /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
- * event for changing vlan_filtering setting upon slave ports leaving
- * it. That is a good thing, because that lets us handle it and also
- * handle the case where the switch's vlan_filtering setting is global
- * (not per port). When that happens, the correct moment to trigger the
- * vlan_filtering callback is only when the last port leaves the last
- * VLAN-aware bridge.
- */
- if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
-
- if (br && br_vlan_enabled(br)) {
- change_vlan_filtering = false;
- break;
- }
- }
- }
-
- if (change_vlan_filtering) {
- err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- vlan_filtering, &extack);
- if (extack._msg)
- dev_err(ds->dev, "port %d: %s\n", info->port,
- extack._msg);
- if (err && err != -EOPNOTSUPP)
- return err;
- }
-
- return 0;
-}
-
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- struct dsa_switch_tree *dst = ds->dst;
- int err;
-
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_leave)
- ds->ops->port_bridge_leave(ds, info->port, info->bridge);
+ if (info->dp->ds == ds && ds->ops->port_bridge_leave)
+ ds->ops->port_bridge_leave(ds, info->dp->index, info->bridge);
- if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_leave)
- ds->ops->crosschip_bridge_leave(ds, info->tree_index,
- info->sw_index, info->port,
+ if (info->dp->ds != ds && ds->ops->crosschip_bridge_leave)
+ ds->ops->crosschip_bridge_leave(ds, info->dp->ds->dst->index,
+ info->dp->ds->index,
+ info->dp->index,
info->bridge);
- if (ds->dst->index == info->tree_index && ds->index == info->sw_index) {
- err = dsa_switch_sync_vlan_filtering(ds, info);
- if (err)
- return err;
- }
-
return 0;
}
@@ -196,16 +124,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* emitted and its dedicated CPU port.
*/
static bool dsa_port_host_address_match(struct dsa_port *dp,
- int info_sw_index, int info_port)
+ const struct dsa_port *targeted_dp)
{
- struct dsa_port *targeted_dp, *cpu_dp;
- struct dsa_switch *targeted_ds;
-
- targeted_ds = dsa_switch_find(dp->ds->dst->index, info_sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info_port);
- cpu_dp = targeted_dp->cpu_dp;
+ struct dsa_port *cpu_dp = targeted_dp->cpu_dp;
- if (dsa_switch_is_upstream_of(dp->ds, targeted_ds))
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds))
return dp->index == dsa_towards_port(dp->ds, cpu_dp->ds->index,
cpu_dp->index);
@@ -473,8 +396,7 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
+ if (dsa_port_host_address_match(dp, info->dp)) {
err = dsa_port_do_fdb_add(dp, info->addr, info->vid,
info->db);
if (err)
@@ -495,8 +417,7 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
+ if (dsa_port_host_address_match(dp, info->dp)) {
err = dsa_port_do_fdb_del(dp, info->addr, info->vid,
info->db);
if (err)
@@ -510,7 +431,7 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_add)
@@ -522,7 +443,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
static int dsa_switch_fdb_del(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_fdb_del)
@@ -570,12 +491,12 @@ static int dsa_switch_lag_fdb_del(struct dsa_switch *ds,
static int dsa_switch_lag_change(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_change)
- return ds->ops->port_lag_change(ds, info->port);
+ if (info->dp->ds == ds && ds->ops->port_lag_change)
+ return ds->ops->port_lag_change(ds, info->dp->index);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_change)
- return ds->ops->crosschip_lag_change(ds, info->sw_index,
- info->port);
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_change)
+ return ds->ops->crosschip_lag_change(ds, info->dp->ds->index,
+ info->dp->index);
return 0;
}
@@ -583,13 +504,13 @@ static int dsa_switch_lag_change(struct dsa_switch *ds,
static int dsa_switch_lag_join(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_join)
- return ds->ops->port_lag_join(ds, info->port, info->lag,
+ if (info->dp->ds == ds && ds->ops->port_lag_join)
+ return ds->ops->port_lag_join(ds, info->dp->index, info->lag,
info->info);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_join)
- return ds->ops->crosschip_lag_join(ds, info->sw_index,
- info->port, info->lag,
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_join)
+ return ds->ops->crosschip_lag_join(ds, info->dp->ds->index,
+ info->dp->index, info->lag,
info->info);
return -EOPNOTSUPP;
@@ -598,12 +519,12 @@ static int dsa_switch_lag_join(struct dsa_switch *ds,
static int dsa_switch_lag_leave(struct dsa_switch *ds,
struct dsa_notifier_lag_info *info)
{
- if (ds->index == info->sw_index && ds->ops->port_lag_leave)
- return ds->ops->port_lag_leave(ds, info->port, info->lag);
+ if (info->dp->ds == ds && ds->ops->port_lag_leave)
+ return ds->ops->port_lag_leave(ds, info->dp->index, info->lag);
- if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave)
- return ds->ops->crosschip_lag_leave(ds, info->sw_index,
- info->port, info->lag);
+ if (info->dp->ds != ds && ds->ops->crosschip_lag_leave)
+ return ds->ops->crosschip_lag_leave(ds, info->dp->ds->index,
+ info->dp->index, info->lag);
return -EOPNOTSUPP;
}
@@ -611,7 +532,7 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
static int dsa_switch_mdb_add(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_add)
@@ -623,7 +544,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
static int dsa_switch_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
- int port = dsa_towards_port(ds, info->sw_index, info->port);
+ int port = dsa_towards_port(ds, info->dp->ds->index, info->dp->index);
struct dsa_port *dp = dsa_to_port(ds, port);
if (!ds->ops->port_mdb_del)
@@ -642,8 +563,7 @@ static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
+ if (dsa_port_host_address_match(dp, info->dp)) {
err = dsa_port_do_mdb_add(dp, info->mdb, info->db);
if (err)
break;
@@ -663,8 +583,7 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_address_match(dp, info->sw_index,
- info->port)) {
+ if (dsa_port_host_address_match(dp, info->dp)) {
err = dsa_port_do_mdb_del(dp, info->mdb, info->db);
if (err)
break;
@@ -678,29 +597,18 @@ static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
static bool dsa_port_vlan_match(struct dsa_port *dp,
struct dsa_notifier_vlan_info *info)
{
- if (dp->ds->index == info->sw_index && dp->index == info->port)
- return true;
-
- if (dsa_port_is_dsa(dp))
- return true;
-
- return false;
+ return dsa_port_is_dsa(dp) || dp == info->dp;
}
/* Host VLANs match on the targeted port's CPU port, and on all DSA ports
* (upstream and downstream) of that switch and its upstream switches.
*/
static bool dsa_port_host_vlan_match(struct dsa_port *dp,
- struct dsa_notifier_vlan_info *info)
+ const struct dsa_port *targeted_dp)
{
- struct dsa_port *targeted_dp, *cpu_dp;
- struct dsa_switch *targeted_ds;
+ struct dsa_port *cpu_dp = targeted_dp->cpu_dp;
- targeted_ds = dsa_switch_find(dp->ds->dst->index, info->sw_index);
- targeted_dp = dsa_to_port(targeted_ds, info->port);
- cpu_dp = targeted_dp->cpu_dp;
-
- if (dsa_switch_is_upstream_of(dp->ds, targeted_ds))
+ if (dsa_switch_is_upstream_of(dp->ds, targeted_dp->ds))
return dsa_port_is_dsa(dp) || dp == cpu_dp;
return false;
@@ -858,7 +766,7 @@ static int dsa_switch_host_vlan_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_vlan_match(dp, info)) {
+ if (dsa_port_host_vlan_match(dp, info->dp)) {
err = dsa_port_do_vlan_add(dp, info->vlan,
info->extack);
if (err)
@@ -879,7 +787,7 @@ static int dsa_switch_host_vlan_del(struct dsa_switch *ds,
return -EOPNOTSUPP;
dsa_switch_for_each_port(dp, ds) {
- if (dsa_port_host_vlan_match(dp, info)) {
+ if (dsa_port_host_vlan_match(dp, info->dp)) {
err = dsa_port_do_vlan_del(dp, info->vlan);
if (err)
return err;
@@ -901,14 +809,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
ASSERT_RTNL();
- dsa_switch_for_each_cpu_port(cpu_dp, ds) {
- err = ds->ops->change_tag_protocol(ds, cpu_dp->index,
- tag_ops->proto);
- if (err)
- return err;
+ err = ds->ops->change_tag_protocol(ds, tag_ops->proto);
+ if (err)
+ return err;
+ dsa_switch_for_each_cpu_port(cpu_dp, ds)
dsa_port_set_tag_protocol(cpu_dp, tag_ops);
- }
/* Now that changing the tag protocol can no longer fail, let's update
* the remaining bits which are "duplicated for faster access", and the
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index a786569203f0..01a427800797 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -196,15 +196,7 @@ static bool
dsa_port_tag_8021q_vlan_match(struct dsa_port *dp,
struct dsa_notifier_tag_8021q_vlan_info *info)
{
- struct dsa_switch *ds = dp->ds;
-
- if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
- return true;
-
- if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
- return dp->index == info->port;
-
- return false;
+ return dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp) || dp == info->dp;
}
int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ebcc812735a4..62b89d6f54fd 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -391,7 +391,7 @@ EXPORT_SYMBOL(ether_setup);
struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
unsigned int rxqs)
{
- return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
+ return alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_ENUM,
ether_setup, txqs, rxqs);
}
EXPORT_SYMBOL(alloc_etherdev_mqs);
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 0c5210015911..566adf85e658 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(400000, CR4, Full),
__DEFINE_LINK_MODE_NAME(100, FX, Half),
__DEFINE_LINK_MODE_NAME(100, FX, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1L, Full),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_T1 1
#define __LINK_MODE_LANES_X 1
#define __LINK_MODE_LANES_FX 1
+#define __LINK_MODE_LANES_T1L 1
#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
[ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
@@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
__DEFINE_LINK_MODE_PARAMS(100, FX, Half),
__DEFINE_LINK_MODE_PARAMS(100, FX, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1L, Full),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 29d01662a48b..7919ddb2371c 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -363,7 +363,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_CQE_SIZE + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 9f33c9689b56..fa3ec8d438f7 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -55,7 +55,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_TX */
nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */
- nla_total_size(sizeof(u32)); /* _RINGS_CQE_SIZE */
+ nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */
+ nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -94,7 +95,8 @@ static int rings_fill_reply(struct sk_buff *skb,
(nla_put_u8(skb, ETHTOOL_A_RINGS_TCP_DATA_SPLIT,
kr->tcp_data_split))) ||
(kr->cqe_size &&
- (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))))
+ (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) ||
+ nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push))
return -EMSGSIZE;
return 0;
@@ -123,6 +125,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
};
int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
@@ -149,6 +152,33 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
if (!ops->get_ringparam || !ops->set_ringparam)
goto out_dev;
+ if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
+ "setting rx buf len not supported");
+ goto out_dev;
+ }
+
+ if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_CQE_SIZE],
+ "setting cqe size not supported");
+ goto out_dev;
+ }
+
+ if (tb[ETHTOOL_A_RINGS_TX_PUSH] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) {
+ ret = -EOPNOTSUPP;
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_TX_PUSH],
+ "setting tx push not supported");
+ goto out_dev;
+ }
+
rtnl_lock();
ret = ethnl_ops_begin(dev);
if (ret < 0)
@@ -165,6 +195,8 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod);
ethnl_update_u32(&kernel_ringparam.cqe_size,
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
+ ethnl_update_u8(&kernel_ringparam.tx_push,
+ tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
ret = 0;
if (!mod)
goto out_ops;
@@ -187,24 +219,6 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
goto out_ops;
}
- if (kernel_ringparam.rx_buf_len != 0 &&
- !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
- ret = -EOPNOTSUPP;
- NL_SET_ERR_MSG_ATTR(info->extack,
- tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
- "setting rx buf len not supported");
- goto out_ops;
- }
-
- if (kernel_ringparam.cqe_size &&
- !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
- ret = -EOPNOTSUPP;
- NL_SET_ERR_MSG_ATTR(info->extack,
- tb[ETHTOOL_A_RINGS_CQE_SIZE],
- "setting cqe size not supported");
- goto out_ops;
- }
-
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
if (ret < 0)
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 3b2366a88c3c..718fb77bb372 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -308,13 +308,13 @@ out:
}
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
struct sk_buff *skb;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -328,7 +328,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (flags & MSG_TRUNC)
copied = skb->len;
@@ -695,7 +695,7 @@ out:
}
static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
size_t copied = 0;
int err = -EOPNOTSUPP;
@@ -703,7 +703,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct dgram_sock *ro = dgram_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -718,7 +718,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (saddr) {
/* Clear the implicit padding in struct sockaddr_ieee802154
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..e983bb0c5012 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -321,7 +321,6 @@ config NET_UDP_TUNNEL
config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
- select XFRM
select NET_UDP_TUNNEL
help
Foo over UDP allows any IP protocol to be directly encapsulated
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 72fde2888ad2..93da9f783bec 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -836,7 +836,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
EXPORT_SYMBOL(inet_sendpage);
INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -848,8 +848,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
sock_rps_record_flow(sk);
err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -1234,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
/* Query new route. */
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
- sk->sk_bound_dev_if, sk->sk_protocol,
- inet->inet_sport, inet->inet_dport, sk);
+ rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
+ sk->sk_protocol, inet->inet_sport,
+ inet->inet_dport, sk);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 2d0c05ca9c6f..ab4a5601c82a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1304,9 +1304,9 @@ static struct packet_type arp_packet_type __read_mostly = {
.func = arp_rcv,
};
+#ifdef CONFIG_PROC_FS
#if IS_ENABLED(CONFIG_AX25)
-/* ------------------------------------------------------------------------ */
/*
* ax25 -> ASCII conversion
*/
@@ -1412,16 +1412,13 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}
-/* ------------------------------------------------------------------------ */
-
static const struct seq_operations arp_seq_ops = {
.start = arp_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
.show = arp_seq_show,
};
-
-/* ------------------------------------------------------------------------ */
+#endif /* CONFIG_PROC_FS */
static int __net_init arp_net_init(struct net *net)
{
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 48f337ccf949..ffd57523331f 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
saddr = inet->mc_addr;
}
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
- RT_CONN_FLAGS(sk), oif,
- sk->sk_protocol,
- inet->inet_sport, usin->sin_port, sk);
+ rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
+ sk->sk_protocol, inet->inet_sport,
+ usin->sin_port, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3d6d33ac20cc..b2366ad540e6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2571,7 +2571,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
struct devinet_sysctl_table *t;
char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
- t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto out;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d747166bb291..b21238df3301 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -705,7 +705,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen, hlen, elen;
int padlen, trimlen;
@@ -717,11 +716,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
elen = skb->len - hlen;
- if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
- ret = xo->proto;
- goto out;
- }
-
if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
BUG();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index af8209f912ab..f361d3d56be2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1384,7 +1384,7 @@ static void nl_fib_input(struct sk_buff *skb)
return;
nlh = nlmsg_hdr(skb);
- frn = (struct fib_result_nl *) nlmsg_data(nlh);
+ frn = nlmsg_data(nlh);
nl_fib_lookup(net, frn);
portid = NETLINK_CB(skb).portid; /* netlink portid */
@@ -1425,7 +1425,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event,
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct in_ifaddr *ifa = ptr;
struct net_device *dev = ifa->ifa_dev->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 001fea394bde..513f475c6a53 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -145,7 +145,7 @@ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
int flags,
struct fib_lookup_arg *arg)
{
- struct fib_result *result = (struct fib_result *) arg->result;
+ struct fib_result *result = arg->result;
struct net_device *dev = NULL;
if (result->fi) {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ccb62038f6a4..a57ba23571c9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -524,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
@@ -1781,7 +1781,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
- rtm->rtm_tos = fri->tos;
+ rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fb0e49c36c2e..2734c3af7e24 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -99,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -1032,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) &&
- fa->fa_info == fri->fi && fa->fa_type == fri->type)
+ fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
+ fa->fa_type == fri->type)
return fa;
}
@@ -2305,7 +2305,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
@@ -2625,7 +2625,7 @@ static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
- struct net *net = (struct net *)seq->private;
+ struct net *net = seq->private;
unsigned int h;
seq_printf(seq,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d085cc8d96c..025a33c1b04d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -16,7 +16,6 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
-#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72a375c7f417..efea0e796f06 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert);
*/
struct icmp_control {
- bool (*handler)(struct sk_buff *skb);
+ enum skb_drop_reason (*handler)(struct sk_buff *skb);
short error; /* This ICMP is classed as an error message */
};
@@ -342,7 +342,7 @@ void icmp_out_count(struct net *net, unsigned char type)
static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
struct sk_buff *skb)
{
- struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
+ struct icmp_bxm *icmp_param = from;
__wsum csum;
csum = skb_copy_and_csum_bits(icmp_param->skb,
@@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto)
* ICMP_PARAMETERPROB.
*/
-static bool icmp_unreach(struct sk_buff *skb)
+static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
@@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmph = icmp_hdr(skb);
iph = (const struct iphdr *)skb->data;
- if (iph->ihl < 5) /* Mangled header, drop. */
+ if (iph->ihl < 5) { /* Mangled header, drop. */
+ reason = SKB_DROP_REASON_IP_INHDR;
goto out_err;
+ }
switch (icmph->type) {
case ICMP_DEST_UNREACH:
@@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmp_socket_deliver(skb, info);
out:
- return true;
+ return reason;
out_err:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return false;
+ return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
}
@@ -952,20 +955,20 @@ out_err:
* Handle ICMP_REDIRECT.
*/
-static bool icmp_redirect(struct sk_buff *skb)
+static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
/* there aught to be a stat */
- return false;
+ return SKB_DROP_REASON_NOMEM;
}
icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb)
* See also WRT handling of options once they are done and working.
*/
-static bool icmp_echo(struct sk_buff *skb)
+static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
struct net *net;
@@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
if (net->ipv4.sysctl_icmp_echo_ignore_all)
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
@@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb)
if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/* Helper for icmp_echo and icmpv6_echo_reply.
@@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe);
* MUST be accurate to a few minutes.
* MUST be updated at least at 15Hz.
*/
-static bool icmp_timestamp(struct sk_buff *skb)
+static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
/*
@@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb)
icmp_param.data_len = 0;
icmp_param.head_len = sizeof(struct icmphdr) + 12;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
out_err:
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
-static bool icmp_discard(struct sk_buff *skb)
+static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
{
/* pretend it was a success */
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb)
*/
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph;
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
- bool success;
+ struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
goto drop;
@@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*icmph));
- if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
skb_set_network_header(skb, nh);
}
@@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb)
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
*/
- success = icmp_echo(skb);
- goto success_check;
+ reason = icmp_echo(skb);
+ goto reason_check;
}
if (icmph->type == ICMP_EXT_ECHOREPLY) {
- success = ping_rcv(skb);
- goto success_check;
+ reason = ping_rcv(skb);
+ goto reason_check;
}
/*
@@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb)
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
- if (icmph->type > NR_ICMP_TYPES)
+ if (icmph->type > NR_ICMP_TYPES) {
+ reason = SKB_DROP_REASON_UNHANDLED_PROTO;
goto error;
+ }
/*
* Parse the ICMP message
@@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb)
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
}
- success = icmp_pointers[icmph->type].handler(skb);
-success_check:
- if (success) {
+ reason = icmp_pointers[icmph->type].handler(skb);
+reason_check:
+ if (!reason) {
consume_skb(skb);
return NET_RX_SUCCESS;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1d9e6d5e9a76..b65d074d9620 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2839,7 +2839,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n");
else {
- struct ip_mc_list *im = (struct ip_mc_list *)v;
+ struct ip_mc_list *im = v;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
char *querier;
long delta;
@@ -2983,7 +2983,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
{
- struct ip_sf_list *psf = (struct ip_sf_list *)v;
+ struct ip_sf_list *psf = v;
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e5b53c2bb26..c0b7e6c21360 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,6 +117,32 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
+static bool use_bhash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ int addr_type;
+
+ if (sk->sk_family == AF_INET6) {
+ addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ return addr_type != IPV6_ADDR_ANY &&
+ addr_type != IPV6_ADDR_MAPPED;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
+static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
+ int port)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr nulladdr = {};
+
+ if (sk->sk_family == AF_INET6)
+ return ipv6_portaddr_hash(net, &nulladdr, port);
+#endif
+ return ipv4_portaddr_hash(net, 0, port);
+}
+
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -130,16 +156,71 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb,
- bool relax, bool reuseport_ok)
+static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ int bound_dev_if2;
+
+ if (sk == sk2)
+ return false;
+
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+
+ if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2) {
+ if (sk->sk_reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if (!relax || (!reuseport_ok && sk->sk_reuseport &&
+ sk2->sk_reuseport && reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(sk_uid, sock_i_uid(sk2)))))
+ return true;
+ } else if (!reuseport_ok || !sk->sk_reuseport ||
+ !sk2->sk_reuseport || !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(sk_uid, sock_i_uid(sk2)))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool check_bhash2_conflict(const struct sock *sk,
+ struct inet_bind2_bucket *tb2, kuid_t sk_uid,
+ bool relax, bool reuseport_cb_ok,
+ bool reuseport_ok)
{
struct sock *sk2;
- bool reuseport_cb_ok;
- bool reuse = sk->sk_reuse;
- bool reuseport = !!sk->sk_reuseport;
- struct sock_reuseport *reuseport_cb;
+
+ sk_for_each_bound_bhash2(sk2, &tb2->owners) {
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ continue;
+
+ if (bind_conflict_exist(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+ return false;
+}
+
+/* This should be called only when the corresponding inet_bind_bucket spinlock
+ * is held
+ */
+static int inet_csk_bind_conflict(const struct sock *sk, int port,
+ struct inet_bind_bucket *tb,
+ struct inet_bind2_bucket *tb2, /* may be null */
+ bool relax, bool reuseport_ok)
+{
+ struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
kuid_t uid = sock_i_uid((struct sock *)sk);
+ struct sock_reuseport *reuseport_cb;
+ struct inet_bind2_hashbucket *head2;
+ bool reuseport_cb_ok;
+ struct sock *sk2;
+ struct net *net;
+ int l3mdev;
+ u32 hash;
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -150,36 +231,42 @@ static int inet_csk_bind_conflict(const struct sock *sk,
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners list belong to the same net - the
- * one this bucket belongs to.
+ * in tb->owners and tb2->owners list belong
+ * to the same net
*/
- sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
- if (reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if ((!relax ||
- (!reuseport_ok &&
- reuseport && sk2->sk_reuseport &&
- reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(uid, sock_i_uid(sk2))))) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- break;
- } else if (!reuseport_ok ||
- !reuseport || !sk2->sk_reuseport ||
- !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(uid, sock_i_uid(sk2)))) {
- if (inet_rcv_saddr_equal(sk, sk2, true))
- break;
- }
- }
+ if (!use_bhash2_on_bind(sk)) {
+ sk_for_each_bound(sk2, &tb->owners)
+ if (bind_conflict_exist(sk, sk2, uid, relax,
+ reuseport_cb_ok, reuseport_ok) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ return true;
+
+ return false;
}
- return sk2 != NULL;
+
+ if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok))
+ return true;
+
+ net = sock_net(sk);
+
+ /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
+ * INADDR_ANY (if ipv4) socket.
+ */
+ hash = get_bhash2_nulladdr_hash(sk, net, port);
+ head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+
+ l3mdev = inet_sk_bound_l3mdev(sk);
+ inet_bind_bucket_for_each(tb2, &head2->chain)
+ if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
+ break;
+
+ if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
+ reuseport_ok))
+ return true;
+
+ return false;
}
/*
@@ -187,16 +274,20 @@ static int inet_csk_bind_conflict(const struct sock *sk,
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
+ struct inet_bind2_bucket **tb2_ret,
+ struct inet_bind2_hashbucket **head2_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int port = 0;
+ struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- bool relax = false;
int i, low, high, attempt_half;
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
+ bool relax = false;
+ int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -235,10 +326,12 @@ other_parity_scan:
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
+ tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
+ &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
- if (!inet_csk_bind_conflict(sk, tb, relax, false))
+ if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (!inet_csk_bind_conflict(sk, port, tb, tb2,
+ relax, false))
goto success;
goto next_port;
}
@@ -268,6 +361,8 @@ next_port:
success:
*port_ret = port;
*tb_ret = tb;
+ *tb2_ret = tb2;
+ *head2_ret = head2;
return head;
}
@@ -363,54 +458,81 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- int ret = 1, port = snum;
+ bool bhash_created = false, bhash2_created = false;
+ struct inet_bind2_bucket *tb2 = NULL;
+ struct inet_bind2_hashbucket *head2;
+ struct inet_bind_bucket *tb = NULL;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
+ bool found_port = false;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &port);
+ head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
if (!head)
return ret;
+ if (tb && tb2)
+ goto success;
+ found_port = true;
+ } else {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (check_bind_bucket_match(tb, net, port, l3mdev))
+ break;
+
+ tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
+ &head2);
+ }
+
+ if (!tb) {
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
+ head, port, l3mdev);
if (!tb)
- goto tb_not_found;
- goto success;
+ goto fail_unlock;
+ bhash_created = true;
}
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port)
- goto tb_found;
-tb_not_found:
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port, l3mdev);
- if (!tb)
- goto fail_unlock;
-tb_found:
- if (!hlist_empty(&tb->owners)) {
+
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
+ net, head2, port, l3mdev, sk);
+ if (!tb2)
+ goto fail_unlock;
+ bhash2_created = true;
+ }
+
+ /* If we had to find an open port, we already checked for conflicts */
+ if (!found_port && !hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, tb, true, true))
+ if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
+ WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
+ if (ret) {
+ if (bhash_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ if (bhash2_created)
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
+ tb2);
+ }
spin_unlock_bh(&head->lock);
return ret;
}
@@ -957,6 +1079,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
+ newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 581b5b2d72a5..b812eb36f0e3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1028,12 +1028,13 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
goto skip_listen_ht;
- for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
num = 0;
- ilb = &hashinfo->listening_hash[i];
+ ilb = &hashinfo->lhash2[i];
+
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 63948f6aeca0..c9f9ac5013a7 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -510,7 +510,7 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
- struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
int sum_truesize;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a5d57fa679ca..e8de5e699b3f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -81,6 +81,41 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
return tb;
}
+struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
+ struct net *net,
+ struct inet_bind2_hashbucket *head,
+ const unsigned short port,
+ int l3mdev,
+ const struct sock *sk)
+{
+ struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+
+ if (tb) {
+ write_pnet(&tb->ib_net, net);
+ tb->l3mdev = l3mdev;
+ tb->port = port;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ else
+#endif
+ tb->rcv_saddr = sk->sk_rcv_saddr;
+ INIT_HLIST_HEAD(&tb->owners);
+ hlist_add_head(&tb->node, &head->chain);
+ }
+ return tb;
+}
+
+static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr,
+ &sk->sk_v6_rcv_saddr);
+#endif
+ return tb2->rcv_saddr == sk->sk_rcv_saddr;
+}
+
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
@@ -92,12 +127,25 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
}
}
+/* Caller must hold the lock for the corresponding hashbucket in the bhash table
+ * with local BH disabled
+ */
+void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
+{
+ if (hlist_empty(&tb->owners)) {
+ __hlist_del(&tb->node);
+ kmem_cache_free(cachep, tb);
+ }
+}
+
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- const unsigned short snum)
+ struct inet_bind2_bucket *tb2, const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
+ sk_add_bind2_node(sk, &tb2->owners);
+ inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
@@ -109,6 +157,7 @@ static void __inet_put_port(struct sock *sk)
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
@@ -117,6 +166,13 @@ static void __inet_put_port(struct sock *sk)
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ if (inet_csk(sk)->icsk_bind2_hash) {
+ tb2 = inet_csk(sk)->icsk_bind2_hash;
+ __sk_del_bind2_node(sk);
+ inet_csk(sk)->icsk_bind2_hash = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+ }
spin_unlock(&head->lock);
}
@@ -133,14 +189,19 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ struct inet_bind2_hashbucket *head_bhash2;
+ bool created_inet_bind_bucket = false;
+ struct net *net = sock_net(sk);
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- if (unlikely(!tb)) {
+ tb2 = inet_csk(sk)->icsk_bind2_hash;
+ if (unlikely(!tb || !tb2)) {
spin_unlock(&head->lock);
return -ENOENT;
}
@@ -153,25 +214,45 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), sock_net(sk)) &&
- tb->l3mdev == l3mdev && tb->port == port)
+ if (check_bind_bucket_match(tb, net, port, l3mdev))
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- sock_net(sk), head, port,
- l3mdev);
+ net, head, port, l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
+ created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
+
+ goto bhash2_find;
+ } else if (!bind2_bucket_addr_match(tb2, child)) {
+ l3mdev = inet_sk_bound_l3mdev(sk);
+
+bhash2_find:
+ tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
+ &head_bhash2);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
+ net, head_bhash2, port,
+ l3mdev, child);
+ if (!tb2)
+ goto error;
+ }
}
- inet_bind_hash(child, tb, port);
+ inet_bind_hash(child, tb, tb2, port);
spin_unlock(&head->lock);
return 0;
+
+error:
+ if (created_inet_bind_bucket)
+ inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
+ spin_unlock(&head->lock);
+ return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
@@ -193,42 +274,6 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2)
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- if (sk->sk_reuseport && sk->sk_family == AF_INET6)
- hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- else
- hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- ilb2->count++;
- spin_unlock(&ilb2->lock);
-}
-
-static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2 ||
- WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
- ilb2->count--;
- spin_unlock(&ilb2->lock);
-}
-
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
@@ -282,12 +327,11 @@ static struct sock *inet_lhash2_lookup(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -410,13 +454,11 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif, sdif))) {
+ if (unlikely(!inet_match(net, sk, acookie,
+ ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -465,8 +507,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -532,16 +573,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
if (esk->sk_hash != sk->sk_hash)
continue;
if (sk->sk_family == AF_INET) {
- if (unlikely(INET_MATCH(esk, net, acookie,
- sk->sk_daddr,
- sk->sk_rcv_saddr,
+ if (unlikely(inet_match(net, esk, acookie,
ports, dif, sdif))) {
return true;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- if (unlikely(INET6_MATCH(esk, net,
+ if (unlikely(inet6_match(net, esk,
&sk->sk_v6_daddr,
&sk->sk_v6_rcv_saddr,
ports, dif, sdif))) {
@@ -633,7 +672,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb;
+ struct inet_listen_hashbucket *ilb2;
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
@@ -643,25 +682,23 @@ int __inet_hash(struct sock *sk, struct sock *osk)
return 0;
}
WARN_ON(!sk_unhashed(sk));
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
- spin_lock(&ilb->lock);
+ spin_lock(&ilb2->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb);
+ err = inet_reuseport_add_sock(sk, ilb2);
if (err)
goto unlock;
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
- __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
- __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
- inet_hash2(hashinfo, sk);
- ilb->count++;
+ __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
- spin_unlock(&ilb->lock);
+ spin_unlock(&ilb2->lock);
return err;
}
@@ -678,23 +715,6 @@ int inet_hash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_hash);
-static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
-{
- if (sk_unhashed(sk))
- return;
-
- if (rcu_access_pointer(sk->sk_reuseport_cb))
- reuseport_stop_listen_sock(sk);
- if (ilb) {
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- inet_unhash2(hashinfo, sk);
- ilb->count--;
- }
- __sk_nulls_del_node_init_rcu(sk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-}
-
void inet_unhash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
@@ -703,25 +723,109 @@ void inet_unhash(struct sock *sk)
return;
if (sk->sk_state == TCP_LISTEN) {
- struct inet_listen_hashbucket *ilb;
+ struct inet_listen_hashbucket *ilb2;
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
/* Don't disable bottom halves while acquiring the lock to
* avoid circular locking dependency on PREEMPT_RT.
*/
- spin_lock(&ilb->lock);
- __inet_unhash(sk, ilb);
- spin_unlock(&ilb->lock);
+ spin_lock(&ilb2->lock);
+ if (sk_unhashed(sk)) {
+ spin_unlock(&ilb2->lock);
+ return;
+ }
+
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_stop_listen_sock(sk);
+
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ spin_unlock(&ilb2->lock);
} else {
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
- __inet_unhash(sk, NULL);
+ if (sk_unhashed(sk)) {
+ spin_unlock_bh(lock);
+ return;
+ }
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_unlock_bh(lock);
}
}
EXPORT_SYMBOL_GPL(inet_unhash);
+static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
+ struct net *net, unsigned short port,
+ int l3mdev, struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
+}
+
+bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
+ struct net *net, const unsigned short port,
+ int l3mdev, const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr nulladdr = {};
+
+ if (sk->sk_family == AF_INET6)
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev &&
+ ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
+ else
+#endif
+ return net_eq(ib2_net(tb), net) && tb->port == port &&
+ tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
+}
+
+static struct inet_bind2_hashbucket *
+inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
+ const struct net *net, unsigned short port)
+{
+ u32 hash;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6)
+ hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
+ else
+#endif
+ hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
+ return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
+}
+
+/* This should only be called when the spinlock for the socket's corresponding
+ * bind_hashbucket is held
+ */
+struct inet_bind2_bucket *
+inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
+ const unsigned short port, int l3mdev, struct sock *sk,
+ struct inet_bind2_hashbucket **head)
+{
+ struct inet_bind2_bucket *bhash2 = NULL;
+ struct inet_bind2_hashbucket *h;
+
+ h = inet_bhashfn_portaddr(hinfo, sk, net, port);
+ inet_bind_bucket_for_each(bhash2, &h->chain) {
+ if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
+ break;
+ }
+
+ if (head)
+ *head = h;
+
+ return bhash2;
+}
+
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
@@ -742,10 +846,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
+ struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
+ struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
+ bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
@@ -802,8 +909,7 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
- tb->port == port) {
+ if (check_bind_bucket_match(tb, net, port, l3mdev)) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -821,6 +927,7 @@ other_parity_scan:
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
+ tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
@@ -836,6 +943,17 @@ next_port:
return -EADDRNOTAVAIL;
ok:
+ /* Find the corresponding tb2 bucket since we need to
+ * add the socket to the bhash2 table as well
+ */
+ tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
+ if (!tb2) {
+ tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
+ head2, port, l3mdev, sk);
+ if (!tb2)
+ goto error;
+ }
+
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
@@ -845,7 +963,7 @@ ok:
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
+ inet_bind_hash(sk, tb, tb2, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
@@ -857,6 +975,12 @@ ok:
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
+
+error:
+ if (tb_created)
+ inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
+ spin_unlock_bh(&head->lock);
+ return -ENOMEM;
}
/*
@@ -874,29 +998,14 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
-void inet_hashinfo_init(struct inet_hashinfo *h)
-{
- int i;
-
- for (i = 0; i < INET_LHTABLE_SIZE; i++) {
- spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
- i + LISTENING_NULLS_BASE);
- h->listening_hash[i].count = 0;
- }
-
- h->lhash2 = NULL;
-}
-EXPORT_SYMBOL_GPL(inet_hashinfo_init);
-
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
{
int i;
for (i = 0; i <= h->lhash2_mask; i++) {
spin_lock_init(&h->lhash2[i].lock);
- INIT_HLIST_HEAD(&h->lhash2[i].head);
- h->lhash2[i].count = 0;
+ INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
}
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 92ba3350274b..e3aa436a1bdf 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
struct net *net;
+ SKB_DR(reason);
/* that should never happen */
if (skb->pkt_type != PACKET_HOST)
@@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
@@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb)
if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
- if (!xfrm4_route_forward(skb))
+ if (!xfrm4_route_forward(skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
rt = skb_rtable(skb);
@@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb)
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ SKB_DR_SET(reason, PKT_TOO_BIG);
goto drop;
}
@@ -169,7 +175,8 @@ too_many_hops:
/* Tell the sender its packet died... */
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ SKB_DR_SET(reason, IP_INHDR);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index aacee9dd771b..7e474a85deaf 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -748,6 +748,7 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -763,19 +764,15 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- tunnel->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- } else {
- dev->features &= ~NETIF_F_GSO_SOFTWARE;
- dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- }
- dev->features |= NETIF_F_LLTX;
- } else {
+ flags = tunnel->parms.o_flags;
+
+ if (flags & TUNNEL_SEQ ||
+ (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
+ } else {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
}
@@ -949,6 +946,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -957,25 +955,21 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES;
+ dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ flags = tunnel->parms.o_flags;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (flags & TUNNEL_SEQ)
+ return;
+ if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 95f7bb052784..b1165f717cd1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST) {
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c860519d57ee..13e6329784fb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -356,7 +356,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct mfc_cache_cmp_arg *cmparg = arg->key;
- struct mfc_cache *c = (struct mfc_cache *)ptr;
+ const struct mfc_cache *c = ptr;
return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
cmparg->mfc_origin != c->mfc_origin;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index aff707988e23..bd135165482a 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -45,8 +45,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
- if (!fl4.flowi4_oif)
- fl4.flowi4_oif = l3mdev_master_ifindex(dev);
+ fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_flags = flags;
fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 4eed5afca392..918c61fda0f3 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -80,6 +80,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct iphdr *niph;
struct icmphdr *icmph;
unsigned int len;
+ int dataoff;
__wsum csum;
u8 proto;
@@ -99,10 +100,11 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
return NULL;
+ dataoff = ip_hdrlen(oldskb);
proto = ip_hdr(oldskb)->protocol;
if (!skb_csum_unnecessary(oldskb) &&
- nf_reject_verify_csum(proto) &&
+ nf_reject_verify_csum(oldskb, dataoff, proto) &&
nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return NULL;
@@ -311,6 +313,7 @@ EXPORT_SYMBOL_GPL(nf_send_reset);
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
{
struct iphdr *iph = ip_hdr(skb_in);
+ int dataoff = ip_hdrlen(skb_in);
u8 proto = iph->protocol;
if (iph->frag_off & htons(IP_OFFSET))
@@ -320,12 +323,13 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
nf_reject_fill_skb_dst(skb_in) < 0)
return;
- if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
+ if (skb_csum_unnecessary(skb_in) ||
+ !nf_reject_verify_csum(skb_in, dataoff, proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
}
- if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
EXPORT_SYMBOL_GPL(nf_send_unreach);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 4151eb1262dd..b75cac69bd7e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aa9a11b20d18..1a43ca73f94d 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(ping_err);
int ping_getfrag(void *from, char *to,
int offset, int fraglen, int odd, struct sk_buff *skb)
{
- struct pingfakehdr *pfh = (struct pingfakehdr *)from;
+ struct pingfakehdr *pfh = from;
if (offset == 0) {
fraglen -= sizeof(struct icmphdr);
@@ -854,8 +854,8 @@ do_confirm:
goto out;
}
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
int family = sk->sk_family;
@@ -871,7 +871,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_ERRQUEUE)
return inet_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -944,16 +944,24 @@ out:
}
EXPORT_SYMBOL_GPL(ping_recvmsg);
-int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
+ struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
pr_debug("ping_queue_rcv_skb -> failed\n");
- return -1;
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
+}
+
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}
EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
@@ -962,12 +970,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
* All we need to do is get the socket.
*/
-bool ping_rcv(struct sk_buff *skb)
+enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
- bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -982,15 +990,17 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2 && !ping_queue_rcv_skb(sk, skb2))
- rc = true;
+ if (skb2)
+ reason = __ping_queue_rcv_skb(sk, skb2);
+ else
+ reason = SKB_DROP_REASON_NOMEM;
sock_put(sk);
}
- if (!rc)
+ if (reason)
pr_debug("no socket, dropping\n");
- return rc;
+ return reason;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9f97b9cbf7b3..bbd717805b10 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -753,7 +753,7 @@ out:
*/
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -769,7 +769,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -783,7 +783,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ed01063d8f30..356f535f3443 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4)
__u8 tos = RT_FL_TOS(fl4);
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = tos & RTO_ONLINK ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ if (tos & RTO_ONLINK)
+ fl4->flowi4_scope = RT_SCOPE_LINK;
}
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
- const struct sock *sk,
- const struct iphdr *iph,
- int oif, u8 tos,
- u8 prot, u32 mark, int flow_flags)
+ const struct sock *sk, const struct iphdr *iph,
+ int oif, __u8 tos, u8 prot, u32 mark,
+ int flow_flags)
{
+ __u8 scope = RT_SCOPE_UNIVERSE;
+
if (sk) {
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
mark = sk->sk_mark;
- tos = RT_CONN_FLAGS(sk);
+ tos = ip_sock_rt_tos(sk);
+ scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos,
- RT_SCOPE_UNIVERSE, prot,
- flow_flags,
- iph->daddr, iph->saddr, 0, 0,
+
+ flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
@@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}
@@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
@@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
- ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -945,6 +946,7 @@ static int ip_error(struct sk_buff *skb)
struct inet_peer *peer;
unsigned long now;
struct net *net;
+ SKB_DR(reason);
bool send;
int code;
@@ -964,10 +966,12 @@ static int ip_error(struct sk_buff *skb)
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
case EHOSTUNREACH:
+ SKB_DR_SET(reason, IP_INADDRERRORS);
__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
break;
case ENETUNREACH:
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
}
@@ -983,6 +987,7 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
@@ -1009,7 +1014,7 @@ static int ip_error(struct sk_buff *skb)
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
-out: kfree_skb(skb);
+out: kfree_skb_reason(skb, reason);
return 0;
}
@@ -1057,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
- ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1074,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct rtable *rt;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark,
+ 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1132,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
new = true;
- } else {
- ip_rt_fix_tos(&fl4);
}
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -1165,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, 0, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
@@ -3408,7 +3409,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.tos = fl4.flowi4_tos;
+ fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3421,7 +3422,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
+ fa->fa_dscp == fri.dscp &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ad80d180b60b..cd448cdd3b38 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -20,10 +20,6 @@
#include <net/protocol.h>
#include <net/netevent.h>
-static int two = 2;
-static int three __maybe_unused = 3;
-static int four = 4;
-static int thousand = 1000;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -1006,7 +1002,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_max_syn_backlog",
@@ -1059,7 +1055,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "fib_multipath_hash_fields",
@@ -1117,7 +1113,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
{
.procname = "tcp_recovery",
@@ -1310,7 +1306,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_pacing_ca_ratio",
@@ -1319,7 +1315,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_wmem",
@@ -1391,7 +1387,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb7ef45408e1..9984d23a7f3e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
@@ -843,7 +843,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
- sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -1589,20 +1588,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-void __sk_defer_free_flush(struct sock *sk)
-{
- struct llist_node *head;
- struct sk_buff *skb, *n;
-
- head = llist_del_all(&sk->defer_list);
- llist_for_each_entry_safe(skb, n, head, ll_node) {
- prefetch(n);
- skb_mark_not_on_list(skb);
- __kfree_skb(skb);
- }
-}
-EXPORT_SYMBOL(__sk_defer_free_flush);
-
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
@@ -1610,11 +1595,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
sock_rfree(skb);
skb->destructor = NULL;
skb->sk = NULL;
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- !llist_empty(&sk->defer_list)) {
- llist_add(&skb->ll_node, &sk->defer_list);
- return;
- }
+ return skb_attempt_defer_free(skb);
}
__kfree_skb(skb);
}
@@ -1877,8 +1858,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
}
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags);
static int receive_fallback_to_copy(struct sock *sk,
struct tcp_zerocopy_receive *zc, int inq,
@@ -1900,7 +1880,7 @@ static int receive_fallback_to_copy(struct sock *sk,
if (err)
return err;
- err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
+ err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT,
tss, &zc->msg_flags);
if (err < 0)
return err;
@@ -2316,8 +2296,7 @@ static int tcp_inq_hint(struct sock *sk)
*/
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2339,7 +2318,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
*cmsg_flags = TCP_CMSG_INQ;
msg->msg_get_inq = 1;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
@@ -2457,7 +2436,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_defer_free_flush(sk);
sk_wait_data(sk, &timeo, last);
}
@@ -2558,8 +2536,8 @@ recv_sndq:
goto out;
}
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
@@ -2570,13 +2548,11 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
- sk_busy_loop(sk, nonblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
lock_sock(sk);
- ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
- &cmsg_flags);
+ ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
- sk_defer_free_flush(sk);
if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
@@ -3037,7 +3013,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto_min = TCP_RTO_MIN;
icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tp->delivered = 0;
@@ -3103,7 +3079,6 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
}
- sk_defer_free_flush(sk);
sk_error_report(sk);
return 0;
}
@@ -3748,7 +3723,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_max_pacing_rate = rate64;
info->tcpi_reordering = tp->reordering;
- info->tcpi_snd_cwnd = tp->snd_cwnd;
+ info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
if (info->tcpi_state == TCP_LISTEN) {
/* listeners aliased fields :
@@ -3919,7 +3894,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
rate64 = tcp_compute_delivery_rate(tp);
nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
@@ -4232,7 +4207,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
&zc, &len, err);
release_sock(sk);
- sk_defer_free_flush(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
goto zerocopy_rcv_cmsg;
switch (len) {
@@ -4621,7 +4595,6 @@ void __init tcp_init(void)
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
- inet_hashinfo_init(&tcp_hashinfo);
inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
thash_entries, 21, /* one slot per 2 MB*/
0, 64 * 1024);
@@ -4631,6 +4604,12 @@ void __init tcp_init(void)
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
+ tcp_hashinfo.bind2_bucket_cachep =
+ kmem_cache_create("tcp_bind2_bucket",
+ sizeof(struct inet_bind2_bucket), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
@@ -4653,8 +4632,9 @@ void __init tcp_init(void)
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind",
- sizeof(struct inet_bind_hashbucket),
+ alloc_large_system_hash("TCP bind bhash tables",
+ sizeof(struct inet_bind_hashbucket) +
+ sizeof(struct inet_bind2_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
@@ -4663,9 +4643,12 @@ void __init tcp_init(void)
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
+ tcp_hashinfo.bhash2 =
+ (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
+ INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 02e8626ccb27..075e744bfb48 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
- bw = (u64)tp->snd_cwnd * BW_UNIT;
+ bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
@@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
*/
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
return min(segs, 0x7FU);
@@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
- bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
+ bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
- bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
@@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+ u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
if (!acked)
goto done; /* no packet fully ACKed; just apply caps */
@@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
- tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
bbr->ack_epoch_acked + rs->acked_sacked);
extra_acked = bbr->ack_epoch_acked - expected_acked;
- extra_acked = min(extra_acked, tp->snd_cwnd);
+ extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
}
@@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
return;
bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
- tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
bbr_reset_mode(sk);
}
@@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
bbr->full_bw_cnt = 0;
bbr_reset_lt_bw_sampling(sk);
- return tcp_sk(sk)->snd_cwnd;
+ return tcp_snd_cwnd(tcp_sk(sk));
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index f5f588b1f6e9..58358bf92e1b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd);
+ bictcp_update(ca, tcp_snd_cwnd(tp));
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- if (tp->snd_cwnd <= low_window)
- return max(tp->snd_cwnd >> 1U, 2U);
+ if (tcp_snd_cwnd(tp) <= low_window)
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
else
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 1cdcb4df0eb7..be3947e70fec 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -174,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -186,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
@@ -211,7 +210,7 @@ msg_bytes_ready:
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
if (!timeo) {
copied = -EAGAIN;
goto out;
@@ -234,7 +233,7 @@ out:
}
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -244,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
lock_sock(sk);
msg_bytes_ready:
@@ -257,14 +256,14 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
copied = -EAGAIN;
}
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 709d23801823..ddc7ba0554bd 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
return;
}
}
@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
return false;
}
- ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
ca->state = CDG_BACKOFF;
tcp_enter_cwr(sk);
return true;
@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (!tcp_is_cwnd_limited(sk)) {
- ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
return;
}
- prior_snd_cwnd = tp->snd_cwnd;
+ prior_snd_cwnd = tcp_snd_cwnd(tp);
tcp_reno_cong_avoid(sk, ack, acked);
- incr = tp->snd_cwnd - prior_snd_cwnd;
+ incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}
@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (ca->state == CDG_BACKOFF)
- return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+ return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
if (ca->state == CDG_NONFULL && use_tolerance)
- return tp->snd_cwnd;
+ return tcp_snd_cwnd(tp);
- ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
if (use_shadow)
- return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
- return max(2U, tp->snd_cwnd >> 1);
+ return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
+ return max(2U, tcp_snd_cwnd(tp) >> 1);
}
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
ca->gradients = gradients;
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
break;
case CA_EVENT_COMPLETE_CWR:
ca->state = CDG_UNKNOWN;
@@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
GFP_NOWAIT | __GFP_NOWARN);
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
}
static void tcp_cdg_release(struct sock *sk)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index dc95572163df..d3cae40749e8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name)
return NULL;
}
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ trace_tcp_cong_state_set(sk, ca_state);
+
+ if (icsk->icsk_ca_ops->set_state)
+ icsk->icsk_ca_ops->set_state(sk, ca_state);
+ icsk->icsk_ca_state = ca_state;
+}
+
/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
const char *name)
@@ -393,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+ u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+ acked -= cwnd - tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
return acked;
}
@@ -410,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
tp->snd_cwnd_cnt += acked;
@@ -418,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
u32 delta = tp->snd_cwnd_cnt / w;
tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
}
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -445,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
}
/* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
@@ -454,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
@@ -462,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->prior_cwnd);
+ return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 24d562dd6225..68178e7280ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd, acked);
+ bictcp_update(ca, tcp_snd_cwnd(tp), acked);
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void cubictcp_state(struct sock *sk, u8 new_state)
@@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static u32 hystart_ack_delay(struct sock *sk)
+static u32 hystart_ack_delay(const struct sock *sk)
{
unsigned long rate;
@@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk)
if (!rate)
return 0;
return min_t(u64, USEC_PER_MSEC,
- div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+ div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate));
}
static void hystart_update(struct sock *sk, u32 delay)
@@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->found = 1;
pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
now - ca->round_start, threshold,
- ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
+ ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp));
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
/* hystart triggers when cwnd is larger than some threshold */
if (!ca->found && tcp_in_slow_start(tp) && hystart &&
- tp->snd_cwnd >= hystart_low_window)
+ tcp_snd_cwnd(tp) >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 1943a6630341..ab034a4e9324 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
static void dctcp_update_alpha(struct sock *sk, u32 flags)
@@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static void dctcp_state(struct sock *sk, u8 new_state)
@@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+ return max(tcp_snd_cwnd(tp), ca->loss_cwnd);
}
static struct tcp_congestion_ops dctcp __read_mostly = {
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 349069d6cd0a..c6de5ce79ad3 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* snd_cwnd <=
* hstcp_aimd_vals[ca->ai].cwnd
*/
- if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
+ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) {
+ while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
- while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
+ } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
/* Do additive increase */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
/* cwnd = cwnd + a(w) / cwnd */
tp->snd_cwnd_cnt += ca->ai + 1;
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd_cnt -= tp->snd_cwnd;
- tp->snd_cwnd++;
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
}
}
@@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 55adcfcf96fe..52b1f2665dfa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk,
ca->packetcount += sample->pkts_acked;
- if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
+ if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) &&
now - ca->lasttime >= ca->minRTT &&
ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
@@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
- return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
+ return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U);
}
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
- if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
htcp_alpha_update(ca);
} else
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index be39327e04e6..abd7d91807e5 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk)
ca->rho2_7ls = 0;
ca->snd_cwnd_cents = 0;
ca->hybla_en = true;
- tp->snd_cwnd = 2;
+ tcp_snd_cwnd_set(tp, 2);
tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */
@@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk)
/* set minimum rtt as this is the 1st ever seen */
ca->minrtt_us = tp->srtt_us;
- tp->snd_cwnd = ca->rho;
+ tcp_snd_cwnd_set(tp, ca->rho);
}
static void hybla_state(struct sock *sk, u8 ca_state)
@@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* as long as increment is estimated as (rho<<7)/window
* it already is <<7 and we can easily count its fractions.
*/
- increment = ca->rho2_7ls / tp->snd_cwnd;
+ increment = ca->rho2_7ls / tcp_snd_cwnd(tp);
if (increment < 128)
tp->snd_cwnd_cnt++;
}
odd = increment % 128;
- tp->snd_cwnd += increment >> 7;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7));
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
while (ca->snd_cwnd_cents >= 128) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
}
/* check when cwnd has not been incremented for a while */
- if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd++;
+ if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
}
/* clamp down slowstart cwnd to ssthresh value. */
if (is_slowstart)
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh));
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00e54873213e..c0c81a2c77fa 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -224,7 +224,7 @@ static void update_params(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- if (tp->snd_cwnd < win_thresh) {
+ if (tcp_snd_cwnd(tp) < win_thresh) {
ca->alpha = ALPHA_BASE;
ca->beta = BETA_BASE;
} else if (ca->cnt_rtt > 0) {
@@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* tp->snd_cwnd += alpha/tp->snd_cwnd
*/
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
- if (delta >= tp->snd_cwnd) {
- tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
- (u32)tp->snd_cwnd_clamp);
+ if (delta >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
+ (u32)tp->snd_cwnd_clamp));
tp->snd_cwnd_cnt = 0;
}
}
@@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
+ u32 decr;
/* Multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
+ decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
+ return max(tcp_snd_cwnd(tp) - decr, 2U);
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 60f99e9fb6d1..3231af73e430 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
/* Fast Recovery (RFC 5681 3.2) :
@@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
* If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
* end of slow start and should slow down.
*/
- if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
- rate *= max(tp->snd_cwnd, tp->packets_out);
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
@@ -2147,12 +2147,12 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
- tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -2458,7 +2458,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
msg,
&inet->inet_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2467,7 +2467,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
&sk->sk_v6_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2492,7 +2492,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
+ tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2599,7 +2599,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->high_seq = tp->snd_nxt;
tp->tlp_high_seq = 0;
tp->snd_cwnd_cnt = 0;
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
@@ -2620,16 +2620,16 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) {
- sndcnt = min_t(int, delta,
- max_t(int, tp->prr_delivered - tp->prr_out,
- newly_acked_sacked) + 1);
} else {
- sndcnt = min(delta, newly_acked_sacked);
+ sndcnt = max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
}
/* Force a fast retransmit upon entering fast recovery */
sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
}
static inline void tcp_end_cwnd_reduction(struct sock *sk)
@@ -2642,7 +2642,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
(inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2709,9 +2709,9 @@ static void tcp_mtup_probe_success(struct sock *sk)
/* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) *
+ tcp_mss_to_mtu(sk, tp->mss_cache) /
+ icsk->icsk_mtup.probe_size);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
@@ -3034,7 +3034,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tcp_simple_retransmit(sk);
return;
}
@@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una - tp->max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk);
- return -1;
+ return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
}
goto old_ack;
}
@@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
- return -1;
+ return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
@@ -4675,7 +4675,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
{
bool res = tcp_try_coalesce(sk, to, from, fragstolen);
- /* In case tcp_drop() is called later, update to->gso_segs */
+ /* In case tcp_drop_reason() is called later, update to->gso_segs */
if (res) {
u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
max_t(u16, 1, skb_shinfo(from)->gso_segs);
@@ -4692,11 +4692,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
kfree_skb_reason(skb, reason);
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
-{
- tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
-}
-
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
@@ -4724,7 +4719,7 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
continue;
}
@@ -5335,7 +5330,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_to_skb(node));
+ tcp_drop_reason(sk, rb_to_skb(node),
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5437,7 +5433,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk)
return false;
/* If we filled the congestion window, do not expand. */
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
return false;
return true;
@@ -5678,7 +5674,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool rst_seq_match = false;
+ SKB_DR(reason);
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
@@ -5690,6 +5686,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard;
}
/* Reset is accepted even if it did not pass PAWS. */
@@ -5711,8 +5708,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
} else if (tcp_reset_check(sk, skb)) {
- tcp_reset(sk, skb);
+ goto reset;
}
+ SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -5728,9 +5726,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
* Send a challenge ACK
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
- tcp_reset_check(sk, skb)) {
- rst_seq_match = true;
- } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+ tcp_reset_check(sk, skb))
+ goto reset;
+
+ if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
struct tcp_sack_block *sp = &tp->selective_acks[0];
int max_sack = sp[0].end_seq;
int this_sack;
@@ -5743,21 +5742,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
if (TCP_SKB_CB(skb)->seq == max_sack)
- rst_seq_match = true;
+ goto reset;
}
- if (rst_seq_match)
- tcp_reset(sk, skb);
- else {
- /* Disable TFO if RST is out-of-order
- * and no data has been received
- * for current active TFO socket
- */
- if (tp->syn_fastopen && !tp->data_segs_in &&
- sk->sk_state == TCP_ESTABLISHED)
- tcp_fastopen_active_disable(sk);
- tcp_send_challenge_ack(sk);
- }
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
+ tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
}
@@ -5772,6 +5768,7 @@ syn_challenge:
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_INVALID_SYN);
goto discard;
}
@@ -5780,7 +5777,12 @@ syn_challenge:
return true;
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
+ return false;
+
+reset:
+ tcp_reset(sk, skb);
+ __kfree_skb(skb);
return false;
}
@@ -5926,6 +5928,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
+ skb_dst_drop(skb);
__skb_pull(skb, tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5967,9 +5970,11 @@ slow_path:
return;
step5:
- if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
+ if ((int)reason < 0) {
+ reason = -reason;
goto discard;
-
+ }
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
@@ -6009,9 +6014,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
* retransmission has occurred.
*/
if (tp->total_retrans > 1 && tp->undo_marker)
- tp->snd_cwnd = 1;
+ tcp_snd_cwnd_set(tp, 1);
else
- tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
tp->snd_cwnd_stamp = tcp_jiffies32;
bpf_skops_established(sk, bpf_op, skb);
@@ -6147,6 +6152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
+ SKB_DR(reason);
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -6189,7 +6195,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (th->rst) {
tcp_reset(sk, skb);
- goto discard;
+consume:
+ __kfree_skb(skb);
+ return 0;
}
/* rfc793:
@@ -6199,9 +6207,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* See note below!
* --ANK(990513)
*/
- if (!th->syn)
+ if (!th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard_and_undo;
-
+ }
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
@@ -6278,13 +6287,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
-
-discard:
- tcp_drop(sk, skb);
- return 0;
- } else {
- tcp_send_ack(sk);
+ goto consume;
}
+ tcp_send_ack(sk);
return -1;
}
@@ -6296,15 +6301,16 @@ discard:
*
* Otherwise (no ACK) drop the segment and return."
*/
-
+ SKB_DR_SET(reason, TCP_RESET);
goto discard_and_undo;
}
/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
- tcp_paws_reject(&tp->rx_opt, 0))
+ tcp_paws_reject(&tp->rx_opt, 0)) {
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard_and_undo;
-
+ }
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
@@ -6353,7 +6359,7 @@ discard:
*/
return -1;
#else
- goto discard;
+ goto consume;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
@@ -6363,7 +6369,8 @@ discard:
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
- goto discard;
+ tcp_drop_reason(sk, skb, reason);
+ return 0;
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
@@ -6418,21 +6425,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
struct request_sock *req;
int queued = 0;
bool acceptable;
+ SKB_DR(reason);
switch (sk->sk_state) {
case TCP_CLOSE:
+ SKB_DR_SET(reason, TCP_CLOSE);
goto discard;
case TCP_LISTEN:
if (th->ack)
return 1;
- if (th->rst)
+ if (th->rst) {
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
-
+ }
if (th->syn) {
- if (th->fin)
+ if (th->fin) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
+ }
/* It is possible that we process SYN packets from backlog,
* so we need to make sure to disable BH and RCU right there.
*/
@@ -6447,6 +6459,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
consume_skb(skb);
return 0;
}
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
case TCP_SYN_SENT:
@@ -6473,13 +6486,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true, &req_stolen))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+ SKB_DR_SET(reason, TCP_FASTOPEN);
goto discard;
+ }
}
- if (!th->ack && !th->rst && !th->syn)
+ if (!th->ack && !th->rst && !th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
-
+ }
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
@@ -6492,6 +6508,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_OLD_ACK);
goto discard;
}
switch (sk->sk_state) {
@@ -6585,7 +6602,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
inet_csk_reset_keepalive_timer(sk, tmo);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
- goto discard;
+ goto consume;
}
break;
}
@@ -6593,7 +6610,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
- goto discard;
+ goto consume;
}
break;
@@ -6601,7 +6618,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->snd_una == tp->write_seq) {
tcp_update_metrics(sk);
tcp_done(sk);
- goto discard;
+ goto consume;
}
break;
}
@@ -6652,9 +6669,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!queued) {
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
}
return 0;
+
+consume:
+ __kfree_skb(skb);
+ return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 457f5b5d5d4a..dac2650f3863 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_TCP,
- orig_sport, orig_dport, sk);
+ sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport,
+ orig_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
@@ -2066,7 +2065,6 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
@@ -2103,6 +2101,7 @@ bad_packet:
}
discard_it:
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
/* Discard frame. */
kfree_skb_reason(skb, drop_reason);
return 0;
@@ -2285,16 +2284,15 @@ static void *listening_get_first(struct seq_file *seq)
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk;
ilb2 = &tcp_hashinfo.lhash2[st->bucket];
- if (hlist_empty(&ilb2->head))
+ if (hlist_nulls_empty(&ilb2->nulls_head))
continue;
spin_lock(&ilb2->lock);
- inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each(sk, node, &ilb2->nulls_head) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2313,15 +2311,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct tcp_iter_state *st = seq->private;
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
++st->num;
++st->offset;
- icsk = inet_csk(sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2621,7 +2618,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
@@ -2730,16 +2727,15 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
{
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
sock_hold(start_sk);
iter->batch[iter->end_sk++] = start_sk;
- icsk = inet_csk(start_sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 82b36ec3f2f8..ae36780977d2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
lp->flag &= ~LP_WITHIN_THR;
pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
- tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
+ tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
lp->sowd >> 3);
if (lp->flag & LP_WITHIN_THR)
@@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
/* happened within inference
* drop snd_cwnd into 1 */
if (lp->flag & LP_WITHIN_INF)
- tp->snd_cwnd = 1U;
+ tcp_snd_cwnd_set(tp, 1U);
/* happened after inference
* cut snd_cwnd into half */
else
- tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
/* record this drop time */
lp->last_drop = now;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0588b004ddac..7029b0e98edb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -388,15 +388,15 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
- if (val && (tp->snd_cwnd >> 1) > val)
+ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- tp->snd_cwnd >> 1);
+ tcp_snd_cwnd(tp) >> 1);
}
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- if (tp->snd_cwnd > val)
+ if (tcp_snd_cwnd(tp) > val)
tcp_metric_set(tm, TCP_METRIC_CWND,
- tp->snd_cwnd);
+ tcp_snd_cwnd(tp));
}
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
@@ -404,10 +404,10 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
+ tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
}
} else {
/* Else slow start did not finish, cwnd is non-sense,
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index ab552356bdba..a60662f4bdf9 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (ca->cwnd_growth_factor < 0) {
- cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
+ cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
tcp_cong_avoid_ai(tp, cnt, acked);
} else {
- cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
+ cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
tcp_cong_avoid_ai(tp, cnt, acked);
}
}
@@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
+ return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
}
static void tcpnv_state(struct sock *sk, u8 new_state)
@@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Stop cwnd growth if we were in catch up mode */
- if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
+ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
ca->nv_catchup = 0;
ca->nv_allow_cwnd_growth = 0;
}
@@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* if cwnd < max_win, grow cwnd
* else leave the same
*/
- if (tp->snd_cwnd > max_win) {
+ if (tcp_snd_cwnd(tp) > max_win) {
/* there is congestion, check that it is ok
* to make a CA decision
* 1. We should have at least nv_dec_eval_min_calls
@@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
ca->nv_allow_cwnd_growth = 0;
tp->snd_ssthresh =
(nv_ssthresh_factor * max_win) >> 3;
- if (tp->snd_cwnd - max_win > 2) {
+ if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
- dec = max(2U, ((tp->snd_cwnd - max_win) *
+ dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
nv_cong_dec_mult) >> 7);
- tp->snd_cwnd -= dec;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
} else if (nv_cong_dec_mult > 0) {
- tp->snd_cwnd = max_win;
+ tcp_snd_cwnd_set(tp, max_win);
}
if (ca->cwnd_growth_factor > 0)
ca->cwnd_growth_factor = 0;
ca->nv_no_cong_cnt = 0;
- } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
+ } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
/* There is no congestion, grow cwnd if allowed*/
if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
return;
@@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* (it wasn't before, if it is now is because nv
* decreased it).
*/
- if (tp->snd_cwnd < nv_min_cwnd)
- tp->snd_cwnd = nv_min_cwnd;
+ if (tcp_snd_cwnd(tp) < nv_min_cwnd)
+ tcp_snd_cwnd_set(tp, nv_min_cwnd);
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1ca2f28c9981..b4b2284ed4a2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -143,7 +143,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -152,7 +152,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
- tp->snd_cwnd = max(cwnd, restart_cwnd);
+ tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd));
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -445,12 +445,13 @@ struct tcp_out_options {
struct mptcp_out_options mptcp;
};
-static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
+static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
+ struct tcp_sock *tp,
struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
- mptcp_write_options(ptr, tp, &opts->mptcp);
+ mptcp_write_options(th, ptr, tp, &opts->mptcp);
#endif
}
@@ -606,9 +607,10 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
-static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
+ __be32 *ptr = (__be32 *)(th + 1);
u16 options = opts->options; /* mungable copy */
if (unlikely(OPTION_MD5 & options)) {
@@ -702,7 +704,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options);
- mptcp_options_write(ptr, tp, opts);
+ mptcp_options_write(th, ptr, tp, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -1014,7 +1016,7 @@ static void tcp_tsq_write(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) {
tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
}
@@ -1355,7 +1357,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ tcp_options_write(th, tp, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
@@ -1551,7 +1553,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
@@ -1861,9 +1863,9 @@ static void tcp_cwnd_application_limited(struct sock *sk)
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
- if (win_used < tp->snd_cwnd) {
+ if (win_used < tcp_snd_cwnd(tp)) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+ tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
}
@@ -2044,7 +2046,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
return 1;
in_flight = tcp_packets_in_flight(tp);
- cwnd = tp->snd_cwnd;
+ cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
return 0;
@@ -2197,12 +2199,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
BUG_ON(tcp_skb_pcount(skb) <= 1);
- BUG_ON(tp->snd_cwnd <= in_flight);
+ BUG_ON(tcp_snd_cwnd(tp) <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* From in_flight test above, we know that cwnd > in_flight. */
- cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+ cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache;
limit = min(send_win, cong_win);
@@ -2216,7 +2218,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
- u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+ u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
@@ -2346,7 +2348,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (likely(!icsk->icsk_mtup.enabled ||
icsk->icsk_mtup.probe_size ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
- tp->snd_cwnd < 11 ||
+ tcp_snd_cwnd(tp) < 11 ||
tp->rx_opt.num_sacks || tp->rx_opt.dsack))
return -1;
@@ -2382,7 +2384,7 @@ static int tcp_mtu_probe(struct sock *sk)
return 0;
/* Do we need to wait to drain cwnd? With none in flight, don't stall */
- if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+ if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) {
if (!tcp_packets_in_flight(tp))
return -1;
else
@@ -2451,7 +2453,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
* effectively two packets. */
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tcp_event_new_data_sent(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
@@ -2709,7 +2711,7 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp));
if (likely(sent_pkts || is_cwnd_limited))
tcp_cwnd_validate(sk, is_cwnd_limited);
@@ -2819,7 +2821,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (unlikely(!skb)) {
WARN_ONCE(tp->packets_out,
"invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
inet_csk(sk)->icsk_pending = 0;
return;
}
@@ -3303,7 +3305,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!hole)
tp->retransmit_skb_hint = skb;
- segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
+ segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp);
if (segs <= 0)
break;
sacked = TCP_SKB_CB(skb)->sacked;
@@ -3591,7 +3593,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ tcp_options_write(th, NULL, &opts);
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 9a8e014d9b5b..a8f6d9d06f2e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -200,7 +200,7 @@ void tcp_rate_check_app_limited(struct sock *sk)
/* Nothing in sending host's qdisc queues or NIC tx queue. */
sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
/* We are not limited by CWND. */
- tcp_packets_in_flight(tp) < tp->snd_cwnd &&
+ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
/* All lost packets have been retransmitted. */
tp->lost_out <= tp->retrans_out)
tp->app_limited =
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fd113f6226ef..48f30e7209f2 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -2,11 +2,6 @@
#include <linux/tcp.h>
#include <net/tcp.h>
-static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
-{
- return t1 > t2 || (t1 == t2 && after(seq1, seq2));
-}
-
static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -77,9 +72,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
!(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (!tcp_rack_sent_after(tp->rack.mstamp,
- tcp_skb_timestamp_us(skb),
- tp->rack.end_seq, scb->end_seq))
+ if (!tcp_skb_sent_after(tp->rack.mstamp,
+ tcp_skb_timestamp_us(skb),
+ tp->rack.end_seq, scb->end_seq))
break;
/* A packet is lost if it has not been s/acked beyond
@@ -140,8 +135,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
}
tp->rack.advanced = 1;
tp->rack.rtt_us = rtt_us;
- if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
- end_seq, tp->rack.end_seq)) {
+ if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
+ end_seq, tp->rack.end_seq)) {
tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
}
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 5842081bc8a2..862b96248a92 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
}
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
+ return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U);
}
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c8003c8aad2c..786848ad37ea 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd);
+ return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
* is the "Diff" from the Arizona Vegas papers.
*/
- diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
+ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
@@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* truncation robs us of full link
* utilization.
*/
- tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
+ (u32)target_cwnd + 1));
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tcp_in_slow_start(tp)) {
@@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* The old window was too fast, so
* we slow down.
*/
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tp->snd_ssthresh
= tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
*/
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
} else {
/* Sending just as fast as we
* should be.
@@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
}
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index cd50a61c9976..366ff6f214b2 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rtt = veno->minrtt;
- target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
- veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
+ veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd;
if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In the "non-congestive state", increase cwnd
* every rtt.
*/
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
} else {
/* In the "congestive state", increase cwnd
* every other rtt.
*/
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
if (veno->inc &&
- tp->snd_cwnd < tp->snd_cwnd_clamp) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
veno->inc = 0;
} else
veno->inc = 1;
@@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd_cnt += acked;
}
done:
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
}
/* Wipe the slate clean for the next rtt. */
/* veno->cntrtt = 0; */
@@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
- return max(tp->snd_cwnd * 4 / 5, 2U);
+ return max(tcp_snd_cwnd(tp) * 4 / 5, 2U);
else
/* in "congestive state", cut cwnd by 1/2 */
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b2e05c4cea00..c6e97141eef2 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 07c4c93b9fdb..18b07ff5d20e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!yeah->doing_reno_now) {
/* Scalable */
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
} else {
/* Reno */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
@@ -130,7 +130,7 @@ do_vegas:
/* Compute excess number of packets above bandwidth
* Avoid doing full 64 bit divide.
*/
- bw = tp->snd_cwnd;
+ bw = tcp_snd_cwnd(tp);
bw *= rtt - yeah->vegas.baseRTT;
do_div(bw, rtt);
queue = bw;
@@ -138,20 +138,20 @@ do_vegas:
if (queue > TCP_YEAH_ALPHA ||
rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
if (queue > TCP_YEAH_ALPHA &&
- tp->snd_cwnd > yeah->reno_count) {
+ tcp_snd_cwnd(tp) > yeah->reno_count) {
u32 reduction = min(queue / TCP_YEAH_GAMMA ,
- tp->snd_cwnd >> TCP_YEAH_EPSILON);
+ tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON);
- tp->snd_cwnd -= reduction;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction);
- tp->snd_cwnd = max(tp->snd_cwnd,
- yeah->reno_count);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
+ yeah->reno_count));
- tp->snd_ssthresh = tp->snd_cwnd;
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
if (yeah->reno_count <= 2)
- yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+ yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U);
else
yeah->reno_count++;
@@ -176,7 +176,7 @@ do_vegas:
*/
yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
yeah->vegas.beg_snd_nxt = tp->snd_nxt;
- yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+ yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp);
/* Wipe the slate clean for the next RTT. */
yeah->vegas.cntRTT = 0;
@@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
if (yeah->doing_reno_now < TCP_YEAH_RHO) {
reduction = yeah->lastQ;
- reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
+ reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U));
- reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+ reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA);
} else
- reduction = max(tp->snd_cwnd>>1, 2U);
+ reduction = max(tcp_snd_cwnd(tp)>>1, 2U);
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return max_t(int, tp->snd_cwnd - reduction, 2);
+ return max_t(int, tcp_snd_cwnd(tp) - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b4d8361560f..aa9f2ec3dc46 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1726,7 +1726,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
EXPORT_SYMBOL(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err)
+ int *off, int *err)
{
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff_head *queue;
@@ -1735,7 +1735,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
int error;
queue = &udp_sk(sk)->reader_queue;
- flags |= noblock ? MSG_DONTWAIT : 0;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb;
@@ -1805,7 +1804,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
struct sk_buff *skb;
int err, used;
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (!skb)
return err;
@@ -1843,8 +1842,8 @@ EXPORT_SYMBOL(udp_read_sock);
* return it, otherwise we block.
*/
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
@@ -1859,7 +1858,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
@@ -1910,7 +1909,7 @@ try_again:
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
@@ -2564,8 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif, sdif))
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index bbe6569c9ad3..ff15918b7bdc 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -11,14 +11,13 @@
static struct proto *udpv6_prot_saved __read_mostly;
static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
- addr_len);
+ return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len);
#endif
- return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+ return udp_prot.recvmsg(sk, msg, len, flags, addr_len);
}
static bool udp_sk_has_data(struct sock *sk)
@@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
}
static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return sk_udp_recvmsg(sk, msg, len, flags, addr_len);
if (!psock_has_data(psock)) {
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
@@ -84,12 +83,12 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = udp_msg_wait_data(sk, psock, timeo);
if (data) {
if (psock_has_data(psock))
goto msg_bytes_ready;
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
copied = -EAGAIN;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 2878d8285caf..4ba7a88a1b1d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
void udp_destroy_sock(struct sock *sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e7c68fa12fae..ca0aa744593e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -335,7 +335,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
{
int i;
- idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+ idev->stats.ipv6 = alloc_percpu_gfp(struct ipstats_mib, GFP_KERNEL_ACCOUNT);
if (!idev->stats.ipv6)
goto err_ip;
@@ -351,7 +351,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
if (!idev->stats.icmpv6dev)
goto err_icmp;
idev->stats.icmpv6msgdev = kzalloc(sizeof(struct icmpv6msg_mib_device),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!idev->stats.icmpv6msgdev)
goto err_icmpmsg;
@@ -375,7 +375,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+ ndev = kzalloc(sizeof(*ndev), GFP_KERNEL_ACCOUNT);
if (!ndev)
return ERR_PTR(err);
@@ -797,6 +797,7 @@ static void dev_forward_change(struct inet6_dev *idev)
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
if (!idev)
return;
@@ -815,14 +816,24 @@ static void dev_forward_change(struct inet6_dev *idev)
}
}
+ read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ }
+ read_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
if (idev->cnf.forwarding)
addrconf_join_anycast(ifa);
else
addrconf_leave_anycast(ifa);
}
+
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
@@ -3728,7 +3739,8 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
unsigned long event = unregister ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, *tmp;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
bool keep_addr = false;
bool was_ready;
int state, i;
@@ -3820,16 +3832,23 @@ restart:
write_lock_bh(&idev->lock);
}
- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list)
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ write_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
struct fib6_info *rt = NULL;
bool keep;
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
+
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
@@ -3860,15 +3879,14 @@ restart:
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
- write_lock_bh(&idev->lock);
if (!keep) {
+ write_lock_bh(&idev->lock);
list_del_rcu(&ifa->if_list);
+ write_unlock_bh(&idev->lock);
in6_ifa_put(ifa);
}
}
- write_unlock_bh(&idev->lock);
-
/* Step 5: Discard anycast and multicast list */
if (unregister) {
ipv6_ac_destroy_dev(idev);
@@ -4199,7 +4217,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits != 0 &&
- (dev->flags&IFF_LOOPBACK) == 0;
+ (dev->flags & IFF_LOOPBACK) == 0 &&
+ (dev->type != ARPHRD_TUNNEL);
read_unlock_bh(&ifp->idev->lock);
/* While dad is in progress mld report's source address is in6_addrany.
@@ -5567,6 +5586,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
+ array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na;
}
static inline size_t inet6_ifla6_size(void)
@@ -7018,6 +7038,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra2 = (void *)SYSCTL_ONE,
},
{
+ .procname = "accept_unsolicited_na",
+ .data = &ipv6_devconf.accept_unsolicited_na,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
/* sentinel */
}
};
@@ -7029,7 +7058,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
- table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL);
+ table = kmemdup(addrconf_sysctl, sizeof(addrconf_sysctl), GFP_KERNEL_ACCOUNT);
if (!table)
goto out;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7d7b7523d126..70564ddccc46 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -318,7 +318,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Binding to v4-mapped address on a v6-only socket
* makes no sense
*/
- if (sk->sk_ipv6only) {
+ if (ipv6_only_sock(sk)) {
err = -EINVAL;
goto out;
}
@@ -654,7 +654,7 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -669,8 +669,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
prot = READ_ONCE(sk->sk_prot);
err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 206f66310a88..df665d4e8f0f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -145,7 +145,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
int err;
if (usin->sin6_family == AF_INET) {
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -EAFNOSUPPORT;
err = __ip4_datagram_connect(sk, uaddr, addr_len);
goto ipv4_connected;
@@ -178,7 +178,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_type & IPV6_ADDR_MAPPED) {
struct sockaddr_in sin;
- if (__ipv6_only_sock(sk)) {
+ if (ipv6_only_sock(sk)) {
err = -ENETUNREACH;
goto out;
}
@@ -218,11 +218,11 @@ ipv4_connected:
err = -EINVAL;
goto out;
}
- sk->sk_bound_dev_if = usin->sin6_scope_id;
+ WRITE_ONCE(sk->sk_bound_dev_if, usin->sin6_scope_id);
}
if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
- sk->sk_bound_dev_if = np->mcast_oif;
+ WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif);
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
@@ -798,7 +798,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (src_idx) {
if (fl6->flowi6_oif &&
src_idx != fl6->flowi6_oif &&
- (sk->sk_bound_dev_if != fl6->flowi6_oif ||
+ (READ_ONCE(sk->sk_bound_dev_if) != fl6->flowi6_oif ||
!sk_dev_equal_l3scope(sk, src_idx)))
return -EINVAL;
fl6->flowi6_oif = src_idx;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index f2120e92caf1..36e1d0f8dd06 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -741,7 +741,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen, hlen, elen;
int padlen, trimlen;
@@ -753,11 +752,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
elen = skb->len - hlen;
- if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
- ret = xo->proto;
- goto out;
- }
-
ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
BUG_ON(ret);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 658d5eabaf7e..a8d961d3a477 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -90,12 +90,13 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
break;
fallthrough;
case 2: /* send ICMP PARM PROB regardless and drop packet */
- icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff,
+ SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
return false;
}
@@ -218,7 +219,7 @@ static bool ip6_parse_tlv(bool hopbyhop,
if (len == 0)
return true;
bad:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -232,6 +233,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ SKB_DR(reason);
int ret;
if (opt->dsthao) {
@@ -246,19 +248,23 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (hao->length != 16) {
net_dbg_ratelimited("hao invalid option length = %d\n",
hao->length);
+ SKB_DR_SET(reason, IP_INHDR);
goto discard;
}
if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) {
net_dbg_ratelimited("hao is not an unicast addr: %pI6\n",
&hao->addr);
+ SKB_DR_SET(reason, INVALID_PROTO);
goto discard;
}
ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr,
(xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS);
- if (unlikely(ret < 0))
+ if (unlikely(ret < 0)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -281,7 +287,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
return true;
discard:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
#endif
@@ -487,7 +493,6 @@ static int ipv6_rpl_srh_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct inet6_dev *idev;
struct ipv6hdr *oldhdr;
- struct in6_addr addr;
unsigned char *buf;
int accept_rpl_seg;
int i, err;
@@ -616,9 +621,7 @@ looped_back:
return -1;
}
- addr = ipv6_hdr(skb)->daddr;
- ipv6_hdr(skb)->daddr = ohdr->rpl_segaddr[i];
- ohdr->rpl_segaddr[i] = addr;
+ swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]);
ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n);
@@ -934,7 +937,7 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
}
net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n",
nh[optoff + 1]);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -988,7 +991,7 @@ ignore:
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
@@ -997,31 +1000,30 @@ drop:
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
const unsigned char *nh = skb_network_header(skb);
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
- struct net *net = ipv6_skb_net(skb);
+ SKB_DR(reason);
u32 pkt_len;
if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
nh[optoff+1]);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_SET(reason, IP_INHDR);
goto drop;
}
pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
if (pkt_len <= IPV6_MAXPLEN) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (ipv6_hdr(skb)->payload_len) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+ icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff,
+ SKB_DROP_REASON_IP_INHDR);
return false;
}
if (pkt_len > skb->len - sizeof(struct ipv6hdr)) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
@@ -1032,7 +1034,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return false;
}
@@ -1054,7 +1056,7 @@ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff)
return true;
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return false;
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e6b978ea0e87..61770220774e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -629,12 +629,13 @@ out_bh_enable:
}
EXPORT_SYMBOL(icmp6_send);
-/* Slightly more convenient version of icmp6_send.
+/* Slightly more convenient version of icmp6_send with drop reasons.
*/
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
+ enum skb_drop_reason reason)
{
icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
@@ -864,21 +865,23 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct net *net = dev_net(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
u8 type;
- bool success = false;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
goto drop_no_count;
@@ -886,8 +889,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*hdr));
- if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop_no_count;
+ }
skb_set_network_header(skb, nh);
}
@@ -924,11 +930,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
break;
case ICMPV6_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_EXT_ECHO_REPLY:
- success = ping_rcv(skb);
+ reason = ping_rcv(skb);
break;
case ICMPV6_PKT_TOOBIG:
@@ -994,19 +1000,20 @@ static int icmpv6_rcv(struct sk_buff *skb)
/* until the v6 path can be better sorted assume failure and
* preserve the status quo behaviour for the rest of the paths to here
*/
- if (success)
- consume_skb(skb);
+ if (reason)
+ kfree_skb_reason(skb, reason);
else
- kfree_skb(skb);
+ consume_skb(skb);
return 0;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32ccac10bd62..7d53d62783b1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -71,12 +71,12 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))
+ if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) {
+ if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -138,12 +138,11 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -269,7 +268,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports,
+ if (likely(inet6_match(net, sk2, saddr, daddr, ports,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5136959b3dc5..4e37f7c29900 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -382,11 +382,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
goto failed_free;
ip6gre_tnl_link_config(nt, 1);
-
- /* Can use a lockless transmit, unless we generate output sequences */
- if (!(nt->parms.o_flags & TUNNEL_SEQ))
- dev->features |= NETIF_F_LLTX;
-
ip6gre_tunnel_link(ign, nt);
return nt;
@@ -1445,26 +1440,23 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
+ __be16 flags;
- dev->features |= GRE6_FEATURES;
+ dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE6_FEATURES;
- if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
- nt->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ flags = nt->parms.o_flags;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (flags & TUNNEL_SEQ)
+ return;
+ if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 5b5ea35635f9..0322cc86b84e 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -145,12 +145,14 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
struct net *net)
{
+ enum skb_drop_reason reason;
const struct ipv6hdr *hdr;
u32 pkt_len;
struct inet6_dev *idev;
if (skb->pkt_type == PACKET_OTHERHOST) {
- kfree_skb(skb);
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
+ kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST);
return NULL;
}
@@ -160,9 +162,12 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
__IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len);
+ SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
!idev || unlikely(idev->cnf.disable_ipv6)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ if (idev && unlikely(idev->cnf.disable_ipv6))
+ SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -186,8 +191,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
hdr = ipv6_hdr(skb);
- if (hdr->version != 6)
+ if (hdr->version != 6) {
+ SKB_DR_SET(reason, UNHANDLED_PROTO);
goto err;
+ }
__IP6_ADD_STATS(net, idev,
IPSTATS_MIB_NOECTPKTS +
@@ -225,8 +232,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast)
+ idev->cnf.drop_unicast_in_l2_multicast) {
+ SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
+ }
/* RFC4291 2.7
* Nodes must not originate a packet to a multicast address whose scope
@@ -255,12 +264,11 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (pkt_len + sizeof(struct ipv6hdr) > skb->len) {
__IP6_INC_STATS(net,
idev, IPSTATS_MIB_INTRUNCATEDPKTS);
+ SKB_DR_SET(reason, PKT_TOO_SMALL);
goto drop;
}
- if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- goto drop;
- }
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
+ goto err;
hdr = ipv6_hdr(skb);
}
@@ -281,9 +289,10 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
return skb;
err:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ SKB_DR_OR(reason, IP_INHDR);
drop:
rcu_read_unlock();
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NULL;
}
@@ -353,6 +362,7 @@ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
+ SKB_DR(reason);
bool raw;
/*
@@ -412,12 +422,16 @@ resubmit_final:
if (ipv6_addr_is_multicast(&hdr->daddr) &&
!ipv6_chk_mcast_addr(dev, &hdr->daddr,
&hdr->saddr) &&
- !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
+ !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
goto discard;
+ }
}
if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
- !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto discard;
+ }
ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv,
skb);
@@ -443,8 +457,11 @@ resubmit_final:
IPSTATS_MIB_INUNKNOWNPROTOS);
icmpv6_send(skb, ICMPV6_PARAMPROB,
ICMPV6_UNK_NEXTHDR, nhoff);
+ SKB_DR_SET(reason, IP_NOPROTO);
+ } else {
+ SKB_DR_SET(reason, XFRM_POLICY);
}
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
} else {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS);
consume_skb(skb);
@@ -454,7 +471,7 @@ resubmit_final:
discard:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
}
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c4fc03c1ac99..d12dba2dd535 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -77,7 +77,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct ipv6hdr *ipv6h;
const struct net_offload *ops;
- int proto;
+ int proto, nexthdr;
struct frag_hdr *fptr;
unsigned int payload_len;
u8 *prevhdr;
@@ -87,6 +87,28 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
bool gso_partial;
skb_reset_network_header(skb);
+ nexthdr = ipv6_has_hopopt_jumbo(skb);
+ if (nexthdr) {
+ const int hophdr_len = sizeof(struct hop_jumbo_hdr);
+ int err;
+
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ /* remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+ */
+ memmove(skb_mac_header(skb) + hophdr_len,
+ skb_mac_header(skb),
+ ETH_HLEN + sizeof(struct ipv6hdr));
+ skb->data += hophdr_len;
+ skb->len -= hophdr_len;
+ skb->network_header += hophdr_len;
+ skb->mac_header += hophdr_len;
+ ipv6h = (struct ipv6hdr *)skb->data;
+ ipv6h->nexthdr = nexthdr;
+ }
nhoff = skb_network_header(skb) - skb_mac_header(skb);
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
@@ -320,15 +342,43 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
- struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
+ struct ipv6hdr *iph;
int err = -ENOSYS;
+ u32 payload_len;
if (skb->encapsulation) {
skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
skb_set_inner_network_header(skb, nhoff);
}
- iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
+ payload_len = skb->len - nhoff - sizeof(*iph);
+ if (unlikely(payload_len > IPV6_MAXPLEN)) {
+ struct hop_jumbo_hdr *hop_jumbo;
+ int hoplen = sizeof(*hop_jumbo);
+
+ /* Move network header left */
+ memmove(skb_mac_header(skb) - hoplen, skb_mac_header(skb),
+ skb->transport_header - skb->mac_header);
+ skb->data -= hoplen;
+ skb->len += hoplen;
+ skb->mac_header -= hoplen;
+ skb->network_header -= hoplen;
+ iph = (struct ipv6hdr *)(skb->data + nhoff);
+ hop_jumbo = (struct hop_jumbo_hdr *)(iph + 1);
+
+ /* Build hop-by-hop options */
+ hop_jumbo->nexthdr = iph->nexthdr;
+ hop_jumbo->hdrlen = 0;
+ hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
+ hop_jumbo->tlv_len = 4;
+ hop_jumbo->jumbo_payload_len = htonl(payload_len + hoplen);
+
+ iph->nexthdr = NEXTHDR_HOP;
+ iph->payload_len = 0;
+ } else {
+ iph = (struct ipv6hdr *)(skb->data + nhoff);
+ iph->payload_len = htons(payload_len);
+ }
nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index fa63ef2bd99c..4081b12a01ff 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -119,19 +119,21 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
- if (!IS_ERR(neigh)) {
- sock_confirm_neigh(skb, neigh);
- ret = neigh_output(neigh, skb, false);
- rcu_read_unlock_bh();
- return ret;
+
+ if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
+ if (IS_ERR(neigh)) {
+ rcu_read_unlock_bh();
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
+ return -EINVAL;
+ }
}
+ sock_confirm_neigh(skb, neigh);
+ ret = neigh_output(neigh, skb, false);
rcu_read_unlock_bh();
-
- IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
- kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
- return -EINVAL;
+ return ret;
}
static int
@@ -180,7 +182,9 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
#endif
mtu = ip6_skb_dst_mtu(skb);
- if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
+ if (skb_is_gso(skb) &&
+ !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
+ !skb_gso_validate_network_len(skb, mtu))
return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
if ((skb->len > mtu && !skb_is_gso(skb)) ||
@@ -198,7 +202,6 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
switch (ret) {
case NET_XMIT_SUCCESS:
- return __ip6_finish_output(net, sk, skb);
case NET_XMIT_CN:
return __ip6_finish_output(net, sk, skb) ? : ret;
default:
@@ -251,6 +254,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct inet6_dev *idev = ip6_dst_idev(dst);
+ struct hop_jumbo_hdr *hop_jumbo;
+ int hoplen = sizeof(*hop_jumbo);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
@@ -258,7 +263,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
+ head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
@@ -281,6 +286,20 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
&fl6->saddr);
}
+ if (unlikely(seg_len > IPV6_MAXPLEN)) {
+ hop_jumbo = skb_push(skb, hoplen);
+
+ hop_jumbo->nexthdr = proto;
+ hop_jumbo->hdrlen = 0;
+ hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
+ hop_jumbo->tlv_len = 4;
+ hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);
+
+ proto = IPPROTO_HOPOPTS;
+ seg_len = 0;
+ IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
hdr = ipv6_hdr(skb);
@@ -469,6 +488,7 @@ int ip6_forward(struct sk_buff *skb)
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
@@ -518,7 +538,7 @@ int ip6_forward(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
return -ETIMEDOUT;
}
@@ -537,6 +557,7 @@ int ip6_forward(struct sk_buff *skb)
if (!xfrm6_route_forward(skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
}
dst = skb_dst(skb);
@@ -596,7 +617,7 @@ int ip6_forward(struct sk_buff *skb)
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_FRAGFAILS);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
return -EMSGSIZE;
}
@@ -618,8 +639,9 @@ int ip6_forward(struct sk_buff *skb)
error:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
+ SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 53f632a560ec..19325b7600bb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -257,8 +257,6 @@ static int ip6_tnl_create2(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
- t = netdev_priv(dev);
-
dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fcb288b0ae13..254addad0dd3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -979,6 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
+ bool create_neigh;
if (skb->len < sizeof(struct nd_msg)) {
ND_PRINTK(2, warn, "NA: packet too short\n");
@@ -999,6 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
/* For some 802.11 wireless deployments (and possibly other networks),
* there will be a NA proxy and unsolicitd packets are attacks
* and thus should not be accepted.
+ * drop_unsolicited_na takes precedence over accept_unsolicited_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
idev->cnf.drop_unsolicited_na)
@@ -1039,7 +1041,23 @@ static void ndisc_recv_na(struct sk_buff *skb)
in6_ifa_put(ifp);
return;
}
- neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+ /* RFC 9131 updates original Neighbour Discovery RFC 4861.
+ * An unsolicited NA can now create a neighbour cache entry
+ * on routers if it has Target LL Address option.
+ *
+ * drop accept fwding behaviour
+ * ---- ------ ------ ----------------------------------------------
+ * 1 X X Drop NA packet and don't pass up the stack
+ * 0 0 X Pass NA packet up the stack, don't update NC
+ * 0 1 0 Pass NA packet up the stack, don't update NC
+ * 0 1 1 Pass NA packet up the stack, and add a STALE
+ * NC entry
+ * Note that we don't do a (daddr == all-routers-mcast) check.
+ */
+ create_neigh = !msg->icmph.icmp6_solicited && lladdr &&
+ idev && idev->cnf.forwarding &&
+ idev->cnf.accept_unsolicited_na;
+ neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh);
if (neigh) {
u8 old_flags = neigh->flags;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8ce60ab89015..857713d7a38a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -31,6 +31,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
int strict = (ipv6_addr_type(&iph->daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
+ .flowi6_l3mdev = l3mdev_master_ifindex(dev),
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -42,8 +43,6 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
fl6.flowi6_oif = sk->sk_bound_dev_if;
else if (strict)
fl6.flowi6_oif = dev->ifindex;
- else
- fl6.flowi6_oif = l3mdev_master_ifindex(dev);
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index dffeaaaadcde..f61d4f18e1cf 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -31,7 +31,7 @@ static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
- if (!nf_reject_verify_csum(proto))
+ if (!nf_reject_verify_csum(skb, thoff, proto))
return true;
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
@@ -388,7 +388,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
- if (!nf_reject_verify_csum(proto))
+ if (!nf_reject_verify_csum(skb, thoff, proto))
return true;
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index b3f163b40c2b..8970d0b4faeb 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->daddr = iph->daddr;
fl6->saddr = iph->saddr;
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl6->flowi6_iif = nft_out(pkt)->ifindex;
+
fl6->daddr = iph->saddr;
fl6->saddr = iph->daddr;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index c51d5ce3711c..3b7cbd522b54 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -460,7 +460,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
*/
static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
@@ -477,7 +477,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -512,7 +512,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
*addr_len = sizeof(*sin6);
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (np->rxopt.all)
ip6_datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4b6ce017d5e..d25dc83bac62 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4483,6 +4483,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
struct inet6_dev *idev;
+ SKB_DR(reason);
int type;
if (netif_is_l3_master(skb->dev) ||
@@ -4495,11 +4496,14 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
case IPSTATS_MIB_INNOROUTES:
type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
if (type == IPV6_ADDR_ANY) {
+ SKB_DR_SET(reason, IP_INADDRERRORS);
IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
break;
}
+ SKB_DR_SET(reason, IP_INNOROUTES);
fallthrough;
case IPSTATS_MIB_OUTNOROUTES:
+ SKB_DR_OR(reason, IP_OUTNOROUTES);
IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
break;
}
@@ -4509,7 +4513,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
skb_dst_drop(skb);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d53dd142bf87..94a0a294c6a1 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -23,8 +23,6 @@
#endif
#include <linux/ioam6.h>
-static int two = 2;
-static int three = 3;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
@@ -172,7 +170,7 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "fib_multipath_hash_fields",
@@ -197,7 +195,7 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "ioam6_id",
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index faaddaf43c90..f37dd4aa91c6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -230,7 +230,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
@@ -1728,7 +1728,6 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
@@ -1763,6 +1762,7 @@ bad_packet:
}
discard_it:
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
kfree_skb_reason(skb, drop_reason);
return 0;
@@ -2044,7 +2044,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7f0fa9bd9ffe..55afd7f39c04 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -105,7 +105,7 @@ static int compute_score(struct sock *sk, struct net *net,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
{
- int score;
+ int bound_dev_if, score;
struct inet_sock *inet;
bool dev_match;
@@ -132,10 +132,11 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}
- dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
if (!dev_match)
return -1;
- if (sk->sk_bound_dev_if)
+ if (bound_dev_if)
score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
@@ -322,7 +323,7 @@ static int udp6_skb_len(struct sk_buff *skb)
*/
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -342,7 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
@@ -391,7 +392,7 @@ try_again:
if (!peeking)
SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (msg->msg_name) {
@@ -789,7 +790,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != rmt_port) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) ||
+ !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
(!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
@@ -1043,7 +1044,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
if (sk->sk_state == TCP_ESTABLISHED &&
- INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif))
+ inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
@@ -1123,7 +1124,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
* bytes that are out of the bound specified by user in addr_len.
*/
if (uaddr->sa_family == AF_INET) {
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -EAFNOSUPPORT;
return udp_pre_connect(sk, uaddr, addr_len);
}
@@ -1359,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
return udp_sendmsg(sk, msg, len);
}
@@ -1433,7 +1434,7 @@ do_udp_sendmsg:
}
if (!fl6->flowi6_oif)
- fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
if (!fl6->flowi6_oif)
fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index b2fcc46c1630..4251e49d32a0 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -20,8 +20,8 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
-int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a1760add5bf1..a0385ddbffcf 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1223,7 +1223,6 @@ static void iucv_process_message_q(struct sock *sk)
static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct iucv_sock *iucv = iucv_sk(sk);
unsigned int copied, rlen;
@@ -1242,7 +1241,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
/* receive/dequeue next skb:
* the function understands MSG_PEEK and, thus, does not dequeue skb */
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
return 0;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 339d95df19d3..11e1a3a3e442 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3698,7 +3698,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
goto out;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
@@ -3713,7 +3713,7 @@ static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err)
goto out_free;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
err = (flags & MSG_TRUNC) ? skb->len : copied;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index b3edafa5fba4..4db5a554bdbd 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -50,11 +50,13 @@ static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
sk_for_each_bound(sk, &l2tp_ip_bind_table) {
const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
+ int bound_dev_if;
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif)
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && dif && bound_dev_if != dif)
continue;
if (inet->inet_rcv_saddr && laddr &&
@@ -515,7 +517,7 @@ no_route:
}
static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int noblock, int flags, int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -526,7 +528,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 96f975777438..c6ff8bf9b55f 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -62,11 +62,13 @@ static struct sock *__l2tp_ip6_bind_lookup(const struct net *net,
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
+ int bound_dev_if;
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_bound_dev_if && dif && sk->sk_bound_dev_if != dif)
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && dif && bound_dev_if != dif)
continue;
if (sk_laddr && !ipv6_addr_any(sk_laddr) &&
@@ -445,7 +447,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_conn_id = lsk->conn_id;
}
if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
- lsa->l2tp_scope_id = sk->sk_bound_dev_if;
+ lsa->l2tp_scope_id = READ_ONCE(sk->sk_bound_dev_if);
return sizeof(*lsa);
}
@@ -560,7 +562,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
if (fl6.flowi6_oif == 0)
- fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
if (msg->msg_controllen) {
opt = &opt_space;
@@ -657,7 +659,7 @@ do_confirm:
}
static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
@@ -671,7 +673,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bf35710127dd..8be1fdc68a0b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -191,8 +191,7 @@ static int pppol2tp_recvmsg(struct socket *sock, struct msghdr *msg,
goto end;
err = 0;
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto end;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 218cdc554d71..bfab39320004 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -263,7 +263,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
- if (sta->sta.he_cap.has_he && addbaext)
+ if (sta->sta.deflink.he_cap.has_he && addbaext)
ieee80211_add_addbaext(sdata, skb, addbaext, buf_size);
ieee80211_tx_skb(sdata, skb);
@@ -296,7 +296,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
goto end;
}
- if (!sta->sta.ht_cap.ht_supported &&
+ if (!sta->sta.deflink.ht_cap.ht_supported &&
sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o QoS\n",
@@ -312,9 +312,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
goto end;
}
- if (sta->sta.eht_cap.has_eht)
+ if (sta->sta.deflink.eht_cap.has_eht)
max_buf_size = IEEE80211_MAX_AMPDU_BUF_EHT;
- else if (sta->sta.he_cap.has_he)
+ else if (sta->sta.deflink.he_cap.has_he)
max_buf_size = IEEE80211_MAX_AMPDU_BUF_HE;
else
max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
@@ -324,7 +324,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
* and if buffer size does not exceeds max value */
/* XXX: check own ht delayed BA capability?? */
if (((ba_policy != 1) &&
- (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
+ (!(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
(buf_size > max_buf_size)) {
status = WLAN_STATUS_INVALID_QOS_PARAM;
ht_dbg_ratelimited(sta->sdata,
@@ -507,7 +507,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
goto free;
}
- if (sta->sta.eht_cap.has_eht && elems && elems->addba_ext_ie) {
+ if (sta->sta.deflink.eht_cap.has_eht && elems && elems->addba_ext_ie) {
u8 buf_size_1k = u8_get_bits(elems->addba_ext_ie->data,
IEEE80211_ADDBA_EXT_BUF_SIZE_MASK);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 1deb3d874a4b..91878ed5ec46 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -467,7 +467,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
sta->ampdu_mlme.addba_req_num[tid]++;
spin_unlock_bh(&sta->lock);
- if (sta->sta.he_cap.has_he) {
+ if (sta->sta.deflink.he_cap.has_he) {
buf_size = local->hw.max_tx_aggregation_subframes;
} else {
/*
@@ -594,7 +594,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
"Requested to start BA session on reserved tid=%d", tid))
return -EINVAL;
- if (!pubsta->ht_cap.ht_supported &&
+ if (!pubsta->deflink.ht_cap.ht_supported &&
sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
return -EINVAL;
@@ -647,7 +647,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
* is set when we receive a bss info from a probe response or a beacon.
*/
if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC &&
- !sta->sta.ht_cap.ht_supported) {
+ !sta->sta.deflink.ht_cap.ht_supported) {
ht_dbg(sdata,
"BA request denied - IBSS STA %pM does not advertise HT support\n",
pubsta->addr);
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 2619e12c8bda..4bab1683652d 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -647,8 +647,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
struct sta_info *sta = container_of(pubsta, struct sta_info,
sta);
struct ieee80211_rx_status stat;
- struct ieee80211_tx_rate *tx_rate = &sta->tx_stats.last_rate;
- struct rate_info *ri = &sta->tx_stats.last_rate_info;
+ struct ieee80211_tx_rate *tx_rate = &sta->deflink.tx_stats.last_rate;
+ struct rate_info *ri = &sta->deflink.tx_stats.last_rate_info;
u32 duration, overhead;
u8 agg_shift;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ba752539d1d9..f7896f257e1b 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -570,7 +570,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
if (pairwise)
key = key_mtx_dereference(local, sta->ptk[key_idx]);
else
- key = key_mtx_dereference(local, sta->gtk[key_idx]);
+ key = key_mtx_dereference(local,
+ sta->deflink.gtk[key_idx]);
} else
key = key_mtx_dereference(local, sdata->keys[key_idx]);
@@ -620,7 +621,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
else if (!pairwise &&
key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
NUM_DEFAULT_BEACON_KEYS)
- key = rcu_dereference(sta->gtk[key_idx]);
+ key = rcu_dereference(sta->deflink.gtk[key_idx]);
} else
key = rcu_dereference(sdata->keys[key_idx]);
@@ -1173,7 +1174,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK);
changed |= BSS_CHANGED_HE_OBSS_PD;
- if (params->he_bss_color.enabled)
+ if (params->beacon.he_bss_color.enabled)
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
@@ -1230,7 +1231,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
sdata->vif.bss_conf.twt_responder = params->twt_responder;
sdata->vif.bss_conf.he_obss_pd = params->he_obss_pd;
- sdata->vif.bss_conf.he_bss_color = params->he_bss_color;
+ sdata->vif.bss_conf.he_bss_color = params->beacon.he_bss_color;
sdata->vif.bss_conf.s1g = params->chandef.chan->band ==
NL80211_BAND_S1GHZ;
@@ -1315,6 +1316,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_beacon_data *params)
{
struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_bss_conf *bss_conf;
struct beacon_data *old;
int err;
@@ -1334,10 +1336,28 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_assign_beacon(sdata, params, NULL, NULL);
if (err < 0)
return err;
+
+ bss_conf = &sdata->vif.bss_conf;
+ if (params->he_bss_color_valid &&
+ params->he_bss_color.enabled != bss_conf->he_bss_color.enabled) {
+ bss_conf->he_bss_color.enabled = params->he_bss_color.enabled;
+ err |= BSS_CHANGED_HE_BSS_COLOR;
+ }
+
ieee80211_bss_info_change_notify(sdata, err);
return 0;
}
+static void ieee80211_free_next_beacon(struct ieee80211_sub_if_data *sdata)
+{
+ if (!sdata->u.ap.next_beacon)
+ return;
+
+ kfree(sdata->u.ap.next_beacon->mbssid_ies);
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+}
+
static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1372,11 +1392,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
mutex_unlock(&local->mtx);
- if (sdata->u.ap.next_beacon) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
- }
+ ieee80211_free_next_beacon(sdata);
/* turn off carrier for this interface and dependent VLANs */
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
@@ -1728,9 +1744,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta->listen_interval = params->listen_interval;
if (params->sta_modify_mask & STATION_PARAM_APPLY_STA_TXPOWER) {
- sta->sta.txpwr.type = params->txpwr.type;
+ sta->sta.deflink.txpwr.type = params->txpwr.type;
if (params->txpwr.type == NL80211_TX_POWER_LIMITED)
- sta->sta.txpwr.power = params->txpwr.power;
+ sta->sta.deflink.txpwr.power = params->txpwr.power;
ret = drv_sta_set_txpwr(local, sdata, sta);
if (ret)
return ret;
@@ -1740,7 +1756,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
sband, params->supported_rates,
params->supported_rates_len,
- &sta->sta.supp_rates[sband->band]);
+ &sta->sta.deflink.supp_rates[sband->band]);
}
if (params->ht_capa)
@@ -2927,7 +2943,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
return 0;
- ap = sdata->u.mgd.associated->bssid;
+ ap = sdata->u.mgd.bssid;
rcu_read_lock();
list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
@@ -3306,13 +3322,12 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
+ if (!sdata->u.ap.next_beacon)
+ return -EINVAL;
+
err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
NULL, NULL);
- if (sdata->u.ap.next_beacon) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
- }
+ ieee80211_free_next_beacon(sdata);
if (err < 0)
return err;
@@ -3468,9 +3483,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
IEEE80211_MAX_CNTDWN_COUNTERS_NUM) ||
(params->n_counter_offsets_presp >
IEEE80211_MAX_CNTDWN_COUNTERS_NUM)) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_free_next_beacon(sdata);
return -EINVAL;
}
@@ -3482,9 +3495,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL);
if (err < 0) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
+ ieee80211_free_next_beacon(sdata);
return err;
}
*changed |= err;
@@ -3574,11 +3585,8 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
{
sdata->vif.color_change_active = false;
- if (sdata->u.ap.next_beacon) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
- }
+
+ ieee80211_free_next_beacon(sdata);
cfg80211_color_change_aborted_notify(sdata->dev);
}
@@ -4314,13 +4322,12 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_AP: {
int ret;
+ if (!sdata->u.ap.next_beacon)
+ return -EINVAL;
+
ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
NULL, NULL);
- if (sdata->u.ap.next_beacon) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
- }
+ ieee80211_free_next_beacon(sdata);
if (ret < 0)
return ret;
@@ -4363,11 +4370,7 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_assign_beacon(sdata, &params->beacon_color_change,
NULL, &color_change);
if (err < 0) {
- if (sdata->u.ap.next_beacon) {
- kfree(sdata->u.ap.next_beacon->mbssid_ies);
- kfree(sdata->u.ap.next_beacon);
- sdata->u.ap.next_beacon = NULL;
- }
+ ieee80211_free_next_beacon(sdata);
return err;
}
*changed |= err;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e26d42de14ec..e3452445b363 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -199,7 +199,7 @@ static enum nl80211_chan_width ieee80211_get_sta_bw(struct sta_info *sta)
switch (width) {
case IEEE80211_STA_RX_BW_20:
- if (sta->sta.ht_cap.ht_supported)
+ if (sta->sta.deflink.ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20;
else
return NL80211_CHAN_WIDTH_20_NOHT;
@@ -375,15 +375,15 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
new_sta_bw = ieee80211_sta_cur_vht_bw(sta);
/* nothing change */
- if (new_sta_bw == sta->sta.bandwidth)
+ if (new_sta_bw == sta->sta.deflink.bandwidth)
continue;
/* vif changed to narrow BW and narrow BW for station wasn't
* requested or vise versa */
- if ((new_sta_bw < sta->sta.bandwidth) == !narrowed)
+ if ((new_sta_bw < sta->sta.deflink.bandwidth) == !narrowed)
continue;
- sta->sta.bandwidth = new_sta_bw;
+ sta->sta.deflink.bandwidth = new_sta_bw;
rate_control_rate_update(local, sband, sta,
IEEE80211_RC_BW_CHANGED);
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index f4c9a92f50f9..1fe43b264d75 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -504,6 +504,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
FLAG(SUPPORTS_CONC_MON_RX_DECAP),
+ FLAG(DETECTS_COLOR_COLLISION),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index e490c3da3aca..cf71484658c6 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -337,7 +337,7 @@ static ssize_t ieee80211_if_parse_tkip_mic_test(
dev_kfree_skb(skb);
return -ENOTCONN;
}
- memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN);
+ memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
memcpy(hdr->addr3, addr, ETH_ALEN);
sdata_unlock(sdata);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 88d9cc945a21..182094be9001 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -447,7 +447,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
int i;
ssize_t bufsz = 512;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap;
+ struct ieee80211_sta_ht_cap *htc = &sta->sta.deflink.ht_cap;
ssize_t ret;
buf = kzalloc(bufsz, GFP_KERNEL);
@@ -531,7 +531,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
{
char *buf, *p;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vhtc = &sta->sta.deflink.vht_cap;
ssize_t ret;
ssize_t bufsz = 512;
@@ -646,7 +646,7 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
char *buf, *p;
size_t buf_sz = PAGE_SIZE;
struct sta_info *sta = file->private_data;
- struct ieee80211_sta_he_cap *hec = &sta->sta.he_cap;
+ struct ieee80211_sta_he_cap *hec = &sta->sta.deflink.he_cap;
struct ieee80211_he_mcs_nss_supp *nss = &hec->he_mcs_nss_supp;
u8 ppe_size;
u8 *cap;
@@ -1052,9 +1052,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(vht_capa);
DEBUGFS_ADD(he_capa);
- DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates);
- DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
- DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered);
+ DEBUGFS_ADD_COUNTER(rx_duplicates, deflink.rx_stats.num_duplicates);
+ DEBUGFS_ADD_COUNTER(rx_fragments, deflink.rx_stats.fragments);
+ DEBUGFS_ADD_COUNTER(tx_filtered, deflink.status_stats.filtered);
if (local->ops->wake_tx_queue) {
DEBUGFS_ADD(aqm);
diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c
index 364ad0ef7692..96c9486bf2fe 100644
--- a/net/mac80211/eht.c
+++ b/net/mac80211/eht.c
@@ -14,7 +14,7 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
u8 eht_cap_len, struct sta_info *sta)
{
- struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap;
+ struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 eht_ppe_size = 0;
u8 mcs_nss_size;
@@ -71,6 +71,6 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
eht_cap->has_eht = true;
- sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
- sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+ sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
+ sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta);
}
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index b2253df54413..31cd3c1ac07f 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -114,7 +114,7 @@ static void ieee80211_get_stats(struct net_device *dev,
sta_set_sinfo(sta, &sinfo, false);
i = 0;
- ADD_STA_STATS(sta);
+ ADD_STA_STATS(sta->link[0]);
data[i++] = sta->sta_state;
@@ -140,7 +140,7 @@ static void ieee80211_get_stats(struct net_device *dev,
memset(&sinfo, 0, sizeof(sinfo));
sta_set_sinfo(sta, &sinfo, false);
i = 0;
- ADD_STA_STATS(sta);
+ ADD_STA_STATS(sta->link[0]);
}
}
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index c05af7018f79..1a61f7552edd 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -49,7 +49,7 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_
break;
}
- sta->sta.he_6ghz_capa = *he_6ghz_capa;
+ sta->sta.deflink.he_6ghz_capa = *he_6ghz_capa;
}
static void ieee80211_he_mcs_disable(__le16 *he_mcs)
@@ -110,7 +110,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
struct sta_info *sta)
{
- struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap;
struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
@@ -153,8 +153,8 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
he_cap->has_he = true;
- sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
- sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+ sta->deflink.cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
+ sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta);
if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa)
ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta);
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 2eb7641f5556..171bd16b13f3 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -243,9 +243,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839;
apply:
- changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
+ changed = memcmp(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap));
- memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
+ memcpy(&sta->sta.deflink.ht_cap, &ht_cap, sizeof(ht_cap));
switch (sdata->vif.bss_conf.chandef.width) {
default:
@@ -264,9 +264,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
break;
}
- sta->sta.bandwidth = bw;
+ sta->sta.deflink.bandwidth = bw;
- sta->cur_max_bandwidth =
+ sta->deflink.cur_max_bandwidth =
ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 0416c4d22292..14c04fd48b7a 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -637,7 +637,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
return ieee80211_ibss_finish_sta(sta);
@@ -1005,7 +1005,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
if (sta) {
u32 prev_rates;
- prev_rates = sta->sta.supp_rates[band];
+ prev_rates = sta->sta.deflink.supp_rates[band];
/* make sure mandatory rates are always added */
scan_width = NL80211_BSS_CHAN_WIDTH_20;
if (rx_status->bw == RATE_INFO_BW_5)
@@ -1013,13 +1013,13 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
else if (rx_status->bw == RATE_INFO_BW_10)
scan_width = NL80211_BSS_CHAN_WIDTH_10;
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
- if (sta->sta.supp_rates[band] != prev_rates) {
+ if (sta->sta.deflink.supp_rates[band] != prev_rates) {
ibss_dbg(sdata,
"updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
sta->sta.addr, prev_rates,
- sta->sta.supp_rates[band]);
+ sta->sta.deflink.supp_rates[band]);
rates_updated = true;
}
} else {
@@ -1043,7 +1043,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
/* we both use HT */
struct ieee80211_ht_cap htcap_ie;
struct cfg80211_chan_def chandef;
- enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
+ enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth;
cfg80211_chandef_create(&chandef, channel, NL80211_CHAN_NO_HT);
ieee80211_chandef_ht_oper(elems->ht_operation, &chandef);
@@ -1058,7 +1058,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_40) {
/* we both use VHT */
struct ieee80211_vht_cap cap_ie;
- struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap cap = sta->sta.deflink.vht_cap;
u32 vht_cap_info =
le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
@@ -1069,11 +1069,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie));
ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
&cap_ie, sta);
- if (memcmp(&cap, &sta->sta.vht_cap, sizeof(cap)))
+ if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap)))
rates_updated |= true;
}
- if (bw != sta->sta.bandwidth)
+ if (bw != sta->sta.deflink.bandwidth)
rates_updated |= true;
if (!cfg80211_chandef_compatible(&sdata->u.ibss.chandef,
@@ -1083,12 +1083,12 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
if (sta && rates_updated) {
u32 changed = IEEE80211_RC_SUPP_RATES_CHANGED;
- u8 rx_nss = sta->sta.rx_nss;
+ u8 rx_nss = sta->sta.deflink.rx_nss;
/* Force rx_nss recalculation */
- sta->sta.rx_nss = 0;
+ sta->sta.deflink.rx_nss = 0;
rate_control_rate_init(sta);
- if (sta->sta.rx_nss != rx_nss)
+ if (sta->sta.deflink.rx_nss != rx_nss)
changed |= IEEE80211_RC_NSS_CHANGED;
drv_sta_rc_update(local, sdata, &sta->sta, changed);
@@ -1235,7 +1235,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] = supp_rates |
+ sta->sta.deflink.supp_rates[band] = supp_rates |
ieee80211_mandatory_rates(sband, scan_width);
spin_lock(&ifibss->incomplete_lock);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d4a7ba4a8202..86ef0a46a68c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -453,9 +453,10 @@ struct ieee80211_if_managed {
bool nullfunc_failed;
u8 connection_loss:1,
driver_disconnect:1,
- reconnect:1;
+ reconnect:1,
+ associated:1;
- struct cfg80211_bss *associated;
+ struct cfg80211_bss *assoc_bss;
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_mgd_assoc_data *assoc_data;
@@ -1148,6 +1149,9 @@ struct tpt_led_trigger {
* a scan complete for an aborted scan.
* @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
* cancelled.
+ * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR
+ * and could send a probe request after receiving a beacon.
+ * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
*/
enum {
SCAN_SW_SCANNING,
@@ -1156,6 +1160,8 @@ enum {
SCAN_COMPLETED,
SCAN_ABORTED,
SCAN_HW_CANCELLED,
+ SCAN_BEACON_WAIT,
+ SCAN_BEACON_DONE,
};
/**
@@ -1854,7 +1860,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
- u8 *bssid, u8 reason, bool tx);
+ u8 reason, bool tx);
/* IBSS code */
void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index f695fc80088b..0fcf8aebedc4 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -476,7 +476,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
!(new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX))
_ieee80211_set_tx_key(new, true);
} else {
- rcu_assign_pointer(sta->gtk[idx], new);
+ rcu_assign_pointer(sta->deflink.gtk[idx], new);
}
/* Only needed for transition from no key -> key.
* Still triggers unnecessary when using Extended Key ID
@@ -826,7 +826,8 @@ int ieee80211_key_link(struct ieee80211_key *key,
(old_key && old_key->conf.cipher != key->conf.cipher))
goto out;
} else if (sta) {
- old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]);
+ old_key = key_mtx_dereference(sdata->local,
+ sta->deflink.gtk[idx]);
} else {
old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
}
@@ -1076,8 +1077,8 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local,
int i;
mutex_lock(&local->key_mtx);
- for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
- key = key_mtx_dereference(local, sta->gtk[i]);
+ for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) {
+ key = key_mtx_dereference(local, sta->deflink.gtk[i]);
if (!key)
continue;
ieee80211_key_replace(key->sdata, key->sta,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a48a32f87897..5a385d4146b9 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -287,8 +287,8 @@ static void ieee80211_restart_work(struct work_struct *work)
if (sdata->vif.csa_active) {
sdata_lock(sdata);
ieee80211_sta_connection_lost(sdata,
- sdata->u.mgd.associated->bssid,
- WLAN_REASON_UNSPECIFIED, false);
+ WLAN_REASON_UNSPECIFIED,
+ false);
sdata_unlock(sdata);
}
}
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 44a6fdb6efbd..58ebdcd69d05 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -310,7 +310,7 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
LINK_FAIL_THRESH)
mesh_plink_broken(sta);
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+ sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &rinfo);
ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg,
cfg80211_calculate_bitrate(&rinfo));
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index a829470dd59e..42ba7424589e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -61,8 +61,8 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold;
return rssi_threshold == 0 ||
(sta &&
- (s8)-ewma_signal_read(&sta->rx_stats_avg.signal) >
- rssi_threshold);
+ (s8)-ewma_signal_read(&sta->deflink.rx_stats_avg.signal) >
+ rssi_threshold);
}
/**
@@ -125,7 +125,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
continue;
short_slot = false;
- if (erp_rates & sta->sta.supp_rates[sband->band])
+ if (erp_rates & sta->sta.deflink.supp_rates[sband->band])
short_slot = true;
else
break;
@@ -175,10 +175,10 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
sta->mesh->plink_state != NL80211_PLINK_ESTAB)
continue;
- if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20)
+ if (sta->sta.deflink.bandwidth > IEEE80211_STA_RX_BW_20)
continue;
- if (!sta->sta.ht_cap.ht_supported) {
+ if (!sta->sta.deflink.ht_cap.ht_supported) {
mpl_dbg(sdata, "nonHT sta (%pM) is present\n",
sta->sta.addr);
non_ht_sta = true;
@@ -415,7 +415,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
u32 rates, basic_rates = 0, changed = 0;
- enum ieee80211_sta_rx_bandwidth bw = sta->sta.bandwidth;
+ enum ieee80211_sta_rx_bandwidth bw = sta->sta.deflink.bandwidth;
sband = ieee80211_get_sband(sdata);
if (!sband)
@@ -425,7 +425,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
&basic_rates);
spin_lock_bh(&sta->mesh->plink_lock);
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
/* rates and capabilities don't change during peering */
if (sta->mesh->plink_state == NL80211_PLINK_ESTAB &&
@@ -433,9 +433,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
goto out;
sta->mesh->processed_beacon = true;
- if (sta->sta.supp_rates[sband->band] != rates)
+ if (sta->sta.deflink.supp_rates[sband->band] != rates)
changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
- sta->sta.supp_rates[sband->band] = rates;
+ sta->sta.deflink.supp_rates[sband->band] = rates;
if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
elems->ht_cap_elem, sta))
@@ -449,16 +449,16 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
elems->he_6ghz_capa,
sta);
- if (bw != sta->sta.bandwidth)
+ if (bw != sta->sta.deflink.bandwidth)
changed |= IEEE80211_RC_BW_CHANGED;
/* HT peer is operating 20MHz-only */
if (elems->ht_operation &&
!(elems->ht_operation->ht_param &
IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) {
- if (sta->sta.bandwidth != IEEE80211_STA_RX_BW_20)
+ if (sta->sta.deflink.bandwidth != IEEE80211_STA_RX_BW_20)
changed |= IEEE80211_RC_BW_CHANGED;
- sta->sta.bandwidth = IEEE80211_STA_RX_BW_20;
+ sta->sta.deflink.bandwidth = IEEE80211_STA_RX_BW_20;
}
if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index dc8aec1a5d3d..58d48dcae030 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1376,7 +1376,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct cfg80211_bss *cbss = ifmgd->associated;
+ struct cfg80211_bss *cbss = ifmgd->assoc_bss;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
enum nl80211_band current_band;
@@ -1398,7 +1398,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
bss->vht_cap_info,
ifmgd->flags,
- ifmgd->associated->bssid, &csa_ie);
+ ifmgd->bssid, &csa_ie);
if (!res) {
ch_switch.timestamp = timestamp;
@@ -1427,7 +1427,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
csa_ie.chandef.chan->band) {
sdata_info(sdata,
"AP %pM switches to different band (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
- ifmgd->associated->bssid,
+ ifmgd->bssid,
csa_ie.chandef.chan->center_freq,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
csa_ie.chandef.center_freq2);
@@ -1440,7 +1440,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
"AP %pM switches to unsupported channel "
"(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
"disconnecting\n",
- ifmgd->associated->bssid,
+ ifmgd->bssid,
csa_ie.chandef.chan->center_freq,
csa_ie.chandef.chan->freq_offset,
csa_ie.chandef.width, csa_ie.chandef.center_freq1,
@@ -1456,7 +1456,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
sdata_info(sdata,
"AP %pM tries to chanswitch to same channel, ignore\n",
- ifmgd->associated->bssid);
+ ifmgd->bssid);
ifmgd->csa_ignored_same_chan = true;
return;
}
@@ -2266,7 +2266,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
beacon_loss_count * bss_conf->beacon_int));
- sdata->u.mgd.associated = cbss;
+ sdata->u.mgd.associated = true;
+ sdata->u.mgd.assoc_bss = cbss;
memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
ieee80211_check_rate_mask(sdata);
@@ -2361,7 +2362,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
ieee80211_stop_poll(sdata);
- ifmgd->associated = NULL;
+ ifmgd->associated = false;
+ ifmgd->assoc_bss = NULL;
netif_carrier_off(sdata->dev);
/*
@@ -2608,8 +2610,7 @@ static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- const struct element *ssid;
- u8 *dst = ifmgd->associated->bssid;
+ u8 *dst = ifmgd->bssid;
u8 unicast_limit = max(1, max_probe_tries - 3);
struct sta_info *sta;
@@ -2642,19 +2643,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ifmgd->nullfunc_failed = false;
ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
- int ssid_len;
-
- rcu_read_lock();
- ssid = ieee80211_bss_get_elem(ifmgd->associated, WLAN_EID_SSID);
- if (WARN_ON_ONCE(ssid == NULL))
- ssid_len = 0;
- else
- ssid_len = ssid->datalen;
-
ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
- ssid->data, ssid_len,
- ifmgd->associated->channel);
- rcu_read_unlock();
+ sdata->vif.bss_conf.ssid,
+ sdata->vif.bss_conf.ssid_len,
+ ifmgd->assoc_bss->channel);
}
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
@@ -2744,7 +2736,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
sdata_assert_lock(sdata);
if (ifmgd->associated)
- cbss = ifmgd->associated;
+ cbss = ifmgd->assoc_bss;
else if (ifmgd->auth_data)
cbss = ifmgd->auth_data->bss;
else if (ifmgd->assoc_data)
@@ -2809,7 +2801,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
* AP is probably out of range (or not reachable for another
* reason) so remove the bss struct for that AP.
*/
- cfg80211_unlink_bss(local->hw.wiphy, ifmgd->associated);
+ cfg80211_unlink_bss(local->hw.wiphy, ifmgd->assoc_bss);
}
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
@@ -3219,8 +3211,8 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->associated &&
- ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) {
- const u8 *bssid = ifmgd->associated->bssid;
+ ether_addr_equal(mgmt->bssid, ifmgd->bssid)) {
+ const u8 *bssid = ifmgd->bssid;
sdata_info(sdata, "deauthenticated from %pM (Reason: %u=%s)\n",
bssid, reason_code,
@@ -3262,7 +3254,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
return;
if (!ifmgd->associated ||
- !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+ !ether_addr_equal(mgmt->bssid, ifmgd->bssid))
return;
reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
@@ -3342,7 +3334,7 @@ static bool ieee80211_twt_req_supported(const struct sta_info *sta,
if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT))
return false;
- return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] &
+ return sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[0] &
IEEE80211_HE_MAC_CAP0_TWT_RES;
}
@@ -3369,7 +3361,7 @@ static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata,
ieee80211_vif_type_p2p(&sdata->vif));
return bss_conf->he_support &&
- (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+ (sta->sta.deflink.he_cap.he_cap_elem.mac_cap_info[2] &
IEEE80211_HE_MAC_CAP2_BCAST_TWT) &&
own_he_cap &&
(own_he_cap->he_cap_elem.mac_cap_info[2] &
@@ -3587,7 +3579,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
elems->he_6ghz_capa,
sta);
- bss_conf->he_support = sta->sta.he_cap.has_he;
+ bss_conf->he_support = sta->sta.deflink.he_cap.has_he;
if (elems->rsnx && elems->rsnx_len &&
(elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
wiphy_ext_feature_isset(local->hw.wiphy,
@@ -3607,7 +3599,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
elems->eht_cap_len,
sta);
- bss_conf->eht_support = sta->sta.eht_cap.has_eht;
+ bss_conf->eht_support = sta->sta.deflink.eht_cap.has_eht;
} else {
bss_conf->eht_support = false;
}
@@ -3684,7 +3676,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
nss = *elems->opmode_notif & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK;
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
- sta->sta.rx_nss = nss;
+ sta->sta.deflink.rx_nss = nss;
}
rate_control_rate_init(sta);
@@ -3972,7 +3964,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
if (ifmgd->associated &&
- ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+ ether_addr_equal(mgmt->bssid, ifmgd->bssid))
ieee80211_reset_ap_probe(sdata);
}
@@ -4201,9 +4193,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
}
if (!ifmgd->associated ||
- !ieee80211_rx_our_beacon(bssid, ifmgd->associated))
+ !ieee80211_rx_our_beacon(bssid, ifmgd->assoc_bss))
return;
- bssid = ifmgd->associated->bssid;
+ bssid = ifmgd->bssid;
if (!(rx_status->flag & RX_FLAG_NO_SIGNAL_VAL))
ieee80211_handle_beacon_sig(sdata, ifmgd, bss_conf,
@@ -4519,7 +4511,7 @@ static void ieee80211_sta_timer(struct timer_list *t)
}
void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
- u8 *bssid, u8 reason, bool tx)
+ u8 reason, bool tx)
{
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -4750,11 +4742,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL &&
ifmgd->associated) {
- u8 bssid[ETH_ALEN];
+ u8 *bssid = ifmgd->bssid;
int max_tries;
- memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
-
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
max_tries = max_nullfunc_tries;
else
@@ -4774,7 +4764,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
mlme_dbg(sdata,
"No ack for nullfunc frame to AP %pM, disconnecting.\n",
bssid);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
false);
}
@@ -4784,7 +4774,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
mlme_dbg(sdata,
"Failed to send nullfunc to AP %pM after %dms, disconnecting\n",
bssid, probe_wait_ms);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
} else if (ifmgd->probe_send_count < max_tries) {
mlme_dbg(sdata,
@@ -4801,7 +4791,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
"No probe response from AP %pM after %dms, disconnecting.\n",
bssid, probe_wait_ms);
- ieee80211_sta_connection_lost(sdata, bssid,
+ ieee80211_sta_connection_lost(sdata,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false);
}
}
@@ -4842,9 +4832,9 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
if (!sta)
return;
- timeout = sta->status_stats.last_ack;
- if (time_before(sta->status_stats.last_ack, sta->rx_stats.last_rx))
- timeout = sta->rx_stats.last_rx;
+ timeout = sta->deflink.status_stats.last_ack;
+ if (time_before(sta->deflink.status_stats.last_ack, sta->deflink.rx_stats.last_rx))
+ timeout = sta->deflink.rx_stats.last_rx;
timeout += IEEE80211_CONNECTION_IDLE_TIME;
/* If timeout is after now, then update timer to fire at
@@ -4934,7 +4924,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata)
.bssid = bssid,
};
- memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
+ memcpy(bssid, ifmgd->bssid, ETH_ALEN);
ieee80211_mgd_deauth(sdata, &req);
}
@@ -4956,7 +4946,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME;
mlme_dbg(sdata, "driver requested disconnect after resume\n");
ieee80211_sta_connection_lost(sdata,
- ifmgd->associated->bssid,
WLAN_REASON_UNSPECIFIED,
true);
sdata_unlock(sdata);
@@ -4967,7 +4956,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_HW_RESTART;
mlme_dbg(sdata, "driver requested disconnect after hardware restart\n");
ieee80211_sta_connection_lost(sdata,
- ifmgd->associated->bssid,
WLAN_REASON_UNSPECIFIED,
true);
sdata_unlock(sdata);
@@ -5644,7 +5632,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
}
if (rates)
- new_sta->sta.supp_rates[cbss->channel->band] = rates;
+ new_sta->sta.deflink.supp_rates[cbss->channel->band] = rates;
else
sdata_info(sdata,
"No rates found, keeping mandatory only\n");
@@ -5842,7 +5830,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata,
"disconnect from AP %pM for new auth to %pM\n",
- ifmgd->associated->bssid, req->bss->bssid);
+ ifmgd->bssid, req->bss->bssid);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_UNSPECIFIED,
false, frame_buf);
@@ -5918,7 +5906,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata,
"disconnect from AP %pM for new assoc to %pM\n",
- ifmgd->associated->bssid, req->bss->bssid);
+ ifmgd->bssid, req->bss->bssid);
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_UNSPECIFIED,
false, frame_buf);
@@ -6132,6 +6120,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_EHT;
}
+ if (req->flags & ASSOC_REQ_DISABLE_EHT)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_EHT;
+
err = ieee80211_prep_connection(sdata, req->bss, true, override);
if (err)
goto err_clear;
@@ -6273,7 +6264,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
}
if (ifmgd->associated &&
- ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
+ ether_addr_equal(ifmgd->bssid, req->bssid)) {
sdata_info(sdata,
"deauthenticating from %pM by local choice (Reason: %u=%s)\n",
req->bssid, req->reason_code,
@@ -6304,7 +6295,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
* to cfg80211 while that's in a locked section already
* trying to tell us that the user wants to disconnect.
*/
- if (ifmgd->associated != req->bss)
+ if (ifmgd->assoc_bss != req->bss)
return -ENOLINK;
sdata_info(sdata,
@@ -6382,3 +6373,43 @@ void ieee80211_cqm_beacon_loss_notify(struct ieee80211_vif *vif, gfp_t gfp)
cfg80211_cqm_beacon_loss_notify(sdata->dev, gfp);
}
EXPORT_SYMBOL(ieee80211_cqm_beacon_loss_notify);
+
+static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata,
+ int rssi_min_thold,
+ int rssi_max_thold)
+{
+ trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold);
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
+ return;
+
+ /*
+ * Scale up threshold values before storing it, as the RSSI averaging
+ * algorithm uses a scaled up value as well. Change this scaling
+ * factor if the RSSI averaging algorithm changes.
+ */
+ sdata->u.mgd.rssi_min_thold = rssi_min_thold*16;
+ sdata->u.mgd.rssi_max_thold = rssi_max_thold*16;
+}
+
+void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif,
+ int rssi_min_thold,
+ int rssi_max_thold)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ WARN_ON(rssi_min_thold == rssi_max_thold ||
+ rssi_min_thold > rssi_max_thold);
+
+ _ieee80211_enable_rssi_reports(sdata, rssi_min_thold,
+ rssi_max_thold);
+}
+EXPORT_SYMBOL(ieee80211_enable_rssi_reports);
+
+void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ _ieee80211_enable_rssi_reports(sdata, 0, 0);
+}
+EXPORT_SYMBOL(ieee80211_disable_rssi_reports);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 7c1a735b9eee..f97cb4c453d3 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -74,7 +74,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* Add only mandatory rates for now */
sband = local->hw.wiphy->bands[band];
- sta->sta.supp_rates[band] =
+ sta->sta.deflink.supp_rates[band] =
ieee80211_mandatory_rates(sband, scan_width);
spin_lock(&ifocb->incomplete_lock);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 853c9a369d72..c5d2ab9df1e7 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -819,7 +819,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (!sdata->u.mgd.associated ||
(params->offchan && params->wait &&
local->ops->remain_on_channel &&
- memcmp(sdata->u.mgd.associated->bssid,
+ memcmp(sdata->u.mgd.bssid,
mgmt->bssid, ETH_ALEN)))
need_offchan = true;
sdata_unlock(sdata);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 8c6416129d5b..ae9700e0a1a5 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -371,7 +371,7 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
WARN_ONCE(i == sband->n_bitrates,
"no supported rates for sta %pM (0x%x, band %d) in rate_mask 0x%x with flags 0x%x\n",
sta ? sta->addr : NULL,
- sta ? sta->supp_rates[sband->band] : -1,
+ sta ? sta->deflink.supp_rates[sband->band] : -1,
sband->band,
rate_mask, rate_flags);
@@ -781,11 +781,11 @@ static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
u16 sta_vht_mask[NL80211_VHT_NSS_MAX];
/* Filter out rates that the STA does not support */
- *mask &= sta->supp_rates[sband->band];
+ *mask &= sta->deflink.supp_rates[sband->band];
for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
- mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
+ mcs_mask[i] &= sta->deflink.ht_cap.mcs.rx_mask[i];
- sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map;
+ sta_vht_cap = sta->deflink.vht_cap.vht_mcs.rx_mcs_map;
ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask);
for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
vht_mask[i] &= sta_vht_mask[i];
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 9c6ace858107..5f27e6746762 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -333,6 +333,17 @@ minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate)
!!(rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH));
}
+/*
+ * Look up an MCS group index based on new cfg80211 rate_info.
+ */
+static int
+minstrel_ht_ri_get_group_idx(struct rate_info *rate)
+{
+ return GROUP_IDX((rate->mcs / 8) + 1,
+ !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI),
+ !!(rate->bw & RATE_INFO_BW_40));
+}
+
static int
minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate)
{
@@ -342,6 +353,18 @@ minstrel_vht_get_group_idx(struct ieee80211_tx_rate *rate)
2*!!(rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH));
}
+/*
+ * Look up an MCS group index based on new cfg80211 rate_info.
+ */
+static int
+minstrel_vht_ri_get_group_idx(struct rate_info *rate)
+{
+ return VHT_GROUP_IDX(rate->nss,
+ !!(rate->flags & RATE_INFO_FLAGS_SHORT_GI),
+ !!(rate->bw & RATE_INFO_BW_40) +
+ 2*!!(rate->bw & RATE_INFO_BW_80));
+}
+
static struct minstrel_rate_stats *
minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
struct ieee80211_tx_rate *rate)
@@ -362,6 +385,9 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
group = MINSTREL_CCK_GROUP;
for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
+ if (!(mi->supported[group] & BIT(idx)))
+ continue;
+
if (rate->idx != mp->cck_rates[idx])
continue;
@@ -382,6 +408,50 @@ out:
return &mi->groups[group].rates[idx];
}
+/*
+ * Get the minstrel rate statistics for specified STA and rate info.
+ */
+static struct minstrel_rate_stats *
+minstrel_ht_ri_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
+ struct ieee80211_rate_status *rate_status)
+{
+ int group, idx;
+ struct rate_info *rate = &rate_status->rate_idx;
+
+ if (rate->flags & RATE_INFO_FLAGS_MCS) {
+ group = minstrel_ht_ri_get_group_idx(rate);
+ idx = rate->mcs % 8;
+ goto out;
+ }
+
+ if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) {
+ group = minstrel_vht_ri_get_group_idx(rate);
+ idx = rate->mcs;
+ goto out;
+ }
+
+ group = MINSTREL_CCK_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) {
+ if (rate->legacy != minstrel_cck_bitrates[ mp->cck_rates[idx] ])
+ continue;
+
+ /* short preamble */
+ if ((mi->supported[group] & BIT(idx + 4)) &&
+ mi->use_short_preamble)
+ idx += 4;
+ goto out;
+ }
+
+ group = MINSTREL_OFDM_GROUP;
+ for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++)
+ if (rate->legacy == minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][idx] ])
+ goto out;
+
+ idx = 0;
+out:
+ return &mi->groups[group].rates[idx];
+}
+
static inline struct minstrel_rate_stats *
minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
{
@@ -603,7 +673,7 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi)
int tmp_max_streams, group, tmp_idx, tmp_prob;
int tmp_tp = 0;
- if (!mi->sta->ht_cap.ht_supported)
+ if (!mi->sta->deflink.ht_cap.ht_supported)
return;
group = MI_RATE_GROUP(mi->max_tp_rate[0]);
@@ -993,7 +1063,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES];
u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate;
u16 index;
- bool ht_supported = mi->sta->ht_cap.ht_supported;
+ bool ht_supported = mi->sta->deflink.ht_cap.ht_supported;
if (mi->ampdu_packets > 0) {
if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
@@ -1149,6 +1219,40 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
return false;
}
+/*
+ * Check whether rate_status contains valid information.
+ */
+static bool
+minstrel_ht_ri_txstat_valid(struct minstrel_priv *mp,
+ struct minstrel_ht_sta *mi,
+ struct ieee80211_rate_status *rate_status)
+{
+ int i;
+
+ if (!rate_status)
+ return false;
+ if (!rate_status->try_count)
+ return false;
+
+ if (rate_status->rate_idx.flags & RATE_INFO_FLAGS_MCS ||
+ rate_status->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS)
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) {
+ if (rate_status->rate_idx.legacy ==
+ minstrel_cck_bitrates[ mp->cck_rates[i] ])
+ return true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates); i++) {
+ if (rate_status->rate_idx.legacy ==
+ minstrel_ofdm_bitrates[ mp->ofdm_rates[mi->band][i] ])
+ return true;
+ }
+
+ return false;
+}
+
static void
minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
{
@@ -1214,16 +1318,34 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->ampdu_packets++;
mi->ampdu_len += info->status.ampdu_len;
- last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]);
- for (i = 0; !last; i++) {
- last = (i == IEEE80211_TX_MAX_RATES - 1) ||
- !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]);
+ if (st->rates && st->n_rates) {
+ last = !minstrel_ht_ri_txstat_valid(mp, mi, &(st->rates[0]));
+ for (i = 0; !last; i++) {
+ last = (i == st->n_rates - 1) ||
+ !minstrel_ht_ri_txstat_valid(mp, mi,
+ &(st->rates[i + 1]));
+
+ rate = minstrel_ht_ri_get_stats(mp, mi,
+ &(st->rates[i]));
- rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
- if (last)
- rate->success += info->status.ampdu_ack_len;
+ if (last)
+ rate->success += info->status.ampdu_ack_len;
- rate->attempts += ar[i].count * info->status.ampdu_len;
+ rate->attempts += st->rates[i].try_count *
+ info->status.ampdu_len;
+ }
+ } else {
+ last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]);
+ for (i = 0; !last; i++) {
+ last = (i == IEEE80211_TX_MAX_RATES - 1) ||
+ !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]);
+
+ rate = minstrel_ht_get_stats(mp, mi, &ar[i]);
+ if (last)
+ rate->success += info->status.ampdu_ack_len;
+
+ rate->attempts += ar[i].count * info->status.ampdu_len;
+ }
}
if (mp->hw->max_rates > 1) {
@@ -1416,7 +1538,7 @@ minstrel_ht_get_max_amsdu_len(struct minstrel_ht_sta *mi)
* the limit here to avoid the complexity of having to de-aggregate
* packets in the queue.
*/
- if (!mi->sta->vht_cap.vht_supported)
+ if (!mi->sta->deflink.vht_cap.vht_supported)
return IEEE80211_MAX_MPDU_LEN_HT_BA;
/* unlimited */
@@ -1436,17 +1558,17 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
/* Start with max_tp_rate[0] */
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]);
- if (mp->hw->max_rates >= 3) {
- /* At least 3 tx rates supported, use max_tp_rate[1] next */
- minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[1]);
- }
+ /* Fill up remaining, keep one entry for max_probe_rate */
+ for (; i < (mp->hw->max_rates - 1); i++)
+ minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]);
- if (mp->hw->max_rates >= 2) {
+ if (i < mp->hw->max_rates)
minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
- }
+
+ if (i < IEEE80211_TX_RATE_TABLE_SIZE)
+ rates->rate[i].idx = -1;
mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi);
- rates->rate[i].idx = -1;
rate_control_set_rates(mp->hw, mi->sta, rates);
}
@@ -1533,7 +1655,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (sband->band != NL80211_BAND_2GHZ)
return;
- if (sta->ht_cap.ht_supported &&
+ if (sta->deflink.ht_cap.ht_supported &&
!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES))
return;
@@ -1556,7 +1678,7 @@ minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
const u8 *rates;
int i;
- if (sta->ht_cap.ht_supported)
+ if (sta->deflink.ht_cap.ht_supported)
return;
rates = mp->ofdm_rates[sband->band];
@@ -1576,10 +1698,11 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
{
struct minstrel_priv *mp = priv;
struct minstrel_ht_sta *mi = priv_sta;
- struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
- u16 ht_cap = sta->ht_cap.cap;
- struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
+ struct ieee80211_mcs_info *mcs = &sta->deflink.ht_cap.mcs;
+ u16 ht_cap = sta->deflink.ht_cap.cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap;
const struct ieee80211_rate *ctl_rate;
+ struct sta_info *sta_info;
bool ldpc, erp;
int use_vht;
int n_supported = 0;
@@ -1650,7 +1773,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
}
if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH &&
- sta->bandwidth < IEEE80211_STA_RX_BW_40)
+ sta->deflink.bandwidth < IEEE80211_STA_RX_BW_40)
continue;
nss = minstrel_mcs_groups[i].streams;
@@ -1677,7 +1800,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
continue;
if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) {
- if (sta->bandwidth < IEEE80211_STA_RX_BW_80 ||
+ if (sta->deflink.bandwidth < IEEE80211_STA_RX_BW_80 ||
((gflags & IEEE80211_TX_RC_SHORT_GI) &&
!(vht_cap->cap & IEEE80211_VHT_CAP_SHORT_GI_80))) {
continue;
@@ -1698,6 +1821,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
n_supported++;
}
+ sta_info = container_of(sta, struct sta_info, sta);
+ mi->use_short_preamble = test_sta_flag(sta_info, WLAN_STA_SHORT_PREAMBLE) &&
+ sta_info->sdata->vif.bss_conf.use_short_preamble;
+
minstrel_ht_update_cck(mp, mi, sband, sta);
minstrel_ht_update_ofdm(mp, mi, sband, sta);
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 06e7126727ad..1766ff0c78d3 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -180,7 +180,7 @@ struct minstrel_ht_sta {
/* tx flags to add for frames for this sta */
u32 tx_flags;
-
+ bool use_short_preamble;
u8 band;
u8 sample_seq;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 88d797fa82ff..3c08ae04ddbc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -221,7 +221,7 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
skb_queue_tail(&sdata->skb_queue, skb);
ieee80211_queue_work(&sdata->local->hw, &sdata->work);
if (sta)
- sta->rx_stats.packets++;
+ sta->deflink.rx_stats.packets++;
}
static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
@@ -1464,7 +1464,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) {
I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
- rx->sta->rx_stats.num_duplicates++;
+ rx->sta->deflink.rx_stats.num_duplicates++;
return RX_DROP_UNUSABLE;
} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
@@ -1760,46 +1760,47 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
NL80211_IFTYPE_ADHOC);
if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) &&
test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
if (ieee80211_is_data(hdr->frame_control) &&
!is_multicast_ether_addr(hdr->addr1))
- sta->rx_stats.last_rate =
+ sta->deflink.rx_stats.last_rate =
sta_stats_encode_rate(status);
}
} else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
} else if (!ieee80211_is_s1g_beacon(hdr->frame_control) &&
!is_multicast_ether_addr(hdr->addr1)) {
/*
* Mesh beacons will update last_rx when if they are found to
* match the current local configuration when processed.
*/
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
if (ieee80211_is_data(hdr->frame_control))
- sta->rx_stats.last_rate = sta_stats_encode_rate(status);
+ sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status);
}
- sta->rx_stats.fragments++;
+ sta->deflink.rx_stats.fragments++;
- u64_stats_update_begin(&rx->sta->rx_stats.syncp);
- sta->rx_stats.bytes += rx->skb->len;
- u64_stats_update_end(&rx->sta->rx_stats.syncp);
+ u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp);
+ sta->deflink.rx_stats.bytes += rx->skb->len;
+ u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp);
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
- sta->rx_stats.last_signal = status->signal;
- ewma_signal_add(&sta->rx_stats_avg.signal, -status->signal);
+ sta->deflink.rx_stats.last_signal = status->signal;
+ ewma_signal_add(&sta->deflink.rx_stats_avg.signal,
+ -status->signal);
}
if (status->chains) {
- sta->rx_stats.chains = status->chains;
+ sta->deflink.rx_stats.chains = status->chains;
for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) {
int signal = status->chain_signal[i];
if (!(status->chains & BIT(i)))
continue;
- sta->rx_stats.chain_signal_last[i] = signal;
- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+ sta->deflink.rx_stats.chain_signal_last[i] = signal;
+ ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i],
-signal);
}
}
@@ -1860,7 +1861,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
* Update counter and free packet here to avoid
* counting this as a dropped packed.
*/
- sta->rx_stats.packets++;
+ sta->deflink.rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -1892,11 +1893,11 @@ ieee80211_rx_get_bigtk(struct ieee80211_rx_data *rx, int idx)
}
if (rx->sta)
- key = rcu_dereference(rx->sta->gtk[idx]);
+ key = rcu_dereference(rx->sta->deflink.gtk[idx]);
if (!key)
key = rcu_dereference(sdata->keys[idx]);
if (!key && rx->sta)
- key = rcu_dereference(rx->sta->gtk[idx2]);
+ key = rcu_dereference(rx->sta->deflink.gtk[idx2]);
if (!key)
key = rcu_dereference(sdata->keys[idx2]);
@@ -2011,7 +2012,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
test_sta_flag(rx->sta, WLAN_STA_MFP))
return RX_DROP_MONITOR;
- rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
+ rx->key = rcu_dereference(rx->sta->deflink.gtk[mmie_keyidx]);
}
if (!rx->key)
rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
@@ -2034,7 +2035,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
} else {
if (rx->sta) {
for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = rcu_dereference(rx->sta->gtk[i]);
+ key = rcu_dereference(rx->sta->deflink.gtk[i]);
if (key)
break;
}
@@ -2071,7 +2072,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
/* check per-station GTK first, if multicast packet */
if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
- rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
+ rx->key = rcu_dereference(rx->sta->deflink.gtk[keyidx]);
/* if not found, try default key */
if (!rx->key) {
@@ -2397,7 +2398,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out:
ieee80211_led_rx(rx->local);
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->sta->deflink.rx_stats.packets++;
return RX_CONTINUE;
}
@@ -2644,9 +2645,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
* for non-QoS-data frames. Here we know it's a data
* frame, so count MSDUs.
*/
- u64_stats_update_begin(&rx->sta->rx_stats.syncp);
- rx->sta->rx_stats.msdu[rx->seqno_idx]++;
- u64_stats_update_end(&rx->sta->rx_stats.syncp);
+ u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp);
+ rx->sta->deflink.rx_stats.msdu[rx->seqno_idx]++;
+ u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp);
}
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
@@ -3177,6 +3178,49 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb(sdata, skb);
}
+static void
+ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
+ const struct element *ie;
+ size_t baselen;
+
+ if (!wiphy_ext_feature_isset(rx->local->hw.wiphy,
+ NL80211_EXT_FEATURE_BSS_COLOR))
+ return;
+
+ if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION))
+ return;
+
+ if (rx->sdata->vif.csa_active)
+ return;
+
+ baselen = mgmt->u.beacon.variable - rx->skb->data;
+ if (baselen > rx->skb->len)
+ return;
+
+ ie = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION,
+ mgmt->u.beacon.variable,
+ rx->skb->len - baselen);
+ if (ie && ie->datalen >= sizeof(struct ieee80211_he_operation) &&
+ ie->datalen >= ieee80211_he_oper_size(ie->data + 1)) {
+ struct ieee80211_bss_conf *bss_conf = &rx->sdata->vif.bss_conf;
+ const struct ieee80211_he_operation *he_oper;
+ u8 color;
+
+ he_oper = (void *)(ie->data + 1);
+ if (le32_get_bits(he_oper->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED))
+ return;
+
+ color = le32_get_bits(he_oper->he_oper_params,
+ IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
+ if (color == bss_conf->he_bss_color.color)
+ ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
+ BIT_ULL(color));
+ }
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
{
@@ -3202,6 +3246,9 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
!(rx->flags & IEEE80211_RX_BEACON_REPORTED)) {
int sig = 0;
+ /* sw bss color collision detection */
+ ieee80211_rx_check_bss_color_collision(rx);
+
if (ieee80211_hw_check(&rx->local->hw, SIGNAL_DBM) &&
!(status->flag & RX_FLAG_NO_SIGNAL_VAL))
sig = status->signal;
@@ -3295,7 +3342,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
switch (mgmt->u.action.category) {
case WLAN_CATEGORY_HT:
/* reject HT action frames from stations not supporting HT */
- if (!rx->sta->sta.ht_cap.ht_supported)
+ if (!rx->sta->sta.deflink.ht_cap.ht_supported)
goto invalid;
if (sdata->vif.type != NL80211_IFTYPE_STATION &&
@@ -3359,7 +3406,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
struct sta_opmode_info sta_opmode = {};
/* If it doesn't support 40 MHz it can't change ... */
- if (!(rx->sta->sta.ht_cap.cap &
+ if (!(rx->sta->sta.deflink.ht_cap.cap &
IEEE80211_HT_CAP_SUP_WIDTH_20_40))
goto handled;
@@ -3369,13 +3416,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
max_bw = ieee80211_sta_cap_rx_bw(rx->sta);
/* set cur_max_bandwidth and recalc sta bw */
- rx->sta->cur_max_bandwidth = max_bw;
+ rx->sta->deflink.cur_max_bandwidth = max_bw;
new_bw = ieee80211_sta_cur_vht_bw(rx->sta);
- if (rx->sta->sta.bandwidth == new_bw)
+ if (rx->sta->sta.deflink.bandwidth == new_bw)
goto handled;
- rx->sta->sta.bandwidth = new_bw;
+ rx->sta->sta.deflink.bandwidth = new_bw;
sband = rx->local->hw.wiphy->bands[status->band];
sta_opmode.bw =
ieee80211_sta_rx_bw_to_chan_width(rx->sta);
@@ -3572,7 +3619,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
handled:
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->sta->deflink.rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
@@ -3606,7 +3653,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
ieee80211_rx_status_to_khz(status), sig,
rx->skb->data, rx->skb->len, 0)) {
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->sta->deflink.rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -3644,7 +3691,7 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx)
handled:
if (rx->sta)
- rx->sta->rx_stats.packets++;
+ rx->sta->deflink.rx_stats.packets++;
dev_kfree_skb(rx->skb);
return RX_QUEUED;
}
@@ -3864,7 +3911,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
case RX_DROP_MONITOR:
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
- rx->sta->rx_stats.dropped++;
+ rx->sta->deflink.rx_stats.dropped++;
fallthrough;
case RX_CONTINUE: {
struct ieee80211_rate *rate = NULL;
@@ -3883,7 +3930,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
case RX_DROP_UNUSABLE:
I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
if (rx->sta)
- rx->sta->rx_stats.dropped++;
+ rx->sta->deflink.rx_stats.dropped++;
dev_kfree_skb(rx->skb);
break;
case RX_QUEUED:
@@ -4435,15 +4482,15 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
void *sa = skb->data + ETH_ALEN;
void *da = skb->data;
- stats = &sta->rx_stats;
+ stats = &sta->deflink.rx_stats;
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats);
/* statistics part of ieee80211_rx_h_sta_process() */
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.signal,
+ ewma_signal_add(&sta->deflink.rx_stats_avg.signal,
-status->signal);
}
@@ -4459,7 +4506,7 @@ static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
stats->chain_signal_last[i] = signal;
if (!fast_rx->uses_rss)
- ewma_signal_add(&sta->rx_stats_avg.chain_signal[i],
+ ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i],
-signal);
}
}
@@ -4535,7 +4582,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
} addrs __aligned(2);
- struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+ struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
* to a common data structure; drivers can implement that per queue
@@ -4637,7 +4684,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
drop:
dev_kfree_skb(skb);
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(sta->deflink.pcpu_rx_stats);
stats->dropped++;
return true;
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index 4141bc80cdfd..8ca7d45d6daa 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -11,8 +11,8 @@
void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
{
/* avoid indicating legacy bitrates for S1G STAs */
- sta->tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS;
- sta->rx_stats.last_rate =
+ sta->deflink.tx_stats.last_rate.flags |= IEEE80211_TX_RC_S1G_MCS;
+ sta->deflink.rx_stats.last_rate =
STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G);
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5e6b275afc9e..b698756887eb 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -281,6 +281,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (likely(!sdata1 && !sdata2))
return;
+ if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
+ /*
+ * we were passive scanning because of radar/no-IR, but
+ * the beacon/proberesp rx gives us an opportunity to upgrade
+ * to active scan
+ */
+ set_bit(SCAN_BEACON_DONE, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ }
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -787,6 +797,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ if (req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
} else {
ieee80211_scan_state_send_probe(local, &next_delay);
next_delay = IEEE80211_CHANNEL_TIME;
@@ -998,6 +1010,8 @@ set_channel:
!scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
+ if (scan_req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
return;
}
@@ -1090,6 +1104,8 @@ void ieee80211_scan_work(struct work_struct *work)
goto out;
}
+ clear_bit(SCAN_BEACON_WAIT, &local->scanning);
+
/*
* as long as no delay is required advance immediately
* without scheduling a new work
@@ -1100,6 +1116,10 @@ void ieee80211_scan_work(struct work_struct *work)
goto out_complete;
}
+ if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) &&
+ local->next_scan_state == SCAN_DECISION)
+ local->next_scan_state = SCAN_SEND_PROBE;
+
switch (local->next_scan_state) {
case SCAN_DECISION:
/* if no more bands/channels left, complete scan */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 91fbb1ee5c38..e04a0905e941 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -287,7 +287,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
- free_percpu(sta->pcpu_rx_stats);
+ free_percpu(sta->deflink.pcpu_rx_stats);
kfree(sta);
}
@@ -346,9 +346,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
return NULL;
if (ieee80211_hw_check(hw, USES_RSS)) {
- sta->pcpu_rx_stats =
+ sta->deflink.pcpu_rx_stats =
alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp);
- if (!sta->pcpu_rx_stats)
+ if (!sta->deflink.pcpu_rx_stats)
goto free;
}
@@ -376,6 +376,14 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sta.max_rx_aggregation_subframes =
local->hw.max_rx_aggregation_subframes;
+ /* TODO link specific alloc and assignments for MLO Link STA */
+
+ /* For non MLO STA, link info can be accessed either via deflink
+ * or link[0]
+ */
+ sta->link[0] = &sta->deflink;
+ sta->sta.link[0] = &sta->sta.deflink;
+
/* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only.
* The Tx path starts to use a key as soon as the key slot ptk_idx
* references to is not NULL. To not use the initial Rx-only key
@@ -387,9 +395,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->local = local;
sta->sdata = sdata;
- sta->rx_stats.last_rx = jiffies;
+ sta->deflink.rx_stats.last_rx = jiffies;
- u64_stats_init(&sta->rx_stats.syncp);
+ u64_stats_init(&sta->deflink.rx_stats.syncp);
ieee80211_init_frag_cache(&sta->frags);
@@ -399,10 +407,10 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->reserved_tid = IEEE80211_TID_UNRESERVED;
sta->last_connected = ktime_get_seconds();
- ewma_signal_init(&sta->rx_stats_avg.signal);
- ewma_avg_signal_init(&sta->status_stats.avg_ack_signal);
- for (i = 0; i < ARRAY_SIZE(sta->rx_stats_avg.chain_signal); i++)
- ewma_signal_init(&sta->rx_stats_avg.chain_signal[i]);
+ ewma_signal_init(&sta->deflink.rx_stats_avg.signal);
+ ewma_avg_signal_init(&sta->deflink.status_stats.avg_ack_signal);
+ for (i = 0; i < ARRAY_SIZE(sta->deflink.rx_stats_avg.chain_signal); i++)
+ ewma_signal_init(&sta->deflink.rx_stats_avg.chain_signal[i]);
if (local->ops->wake_tx_queue) {
void *txq_data;
@@ -472,7 +480,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (!(rate->flags & mandatory))
continue;
- sta->sta.supp_rates[i] |= BIT(r);
+ sta->sta.deflink.supp_rates[i] |= BIT(r);
}
}
@@ -524,7 +532,7 @@ free_txq:
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
free:
- free_percpu(sta->pcpu_rx_stats);
+ free_percpu(sta->deflink.pcpu_rx_stats);
#ifdef CONFIG_MAC80211_MESH
kfree(sta->mesh);
#endif
@@ -2087,16 +2095,16 @@ int sta_info_move_state(struct sta_info *sta,
u8 sta_info_tx_streams(struct sta_info *sta)
{
- struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.ht_cap;
+ struct ieee80211_sta_ht_cap *ht_cap = &sta->sta.deflink.ht_cap;
u8 rx_streams;
- if (!sta->sta.ht_cap.ht_supported)
+ if (!sta->sta.deflink.ht_cap.ht_supported)
return 1;
- if (sta->sta.vht_cap.vht_supported) {
+ if (sta->sta.deflink.vht_cap.vht_supported) {
int i;
u16 tx_mcs_map =
- le16_to_cpu(sta->sta.vht_cap.vht_mcs.tx_mcs_map);
+ le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.tx_mcs_map);
for (i = 7; i >= 0; i--)
if ((tx_mcs_map & (0x3 << (i * 2))) !=
@@ -2123,16 +2131,16 @@ u8 sta_info_tx_streams(struct sta_info *sta)
static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
- struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
+ struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats;
int cpu;
- if (!sta->pcpu_rx_stats)
+ if (!sta->deflink.pcpu_rx_stats)
return stats;
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpustats;
- cpustats = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu);
if (time_after(cpustats->last_rx, stats->last_rx))
stats = cpustats;
@@ -2226,13 +2234,15 @@ static void sta_set_tidstats(struct sta_info *sta,
int cpu;
if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
- tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid);
+ tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats,
+ tid);
- if (sta->pcpu_rx_stats) {
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
tidstats->rx_msdu +=
sta_get_tidstats_msdu(cpurxs, tid);
}
@@ -2243,19 +2253,19 @@ static void sta_set_tidstats(struct sta_info *sta,
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU);
- tidstats->tx_msdu = sta->tx_stats.msdu[tid];
+ tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
- tidstats->tx_msdu_retries = sta->status_stats.msdu_retries[tid];
+ tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
- tidstats->tx_msdu_failed = sta->status_stats.msdu_failed[tid];
+ tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid];
}
if (local->ops->wake_tx_queue && tid < IEEE80211_NUM_TIDS) {
@@ -2326,26 +2336,27 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
sinfo->tx_bytes = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- sinfo->tx_bytes += sta->tx_stats.bytes[ac];
+ sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac];
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
sinfo->tx_packets = 0;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
- sinfo->tx_packets += sta->tx_stats.packets[ac];
+ sinfo->tx_packets += sta->deflink.tx_stats.packets[ac];
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
}
if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
- sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
+ sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats);
- if (sta->pcpu_rx_stats) {
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
sinfo->rx_bytes += sta_get_stats_bytes(cpurxs);
}
}
@@ -2354,12 +2365,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
- sinfo->rx_packets = sta->rx_stats.packets;
- if (sta->pcpu_rx_stats) {
+ sinfo->rx_packets = sta->deflink.rx_stats.packets;
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpu);
sinfo->rx_packets += cpurxs->packets;
}
}
@@ -2367,12 +2379,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) {
- sinfo->tx_retries = sta->status_stats.retry_count;
+ sinfo->tx_retries = sta->deflink.status_stats.retry_count;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) {
- sinfo->tx_failed = sta->status_stats.retry_failed;
+ sinfo->tx_failed = sta->deflink.status_stats.retry_failed;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
}
@@ -2393,12 +2405,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
}
- sinfo->rx_dropped_misc = sta->rx_stats.dropped;
- if (sta->pcpu_rx_stats) {
+ sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped;
+ if (sta->deflink.pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
+ cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu);
sinfo->rx_dropped_misc += cpurxs->dropped;
}
}
@@ -2417,10 +2429,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
}
- if (!sta->pcpu_rx_stats &&
+ if (!sta->deflink.pcpu_rx_stats &&
!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) {
sinfo->signal_avg =
- -ewma_signal_read(&sta->rx_stats_avg.signal);
+ -ewma_signal_read(&sta->deflink.rx_stats_avg.signal);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
}
}
@@ -2433,7 +2445,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) |
BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
- if (!sta->pcpu_rx_stats)
+ if (!sta->deflink.pcpu_rx_stats)
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
sinfo->chains = last_rxstats->chains;
@@ -2442,12 +2454,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->chain_signal[i] =
last_rxstats->chain_signal_last[i];
sinfo->chain_signal_avg[i] =
- -ewma_signal_read(&sta->rx_stats_avg.chain_signal[i]);
+ -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]);
}
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
- sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate,
+ sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate,
&sinfo->txrate);
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
}
@@ -2529,16 +2541,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
- sta->status_stats.ack_signal_filled) {
- sinfo->ack_signal = sta->status_stats.last_ack_signal;
+ sta->deflink.status_stats.ack_signal_filled) {
+ sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) &&
- sta->status_stats.ack_signal_filled) {
+ sta->deflink.status_stats.ack_signal_filled) {
sinfo->avg_ack_signal =
-(s8)ewma_avg_signal_read(
- &sta->status_stats.avg_ack_signal);
+ &sta->deflink.status_stats.avg_ack_signal);
sinfo->filled |=
BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
}
@@ -2573,10 +2585,10 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
- if (!sta->status_stats.last_ack ||
- time_after(stats->last_rx, sta->status_stats.last_ack))
+ if (!sta->deflink.status_stats.last_ack ||
+ time_after(stats->last_rx, sta->deflink.status_stats.last_ack))
return stats->last_rx;
- return sta->status_stats.last_ack;
+ return sta->deflink.status_stats.last_ack;
}
static void sta_update_codel_params(struct sta_info *sta, u32 thr)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 379fd367197f..35c390bedfba 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -484,6 +484,86 @@ struct ieee80211_fragment_cache {
#define STA_SLOW_THRESHOLD 6000 /* 6 Mbps */
/**
+ * struct link_sta_info - Link STA information
+ * All link specific sta info are stored here for reference. This can be
+ * a single entry for non-MLD STA or multiple entries for MLD STA
+ * @addr: Link MAC address - Can be same as MLD STA mac address and is always
+ * same for non-MLD STA. This is used as key for searching link STA
+ * @link_id: Link ID uniquely identifying the link STA. This is 0 for non-MLD
+ * and set to the corresponding vif LinkId for MLD STA
+ * @sta: Points to the STA info
+ * @gtk: group keys negotiated with this station, if any
+ * @tx_stats: TX statistics
+ * @tx_stats.packets: # of packets transmitted
+ * @tx_stats.bytes: # of bytes in all packets transmitted
+ * @tx_stats.last_rate: last TX rate
+ * @tx_stats.msdu: # of transmitted MSDUs per TID
+ * @rx_stats: RX statistics
+ * @rx_stats_avg: averaged RX statistics
+ * @rx_stats_avg.signal: averaged signal
+ * @rx_stats_avg.chain_signal: averaged per-chain signal
+ * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
+ * this (by advertising the USES_RSS hw flag)
+ * @status_stats: TX status statistics
+ * @status_stats.filtered: # of filtered frames
+ * @status_stats.retry_failed: # of frames that failed after retry
+ * @status_stats.retry_count: # of retries attempted
+ * @status_stats.lost_packets: # of lost packets
+ * @status_stats.last_pkt_time: timestamp of last ACKed packet
+ * @status_stats.msdu_retries: # of MSDU retries
+ * @status_stats.msdu_failed: # of failed MSDUs
+ * @status_stats.last_ack: last ack timestamp (jiffies)
+ * @status_stats.last_ack_signal: last ACK signal
+ * @status_stats.ack_signal_filled: last ACK signal validity
+ * @status_stats.avg_ack_signal: average ACK signal
+ * TODO Move other link params from sta_info as required for MLD operation
+ */
+struct link_sta_info {
+ u8 addr[ETH_ALEN];
+ u8 link_id;
+
+ /* TODO rhash head/node for finding link_sta based on addr */
+
+ struct sta_info *sta;
+ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
+ NUM_DEFAULT_MGMT_KEYS +
+ NUM_DEFAULT_BEACON_KEYS];
+ struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
+
+ /* Updated from RX path only, no locking requirements */
+ struct ieee80211_sta_rx_stats rx_stats;
+ struct {
+ struct ewma_signal signal;
+ struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];
+ } rx_stats_avg;
+
+ /* Updated from TX status path only, no locking requirements */
+ struct {
+ unsigned long filtered;
+ unsigned long retry_failed, retry_count;
+ unsigned int lost_packets;
+ unsigned long last_pkt_time;
+ u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
+ u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
+ unsigned long last_ack;
+ s8 last_ack_signal;
+ bool ack_signal_filled;
+ struct ewma_avg_signal avg_ack_signal;
+ } status_stats;
+
+ /* Updated from TX path only, no locking requirements */
+ struct {
+ u64 packets[IEEE80211_NUM_ACS];
+ u64 bytes[IEEE80211_NUM_ACS];
+ struct ieee80211_tx_rate last_rate;
+ struct rate_info last_rate_info;
+ u64 msdu[IEEE80211_NUM_TIDS + 1];
+ } tx_stats;
+
+ enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
+};
+
+/**
* struct sta_info - STA information
*
* This structure collects information about a station that
@@ -498,7 +578,6 @@ struct ieee80211_fragment_cache {
* @sdata: virtual interface this station belongs to
* @ptk: peer keys negotiated with this station, if any
* @ptk_idx: last installed peer key index
- * @gtk: group keys negotiated with this station, if any
* @rate_ctrl: rate control algorithm reference
* @rate_ctrl_lock: spinlock used to protect rate control data
* (data inside the algorithm, so serializes calls there)
@@ -544,30 +623,19 @@ struct ieee80211_fragment_cache {
* @fast_rx: RX fastpath information
* @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
* the BSS one.
- * @tx_stats: TX statistics
- * @tx_stats.packets: # of packets transmitted
- * @tx_stats.bytes: # of bytes in all packets transmitted
- * @tx_stats.last_rate: last TX rate
- * @tx_stats.msdu: # of transmitted MSDUs per TID
- * @rx_stats: RX statistics
- * @rx_stats_avg: averaged RX statistics
- * @rx_stats_avg.signal: averaged signal
- * @rx_stats_avg.chain_signal: averaged per-chain signal
- * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
- * this (by advertising the USES_RSS hw flag)
- * @status_stats: TX status statistics
- * @status_stats.filtered: # of filtered frames
- * @status_stats.retry_failed: # of frames that failed after retry
- * @status_stats.retry_count: # of retries attempted
- * @status_stats.lost_packets: # of lost packets
- * @status_stats.last_pkt_time: timestamp of last ACKed packet
- * @status_stats.msdu_retries: # of MSDU retries
- * @status_stats.msdu_failed: # of failed MSDUs
- * @status_stats.last_ack: last ack timestamp (jiffies)
- * @status_stats.last_ack_signal: last ACK signal
- * @status_stats.ack_signal_filled: last ACK signal validity
- * @status_stats.avg_ack_signal: average ACK signal
* @frags: fragment cache
+ * @multi_link_sta: Identifies if this sta is a MLD STA or regular STA
+ * @deflink: This is the default link STA information, for non MLO STA all link
+ * specific STA information is accessed through @deflink or through
+ * link[0] which points to address of @deflink. For MLO Link STA
+ * the first added link STA will point to deflink.
+ * @link: reference to Link Sta entries. For Non MLO STA, except 1st link,
+ * i.e link[0] all links would be assigned to NULL by default and
+ * would access link information via @deflink or link[0]. For MLO
+ * STA, first link STA being added will point its link pointer to
+ * @deflink address and remaining would be allocated and the address
+ * would be assigned to link[link_id] where link_id is the id assigned
+ * by the AP.
*/
struct sta_info {
/* General information, mostly static */
@@ -577,9 +645,6 @@ struct sta_info {
u8 addr[ETH_ALEN];
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
- NUM_DEFAULT_MGMT_KEYS +
- NUM_DEFAULT_BEACON_KEYS];
struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS];
u8 ptk_idx;
struct rate_control_ref *rate_ctrl;
@@ -589,7 +654,6 @@ struct sta_info {
struct ieee80211_fast_tx __rcu *fast_tx;
struct ieee80211_fast_rx __rcu *fast_rx;
- struct ieee80211_sta_rx_stats __percpu *pcpu_rx_stats;
#ifdef CONFIG_MAC80211_MESH
struct mesh_sta *mesh;
@@ -619,38 +683,9 @@ struct sta_info {
u64 assoc_at;
long last_connected;
- /* Updated from RX path only, no locking requirements */
- struct ieee80211_sta_rx_stats rx_stats;
- struct {
- struct ewma_signal signal;
- struct ewma_signal chain_signal[IEEE80211_MAX_CHAINS];
- } rx_stats_avg;
-
/* Plus 1 for non-QoS frames */
__le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
- /* Updated from TX status path only, no locking requirements */
- struct {
- unsigned long filtered;
- unsigned long retry_failed, retry_count;
- unsigned int lost_packets;
- unsigned long last_pkt_time;
- u64 msdu_retries[IEEE80211_NUM_TIDS + 1];
- u64 msdu_failed[IEEE80211_NUM_TIDS + 1];
- unsigned long last_ack;
- s8 last_ack_signal;
- bool ack_signal_filled;
- struct ewma_avg_signal avg_ack_signal;
- } status_stats;
-
- /* Updated from TX path only, no locking requirements */
- struct {
- u64 packets[IEEE80211_NUM_ACS];
- u64 bytes[IEEE80211_NUM_ACS];
- struct ieee80211_tx_rate last_rate;
- struct rate_info last_rate_info;
- u64 msdu[IEEE80211_NUM_TIDS + 1];
- } tx_stats;
u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
struct airtime_info airtime[IEEE80211_NUM_ACS];
@@ -664,8 +699,6 @@ struct sta_info {
struct dentry *debugfs_dir;
#endif
- enum ieee80211_sta_rx_bandwidth cur_max_bandwidth;
-
enum ieee80211_smps_mode known_smps_mode;
const struct ieee80211_cipher_scheme *cipher_scheme;
@@ -677,6 +710,10 @@ struct sta_info {
struct ieee80211_fragment_cache frags;
+ bool multi_link_sta;
+ struct link_sta_info deflink;
+ struct link_sta_info *link[MAX_STA_LINKS];
+
/* keep last! */
struct ieee80211_sta sta;
};
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index e81e8a5bb774..e69272139437 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -72,7 +72,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
info->flags |= IEEE80211_TX_INTFL_RETRANSMISSION;
info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
- sta->status_stats.filtered++;
+ sta->deflink.status_stats.filtered++;
/*
* Clear more-data bit on filtered frames, it might be set
@@ -247,15 +247,19 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn)
static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
struct ieee80211_tx_status *status)
{
+ struct ieee80211_rate_status *status_rate = NULL;
int len = sizeof(struct ieee80211_radiotap_header);
+ if (status && status->n_rates)
+ status_rate = &status->rates[status->n_rates - 1];
+
/* IEEE80211_RADIOTAP_RATE rate */
- if (status && status->rate && !(status->rate->flags &
- (RATE_INFO_FLAGS_MCS |
- RATE_INFO_FLAGS_DMG |
- RATE_INFO_FLAGS_EDMG |
- RATE_INFO_FLAGS_VHT_MCS |
- RATE_INFO_FLAGS_HE_MCS)))
+ if (status_rate && !(status_rate->rate_idx.flags &
+ (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
len += 2;
else if (info->status.rates[0].idx >= 0 &&
!(info->status.rates[0].flags &
@@ -270,12 +274,12 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
/* IEEE80211_RADIOTAP_MCS
* IEEE80211_RADIOTAP_VHT */
- if (status && status->rate) {
- if (status->rate->flags & RATE_INFO_FLAGS_MCS)
+ if (status_rate) {
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS)
len += 3;
- else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)
+ else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_VHT_MCS)
len = ALIGN(len, 2) + 12;
- else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)
+ else if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_HE_MCS)
len = ALIGN(len, 2) + 12;
} else if (info->status.rates[0].idx >= 0) {
if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
@@ -297,10 +301,14 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_radiotap_header *rthdr;
+ struct ieee80211_rate_status *status_rate = NULL;
unsigned char *pos;
u16 legacy_rate = 0;
u16 txflags;
+ if (status && status->n_rates)
+ status_rate = &status->rates[status->n_rates - 1];
+
rthdr = skb_push(skb, rtap_len);
memset(rthdr, 0, rtap_len);
@@ -318,13 +326,14 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_RATE */
- if (status && status->rate) {
- if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS |
- RATE_INFO_FLAGS_DMG |
- RATE_INFO_FLAGS_EDMG |
- RATE_INFO_FLAGS_VHT_MCS |
- RATE_INFO_FLAGS_HE_MCS)))
- legacy_rate = status->rate->legacy;
+ if (status_rate) {
+ if (!(status_rate->rate_idx.flags &
+ (RATE_INFO_FLAGS_MCS |
+ RATE_INFO_FLAGS_DMG |
+ RATE_INFO_FLAGS_EDMG |
+ RATE_INFO_FLAGS_VHT_MCS |
+ RATE_INFO_FLAGS_HE_MCS)))
+ legacy_rate = status_rate->rate_idx.legacy;
} else if (info->status.rates[0].idx >= 0 &&
!(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
IEEE80211_TX_RC_VHT_MCS)))
@@ -357,20 +366,21 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
*pos = retry_count;
pos++;
- if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_MCS)) {
+ if (status_rate && (status_rate->rate_idx.flags & RATE_INFO_FLAGS_MCS))
+ {
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
- if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI)
pos[1] |= IEEE80211_RADIOTAP_MCS_SGI;
- if (status->rate->bw == RATE_INFO_BW_40)
+ if (status_rate->rate_idx.bw == RATE_INFO_BW_40)
pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40;
- pos[2] = status->rate->mcs;
+ pos[2] = status_rate->rate_idx.mcs;
pos += 3;
- } else if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) {
+ } else if (status_rate && (status_rate->rate_idx.flags &
+ RATE_INFO_FLAGS_VHT_MCS))
+ {
u16 known = local->hw.radiotap_vht_details &
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
@@ -385,12 +395,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos += 2;
/* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
- if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+ if (status_rate->rate_idx.flags & RATE_INFO_FLAGS_SHORT_GI)
*pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
pos++;
/* u8 bandwidth */
- switch (status->rate->bw) {
+ switch (status_rate->rate_idx.bw) {
case RATE_INFO_BW_160:
*pos = 11;
break;
@@ -407,7 +417,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos++;
/* u8 mcs_nss[4] */
- *pos = (status->rate->mcs << 4) | status->rate->nss;
+ *pos = (status_rate->rate_idx.mcs << 4) |
+ status_rate->rate_idx.nss;
pos += 4;
/* u8 coding */
@@ -416,8 +427,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
pos++;
/* u16 partial_aid */
pos += 2;
- } else if (status && status->rate &&
- (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
+ } else if (status_rate && (status_rate->rate_idx.flags &
+ RATE_INFO_FLAGS_HE_MCS))
+ {
struct ieee80211_radiotap_he *he;
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
@@ -435,7 +447,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f)
- he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss);
+ he->data6 |= HE_PREP(DATA6_NSTS, status_rate->rate_idx.nss);
#define CHECK_GI(s) \
BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
@@ -445,12 +457,12 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
CHECK_GI(1_6);
CHECK_GI(3_2);
- he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs);
- he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm);
+ he->data3 |= HE_PREP(DATA3_DATA_MCS, status_rate->rate_idx.mcs);
+ he->data3 |= HE_PREP(DATA3_DATA_DCM, status_rate->rate_idx.he_dcm);
- he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi);
+ he->data5 |= HE_PREP(DATA5_GI, status_rate->rate_idx.he_gi);
- switch (status->rate->bw) {
+ switch (status_rate->rate_idx.bw) {
case RATE_INFO_BW_20:
he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
@@ -481,16 +493,16 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
CHECK_RU_ALLOC(2x996);
he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
- status->rate->he_ru_alloc + 4);
+ status_rate->rate_idx.he_ru_alloc + 4);
break;
default:
- WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw);
+ WARN_ONCE(1, "Invalid SU BW %d\n", status_rate->rate_idx.bw);
}
pos += sizeof(struct ieee80211_radiotap_he);
}
- if ((status && status->rate) || info->status.rates[0].idx < 0)
+ if (status_rate || info->status.rates[0].idx < 0)
return;
/* IEEE80211_RADIOTAP_MCS
@@ -776,7 +788,7 @@ static void ieee80211_lost_packet(struct sta_info *sta,
!(info->flags & IEEE80211_TX_STAT_AMPDU))
return;
- sta->status_stats.lost_packets++;
+ sta->deflink.status_stats.lost_packets++;
if (sta->sta.tdls) {
pkt_time = STA_LOST_TDLS_PKT_TIME;
pkt_thr = STA_LOST_PKT_THRESHOLD;
@@ -789,13 +801,14 @@ static void ieee80211_lost_packet(struct sta_info *sta,
* mechanism.
* For non-TDLS, use STA_LOST_PKT_THRESHOLD and STA_LOST_PKT_TIME
*/
- if (sta->status_stats.lost_packets < pkt_thr ||
- !time_after(jiffies, sta->status_stats.last_pkt_time + pkt_time))
+ if (sta->deflink.status_stats.lost_packets < pkt_thr ||
+ !time_after(jiffies, sta->deflink.status_stats.last_pkt_time + pkt_time))
return;
cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr,
- sta->status_stats.lost_packets, GFP_ATOMIC);
- sta->status_stats.lost_packets = 0;
+ sta->deflink.status_stats.lost_packets,
+ GFP_ATOMIC);
+ sta->deflink.status_stats.lost_packets = 0;
}
static int ieee80211_tx_get_rates(struct ieee80211_hw *hw,
@@ -930,7 +943,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
(ieee80211_is_data(hdr->frame_control)) &&
(rates_idx != -1))
- sta->tx_stats.last_rate =
+ sta->deflink.tx_stats.last_rate =
info->status.rates[rates_idx];
if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
@@ -976,9 +989,9 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
return;
} else if (ieee80211_is_data_present(fc)) {
if (!acked && !noack_success)
- sta->status_stats.msdu_failed[tid]++;
+ sta->deflink.status_stats.msdu_failed[tid]++;
- sta->status_stats.msdu_retries[tid] +=
+ sta->deflink.status_stats.msdu_retries[tid] +=
retry_count;
}
@@ -1110,8 +1123,9 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
if (pubsta) {
sta = container_of(pubsta, struct sta_info, sta);
- if (status->rate)
- sta->tx_stats.last_rate_info = *status->rate;
+ if (status->n_rates)
+ sta->deflink.tx_stats.last_rate_info =
+ status->rates[status->n_rates - 1].rate_idx;
}
if (skb && (tx_time_est =
@@ -1142,8 +1156,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
struct ieee80211_sub_if_data *sdata = sta->sdata;
if (!acked && !noack_success)
- sta->status_stats.retry_failed++;
- sta->status_stats.retry_count += retry_count;
+ sta->deflink.status_stats.retry_failed++;
+ sta->deflink.status_stats.retry_count += retry_count;
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
@@ -1152,13 +1166,13 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
acked, info->status.tx_time);
if (acked) {
- sta->status_stats.last_ack = jiffies;
+ sta->deflink.status_stats.last_ack = jiffies;
- if (sta->status_stats.lost_packets)
- sta->status_stats.lost_packets = 0;
+ if (sta->deflink.status_stats.lost_packets)
+ sta->deflink.status_stats.lost_packets = 0;
/* Track when last packet was ACKed */
- sta->status_stats.last_pkt_time = jiffies;
+ sta->deflink.status_stats.last_pkt_time = jiffies;
/* Reset connection monitor */
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
@@ -1166,10 +1180,10 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
sdata->u.mgd.probe_send_count = 0;
if (ack_signal_valid) {
- sta->status_stats.last_ack_signal =
+ sta->deflink.status_stats.last_ack_signal =
(s8)info->status.ack_signal;
- sta->status_stats.ack_signal_filled = true;
- ewma_avg_signal_add(&sta->status_stats.avg_ack_signal,
+ sta->deflink.status_stats.ack_signal_filled = true;
+ ewma_avg_signal_add(&sta->deflink.status_stats.avg_ack_signal,
-info->status.ack_signal);
}
} else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
@@ -1235,7 +1249,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
rate_control_tx_status(local, sband, &status);
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
- sta->tx_stats.last_rate = info->status.rates[0];
+ sta->deflink.tx_stats.last_rate = info->status.rates[0];
}
EXPORT_SYMBOL(ieee80211_tx_rate_update);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 137be9ec94af..4e2d22e47429 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -459,9 +459,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
} else if (action_code == WLAN_TDLS_SETUP_RESPONSE &&
- ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
+ ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) {
/* the peer caps are already intersected with our own */
- memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap));
+ memcpy(&ht_cap, &sta->sta.deflink.ht_cap, sizeof(ht_cap));
pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
@@ -510,9 +510,9 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap);
} else if (action_code == WLAN_TDLS_SETUP_RESPONSE &&
- vht_cap.vht_supported && sta->sta.vht_cap.vht_supported) {
+ vht_cap.vht_supported && sta->sta.deflink.vht_cap.vht_supported) {
/* the peer caps are already intersected with our own */
- memcpy(&vht_cap, &sta->sta.vht_cap, sizeof(vht_cap));
+ memcpy(&vht_cap, &sta->sta.deflink.vht_cap, sizeof(vht_cap));
/* the AID is present only when VHT is implemented */
ieee80211_tdls_add_aid(sdata, skb);
@@ -603,13 +603,13 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
* if HT support is only added in TDLS, we need an HT-operation IE.
* add the IE as required by IEEE802.11-2012 9.23.3.2.
*/
- if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
+ if (!ap_sta->sta.deflink.ht_cap.ht_supported && sta->sta.deflink.ht_cap.ht_supported) {
u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED |
IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
- ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
+ ieee80211_ie_build_ht_oper(pos, &sta->sta.deflink.ht_cap,
&sdata->vif.bss_conf.chandef, prot,
true);
}
@@ -618,7 +618,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
/* only include VHT-operation if not on the 2.4GHz band */
if (sband->band != NL80211_BAND_2GHZ &&
- sta->sta.vht_cap.vht_supported) {
+ sta->sta.deflink.vht_cap.vht_supported) {
/*
* if both peers support WIDER_BW, we can expand the chandef to
* a wider compatible one, up to 80MHz
@@ -627,7 +627,7 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
ieee80211_tdls_chandef_vht_upgrade(sdata, sta);
pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
- ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
+ ieee80211_ie_build_vht_oper(pos, &sta->sta.deflink.vht_cap,
&sta->tdls_chandef);
}
@@ -1269,8 +1269,8 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata,
bw = ieee80211_chan_width_to_rx_bw(conf->def.width);
bw = min(bw, ieee80211_sta_cap_rx_bw(sta));
- if (bw != sta->sta.bandwidth) {
- sta->sta.bandwidth = bw;
+ if (bw != sta->sta.deflink.bandwidth) {
+ sta->sta.deflink.bandwidth = bw;
rate_control_rate_update(local, sband, sta,
IEEE80211_RC_BW_CHANGED);
/*
@@ -1296,7 +1296,7 @@ static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata)
if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
!test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
!test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH) ||
- !sta->sta.ht_cap.ht_supported)
+ !sta->sta.deflink.ht_cap.ht_supported)
continue;
result = true;
break;
@@ -1321,7 +1321,7 @@ iee80211_tdls_recalc_ht_protection(struct ieee80211_sub_if_data *sdata,
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
return;
- tdls_ht = (sta && sta->sta.ht_cap.ht_supported) ||
+ tdls_ht = (sta && sta->sta.deflink.ht_cap.ht_supported) ||
iee80211_tdls_have_ht_peers(sdata);
opmode = sdata->vif.bss_conf.ht_operation_mode;
@@ -1900,7 +1900,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
}
/* peer should have known better */
- if (!sta->sta.ht_cap.ht_supported && elems->sec_chan_offs &&
+ if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs &&
elems->sec_chan_offs->sec_chan_offs) {
tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
ret = -ENOTSUPP;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index d91498f77796..743adfbb9b15 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -860,8 +860,8 @@ TRACE_EVENT(drv_sta_set_txpwr,
LOCAL_ASSIGN;
VIF_ASSIGN;
STA_ASSIGN;
- __entry->txpwr = sta->txpwr.power;
- __entry->type = sta->txpwr.type;
+ __entry->txpwr = sta->deflink.txpwr.power;
+ __entry->type = sta->deflink.txpwr.type;
),
TP_printk(
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b6b20f38de0e..0e4efc08c762 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -768,9 +768,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
if (txrc.reported_rate.idx < 0) {
txrc.reported_rate = tx->rate;
if (tx->sta && ieee80211_is_tx_data(tx->skb))
- tx->sta->tx_stats.last_rate = txrc.reported_rate;
+ tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate;
} else if (tx->sta)
- tx->sta->tx_stats.last_rate = txrc.reported_rate;
+ tx->sta->deflink.tx_stats.last_rate = txrc.reported_rate;
if (ratetbl)
return TX_CONTINUE;
@@ -837,7 +837,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number);
tx->sdata->sequence_number += 0x10;
if (tx->sta)
- tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++;
+ tx->sta->deflink.tx_stats.msdu[IEEE80211_NUM_TIDS]++;
return TX_CONTINUE;
}
@@ -851,7 +851,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
/* include per-STA, per-TID sequence counter */
tid = ieee80211_get_tid(hdr);
- tx->sta->tx_stats.msdu[tid]++;
+ tx->sta->deflink.tx_stats.msdu[tid]++;
hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
@@ -1004,10 +1004,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
skb_queue_walk(&tx->skbs, skb) {
ac = skb_get_queue_mapping(skb);
- tx->sta->tx_stats.bytes[ac] += skb->len;
+ tx->sta->deflink.tx_stats.bytes[ac] += skb->len;
}
if (ac >= 0)
- tx->sta->tx_stats.packets[ac]++;
+ tx->sta->deflink.tx_stats.packets[ac]++;
return TX_CONTINUE;
}
@@ -1159,7 +1159,7 @@ ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
return;
- if (!sta || !sta->sta.ht_cap.ht_supported ||
+ if (!sta || !sta->sta.deflink.ht_cap.ht_supported ||
!sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
skb->protocol == sdata->control_port_protocol)
return;
@@ -3150,8 +3150,6 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC);
/* if the kmemdup fails, continue w/o fast_tx */
- if (!fast_tx)
- goto out;
out:
/* we might have raced against another call to this function */
@@ -3462,18 +3460,18 @@ ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
}
if (skb_shinfo(skb)->gso_size)
- sta->tx_stats.msdu[tid] +=
+ sta->deflink.tx_stats.msdu[tid] +=
DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
else
- sta->tx_stats.msdu[tid]++;
+ sta->deflink.tx_stats.msdu[tid]++;
info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
/* statistics normally done by ieee80211_tx_h_stats (but that
* has to consider fragmentation, so is more complex)
*/
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++;
if (pn_offs) {
u64 pn;
@@ -4481,8 +4479,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
dev_sw_netstats_tx_add(dev, 1, skb->len);
- sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
- sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+ sta->deflink.tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->deflink.tx_stats.packets[skb_get_queue_mapping(skb)]++;
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 682a164f795a..1e26b5235add 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2854,46 +2854,6 @@ size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset)
return pos;
}
-static void _ieee80211_enable_rssi_reports(struct ieee80211_sub_if_data *sdata,
- int rssi_min_thold,
- int rssi_max_thold)
-{
- trace_api_enable_rssi_reports(sdata, rssi_min_thold, rssi_max_thold);
-
- if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
- return;
-
- /*
- * Scale up threshold values before storing it, as the RSSI averaging
- * algorithm uses a scaled up value as well. Change this scaling
- * factor if the RSSI averaging algorithm changes.
- */
- sdata->u.mgd.rssi_min_thold = rssi_min_thold*16;
- sdata->u.mgd.rssi_max_thold = rssi_max_thold*16;
-}
-
-void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif,
- int rssi_min_thold,
- int rssi_max_thold)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-
- WARN_ON(rssi_min_thold == rssi_max_thold ||
- rssi_min_thold > rssi_max_thold);
-
- _ieee80211_enable_rssi_reports(sdata, rssi_min_thold,
- rssi_max_thold);
-}
-EXPORT_SYMBOL(ieee80211_enable_rssi_reports);
-
-void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif)
-{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
-
- _ieee80211_enable_rssi_reports(sdata, 0, 0);
-}
-EXPORT_SYMBOL(ieee80211_disable_rssi_reports);
-
u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 cap)
{
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 8f16aa9c725d..ff26e0c4787b 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -118,14 +118,14 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_vht_cap *vht_cap_ie,
struct sta_info *sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap;
struct ieee80211_sta_vht_cap own_cap;
u32 cap_info, i;
bool have_80mhz;
memset(vht_cap, 0, sizeof(*vht_cap));
- if (!sta->sta.ht_cap.ht_supported)
+ if (!sta->sta.deflink.ht_cap.ht_supported)
return;
if (!vht_cap_ie || !sband->vht_cap.vht_supported)
@@ -295,10 +295,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
break;
default:
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
if (!(vht_cap->vht_mcs.tx_highest &
cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
@@ -310,10 +310,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
* above) between 160 and 80+80 yet.
*/
if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
}
- sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+ sta->sta.deflink.bandwidth = ieee80211_sta_cur_vht_bw(sta);
switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) {
case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
@@ -332,9 +332,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
/* FIXME: move this to some better location - parses HE/EHT now */
enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
- struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
- struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.eht_cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap;
+ struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap;
+ struct ieee80211_sta_eht_cap *eht_cap = &sta->sta.deflink.eht_cap;
u32 cap_width;
if (he_cap->has_he) {
@@ -369,7 +369,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
}
if (!vht_cap->vht_supported)
- return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
IEEE80211_STA_RX_BW_40 :
IEEE80211_STA_RX_BW_20;
@@ -392,14 +392,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta)
enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
{
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap;
u32 cap_width;
if (!vht_cap->vht_supported) {
- if (!sta->sta.ht_cap.ht_supported)
+ if (!sta->sta.deflink.ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20_NOHT;
- return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
+ return sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ?
NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20;
}
@@ -416,13 +416,13 @@ enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta)
enum nl80211_chan_width
ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta)
{
- enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.bandwidth;
- struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
+ enum ieee80211_sta_rx_bandwidth cur_bw = sta->sta.deflink.bandwidth;
+ struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.deflink.vht_cap;
u32 cap_width;
switch (cur_bw) {
case IEEE80211_STA_RX_BW_20:
- if (!sta->sta.ht_cap.ht_supported)
+ if (!sta->sta.deflink.ht_cap.ht_supported)
return NL80211_CHAN_WIDTH_20_NOHT;
else
return NL80211_CHAN_WIDTH_20;
@@ -473,7 +473,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width;
bw = ieee80211_sta_cap_rx_bw(sta);
- bw = min(bw, sta->cur_max_bandwidth);
+ bw = min(bw, sta->deflink.cur_max_bandwidth);
/* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
* IEEE80211-2016 specification makes higher bandwidth operation
@@ -501,12 +501,12 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
bool support_160;
/* if we received a notification already don't overwrite it */
- if (sta->sta.rx_nss)
+ if (sta->sta.deflink.rx_nss)
return;
- if (sta->sta.eht_cap.has_eht) {
+ if (sta->sta.deflink.eht_cap.has_eht) {
int i;
- const u8 *rx_nss_mcs = (void *)&sta->sta.eht_cap.eht_mcs_nss_supp;
+ const u8 *rx_nss_mcs = (void *)&sta->sta.deflink.eht_cap.eht_mcs_nss_supp;
/* get the max nss for EHT over all possible bandwidths and mcs */
for (i = 0; i < sizeof(struct ieee80211_eht_mcs_nss_supp); i++)
@@ -515,10 +515,10 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
IEEE80211_EHT_MCS_NSS_RX));
}
- if (sta->sta.he_cap.has_he) {
+ if (sta->sta.deflink.he_cap.has_he) {
int i;
u8 rx_mcs_80 = 0, rx_mcs_160 = 0;
- const struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+ const struct ieee80211_sta_he_cap *he_cap = &sta->sta.deflink.he_cap;
u16 mcs_160_map =
le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160);
u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80);
@@ -549,23 +549,23 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
he_rx_nss = rx_mcs_80;
}
- if (sta->sta.ht_cap.ht_supported) {
- if (sta->sta.ht_cap.mcs.rx_mask[0])
+ if (sta->sta.deflink.ht_cap.ht_supported) {
+ if (sta->sta.deflink.ht_cap.mcs.rx_mask[0])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[1])
+ if (sta->sta.deflink.ht_cap.mcs.rx_mask[1])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[2])
+ if (sta->sta.deflink.ht_cap.mcs.rx_mask[2])
ht_rx_nss++;
- if (sta->sta.ht_cap.mcs.rx_mask[3])
+ if (sta->sta.deflink.ht_cap.mcs.rx_mask[3])
ht_rx_nss++;
/* FIXME: consider rx_highest? */
}
- if (sta->sta.vht_cap.vht_supported) {
+ if (sta->sta.deflink.vht_cap.vht_supported) {
int i;
u16 rx_mcs_map;
- rx_mcs_map = le16_to_cpu(sta->sta.vht_cap.vht_mcs.rx_mcs_map);
+ rx_mcs_map = le16_to_cpu(sta->sta.deflink.vht_cap.vht_mcs.rx_mcs_map);
for (i = 7; i >= 0; i--) {
u8 mcs = (rx_mcs_map >> (2 * i)) & 3;
@@ -581,7 +581,7 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta)
rx_nss = max(vht_rx_nss, ht_rx_nss);
rx_nss = max(he_rx_nss, rx_nss);
rx_nss = max(eht_rx_nss, rx_nss);
- sta->sta.rx_nss = max_t(u8, 1, rx_nss);
+ sta->sta.deflink.rx_nss = max_t(u8, 1, rx_nss);
}
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -601,8 +601,8 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT;
nss += 1;
- if (sta->sta.rx_nss != nss) {
- sta->sta.rx_nss = nss;
+ if (sta->sta.deflink.rx_nss != nss) {
+ sta->sta.deflink.rx_nss = nss;
sta_opmode.rx_nss = nss;
changed |= IEEE80211_RC_NSS_CHANGED;
sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
@@ -611,27 +611,27 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80)
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
else
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
break;
case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
/* legacy only, no longer used by newer spec */
- sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+ sta->deflink.cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
break;
}
new_bw = ieee80211_sta_cur_vht_bw(sta);
- if (new_bw != sta->sta.bandwidth) {
- sta->sta.bandwidth = new_bw;
+ if (new_bw != sta->sta.deflink.bandwidth) {
+ sta->sta.deflink.bandwidth = new_bw;
sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(sta);
changed |= IEEE80211_RC_BW_CHANGED;
sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 7ed0d268aff2..5fd8a3e8b5b4 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -311,19 +311,21 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
-
-static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+/*
+ * Calculate AAD for CCMP/GCMP, returning qos_tid since we
+ * need that in CCMP also for b_0.
+ */
+static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad)
{
+ struct ieee80211_hdr *hdr = (void *)skb->data;
__le16 mask_fc;
int a4_included, mgmt;
u8 qos_tid;
- u16 len_a;
- unsigned int hdrlen;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ u16 len_a = 22;
/*
* Mask FC: zero subtype b4 b5 b6 (if not mgmt)
- * Retry, PwrMgt, MoreData; set Protected
+ * Retry, PwrMgt, MoreData, Order (if Qos Data); set Protected
*/
mgmt = ieee80211_is_mgmt(hdr->frame_control);
mask_fc = hdr->frame_control;
@@ -333,30 +335,17 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
mask_fc &= ~cpu_to_le16(0x0070);
mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
- len_a = hdrlen - 2;
a4_included = ieee80211_has_a4(hdr->frame_control);
+ if (a4_included)
+ len_a += 6;
- if (ieee80211_is_data_qos(hdr->frame_control))
+ if (ieee80211_is_data_qos(hdr->frame_control)) {
qos_tid = ieee80211_get_tid(hdr);
- else
+ mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
+ len_a += 2;
+ } else {
qos_tid = 0;
-
- /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
- * mode authentication are not allowed to collide, yet both are derived
- * from this vector b_0. We only set L := 1 here to indicate that the
- * data size can be represented in (L+1) bytes. The CCM layer will take
- * care of storing the data length in the top (L+1) bytes and setting
- * and clearing the other bits as is required to derive the two IVs.
- */
- b_0[0] = 0x1;
-
- /* Nonce: Nonce Flags | A2 | PN
- * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
- */
- b_0[1] = qos_tid | (mgmt << 4);
- memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
- memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
+ }
/* AAD (extra authenticate-only data) / masked 802.11 header
* FC | A1 | A2 | A3 | SC | [A4] | [QC] */
@@ -376,8 +365,31 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
aad[24] = qos_tid;
}
+
+ return qos_tid;
}
+static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad)
+{
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ u8 qos_tid = ccmp_gcmp_aad(skb, aad);
+
+ /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC
+ * mode authentication are not allowed to collide, yet both are derived
+ * from this vector b_0. We only set L := 1 here to indicate that the
+ * data size can be represented in (L+1) bytes. The CCM layer will take
+ * care of storing the data length in the top (L+1) bytes and setting
+ * and clearing the other bits as is required to derive the two IVs.
+ */
+ b_0[0] = 0x1;
+
+ /* Nonce: Nonce Flags | A2 | PN
+ * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
+ */
+ b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4);
+ memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
+ memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN);
+}
static inline void ccmp_pn2hdr(u8 *hdr, u8 *pn, int key_id)
{
@@ -571,9 +583,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
{
- __le16 mask_fc;
- u8 qos_tid;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
memcpy(j_0, hdr->addr2, ETH_ALEN);
memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN);
@@ -581,40 +591,7 @@ static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad)
j_0[14] = 0;
j_0[AES_BLOCK_SIZE - 1] = 0x01;
- /* AAD (extra authenticate-only data) / masked 802.11 header
- * FC | A1 | A2 | A3 | SC | [A4] | [QC]
- */
- put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]);
- /* Mask FC: zero subtype b4 b5 b6 (if not mgmt)
- * Retry, PwrMgt, MoreData; set Protected
- */
- mask_fc = hdr->frame_control;
- mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY |
- IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
- if (!ieee80211_is_mgmt(hdr->frame_control))
- mask_fc &= ~cpu_to_le16(0x0070);
- mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-
- put_unaligned(mask_fc, (__le16 *)&aad[2]);
- memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN);
-
- /* Mask Seq#, leave Frag# */
- aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f;
- aad[23] = 0;
-
- if (ieee80211_is_data_qos(hdr->frame_control))
- qos_tid = ieee80211_get_tid(hdr);
- else
- qos_tid = 0;
-
- if (ieee80211_has_a4(hdr->frame_control)) {
- memcpy(&aad[24], hdr->addr4, ETH_ALEN);
- aad[30] = qos_tid;
- aad[31] = 0;
- } else {
- memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN);
- aad[24] = qos_tid;
- }
+ ccmp_gcmp_aad(skb, aad);
}
static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id)
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index fbeebe3bc31d..1e4a9f74ed43 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -118,6 +118,7 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel)
if (!ret) {
wpan_phy->current_page = page;
wpan_phy->current_channel = channel;
+ ieee802154_configure_durations(wpan_phy);
}
return ret;
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 702560acc8ce..1381e6a5e180 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -56,6 +56,8 @@ struct ieee802154_local {
struct sk_buff *tx_skb;
struct work_struct tx_work;
+ /* A negative Linux error code or a null/positive MLME error status */
+ int tx_result;
};
enum {
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 520cedc594e1..bd7bdb1219dd 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -113,6 +113,50 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
}
EXPORT_SYMBOL(ieee802154_alloc_hw);
+void ieee802154_configure_durations(struct wpan_phy *phy)
+{
+ u32 duration = 0;
+
+ switch (phy->current_page) {
+ case 0:
+ if (BIT(phy->current_channel) & 0x1)
+ /* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */
+ duration = 50 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FE)
+ /* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */
+ duration = 25 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FFF800)
+ /* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
+ duration = 16 * NSEC_PER_USEC;
+ break;
+ case 2:
+ if (BIT(phy->current_channel) & 0x1)
+ /* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */
+ duration = 40 * NSEC_PER_USEC;
+ else if (BIT(phy->current_channel) & 0x7FE)
+ /* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
+ duration = 16 * NSEC_PER_USEC;
+ break;
+ case 3:
+ if (BIT(phy->current_channel) & 0x3FFF)
+ /* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */
+ duration = 6 * NSEC_PER_USEC;
+ break;
+ default:
+ break;
+ }
+
+ if (!duration) {
+ pr_debug("Unknown PHY symbol duration\n");
+ return;
+ }
+
+ phy->symbol_duration = duration;
+ phy->lifs_period = (IEEE802154_LIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+ phy->sifs_period = (IEEE802154_SIFS_PERIOD * phy->symbol_duration) / NSEC_PER_SEC;
+}
+EXPORT_SYMBOL(ieee802154_configure_durations);
+
void ieee802154_free_hw(struct ieee802154_hw *hw)
{
struct ieee802154_local *local = hw_to_local(hw);
@@ -131,10 +175,10 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
* Should be done when all drivers sets this value.
*/
- wpan_phy->lifs_period = IEEE802154_LIFS_PERIOD *
- wpan_phy->symbol_duration;
- wpan_phy->sifs_period = IEEE802154_SIFS_PERIOD *
- wpan_phy->symbol_duration;
+ wpan_phy->lifs_period =
+ (IEEE802154_LIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
+ wpan_phy->sifs_period =
+ (IEEE802154_SIFS_PERIOD * wpan_phy->symbol_duration) / 1000;
}
int ieee802154_register_hw(struct ieee802154_hw *hw)
@@ -157,6 +201,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
ieee802154_setup_wpan_phy_pib(local->phy);
+ ieee802154_configure_durations(local->phy);
+
if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) {
local->phy->supported.min_csma_backoffs = 4;
local->phy->supported.max_csma_backoffs = 4;
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index f2078238718b..9f024d85563b 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -58,8 +58,11 @@ enum hrtimer_restart ieee802154_xmit_ifs_timer(struct hrtimer *timer)
void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
bool ifs_handling)
{
+ struct ieee802154_local *local = hw_to_local(hw);
+
+ local->tx_result = IEEE802154_SUCCESS;
+
if (ifs_handling) {
- struct ieee802154_local *local = hw_to_local(hw);
u8 max_sifs_size;
/* If transceiver sets CRC on his own we need to use lifs
@@ -88,6 +91,23 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
}
EXPORT_SYMBOL(ieee802154_xmit_complete);
+void ieee802154_xmit_error(struct ieee802154_hw *hw, struct sk_buff *skb,
+ int reason)
+{
+ struct ieee802154_local *local = hw_to_local(hw);
+
+ local->tx_result = reason;
+ ieee802154_wake_queue(hw);
+ dev_kfree_skb_any(skb);
+}
+EXPORT_SYMBOL(ieee802154_xmit_error);
+
+void ieee802154_xmit_hw_error(struct ieee802154_hw *hw, struct sk_buff *skb)
+{
+ ieee802154_xmit_error(hw, skb, IEEE802154_SYSTEM_ERROR);
+}
+EXPORT_SYMBOL(ieee802154_xmit_hw_error);
+
void ieee802154_stop_device(struct ieee802154_local *local)
{
flush_workqueue(local->workqueue);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index e22b0cbb2f35..c2fc2a7b2528 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -216,7 +216,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
@@ -238,7 +238,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (rc < 0)
goto out_free;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (addr) {
struct mctp_skb_cb *cb = mctp_cb(skb);
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 17d9c49000c5..92ea4158f7fc 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -352,7 +352,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
if (params->deliver) {
KUNIT_EXPECT_EQ(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
KUNIT_EXPECT_EQ(test, skb->len, 1);
@@ -360,7 +360,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
} else {
KUNIT_EXPECT_NE(test, rc, 0);
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
KUNIT_EXPECT_NULL(test, skb2);
}
@@ -423,7 +423,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
rc = mctp_route_input(&rt->rt, skb);
}
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
if (params->rx_len) {
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
@@ -582,7 +582,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
rc = mctp_route_input(&rt->rt, skb);
/* (potentially) receive message */
- skb2 = skb_recv_datagram(sock->sk, 0, 1, &rc);
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
if (params->deliver)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d6fdc5782d33..35b5f806fdda 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1527,10 +1527,9 @@ static int mpls_ifdown(struct net_device *dev, int event)
rt->rt_nh_size;
struct mpls_route *orig = rt;
- rt = kmalloc(size, GFP_KERNEL);
+ rt = kmemdup(orig, size, GFP_KERNEL);
if (!rt)
return -ENOMEM;
- memcpy(rt, orig, size);
}
}
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index e54daceac58b..6e7df47c9584 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
@@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
mptcp_crypto_test-objs := crypto_test.o
mptcp_token_test-objs := token_test.o
obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o
+
+obj-$(CONFIG_BPF_SYSCALL) += bpf.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
new file mode 100644
index 000000000000..5a0a84ad94af
--- /dev/null
+++ b/net/mptcp/bpf.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2020, Tessares SA.
+ * Copyright (c) 2022, SUSE.
+ *
+ * Author: Nicolas Rybowski <nicolas.rybowski@tessares.net>
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/bpf.h>
+#include "protocol.h"
+
+struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
+{
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
+ return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
+
+ return NULL;
+}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 8b235468c88f..ae20b7d92e28 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -16,6 +16,11 @@
#define MPTCP_SYSCTL_PATH "net/mptcp"
static int mptcp_pernet_id;
+
+#ifdef CONFIG_SYSCTL
+static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
+#endif
+
struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_hdr;
@@ -26,6 +31,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
+ u8 pm_type;
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
return mptcp_get_pernet(net)->stale_loss_cnt;
}
+int mptcp_get_pm_type(const struct net *net)
+{
+ return mptcp_get_pernet(net)->pm_type;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
+ pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
}
#ifdef CONFIG_SYSCTL
@@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
+ {
+ .procname = "pm_type",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &mptcp_pm_type_max
+ },
{}
};
@@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
+ table[5].data = &pernet->pm_type;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index e55d3dfbee0c..0dac2863c6e1 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+ SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
@@ -55,6 +56,10 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
+ SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
+ SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 00576179a619..2be3596374f4 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
+ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
@@ -48,6 +49,12 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
+ MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDSHARED, /* Subflow rcv wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDCONFLICTUPDATE, /* subflow rcv wnd is overridden by msk's one due to
+ * conflict with another subflow while updating msk rcv wnd
+ */
+ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd6697..7f9a71780437 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -66,20 +66,103 @@ out_nosk:
return err;
}
+struct mptcp_diag_ctx {
+ long s_slot;
+ long s_num;
+ unsigned int l_slot;
+ unsigned int l_num;
+};
+
+static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ bool net_admin)
+{
+ struct inet_diag_dump_data *cb_data = cb->data;
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
+ struct nlattr *bc = cb_data->inet_diag_nla_bc;
+ struct net *net = sock_net(skb->sk);
+ int i;
+
+ for (i = diag_ctx->l_slot; i <= tcp_hashinfo.lhash2_mask; i++) {
+ struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ int num = 0;
+
+ ilb = &tcp_hashinfo.lhash2[i];
+
+ rcu_read_lock();
+ spin_lock(&ilb->lock);
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int ret;
+
+ if (num < diag_ctx->l_num)
+ goto next_listen;
+
+ if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
+ goto next_listen;
+
+ sk = ctx->conn;
+ if (!sk || !net_eq(sock_net(sk), net))
+ goto next_listen;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next_listen;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_listen;
+
+ ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+
+ sock_put(sk);
+
+ if (ret < 0) {
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+ diag_ctx->l_slot = i;
+ diag_ctx->l_num = num;
+ return;
+ }
+ diag_ctx->l_num = num + 1;
+ num = 0;
+next_listen:
+ ++num;
+ }
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+
+ cond_resched();
+ diag_ctx->l_num = 0;
+ }
+
+ diag_ctx->l_num = 0;
+ diag_ctx->l_slot = i;
+}
+
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
struct nlattr *bc;
+ BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
+
cb_data = cb->data;
bc = cb_data->inet_diag_nla_bc;
- while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
- NULL) {
+ while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
+ &diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
struct sock *sk = (struct sock *)msk;
int ret = 0;
@@ -101,11 +184,14 @@ next:
sock_put(sk);
if (ret < 0) {
/* will retry on the same position */
- cb->args[1]--;
+ diag_ctx->s_num--;
break;
}
cond_resched();
}
+
+ if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
+ mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
+
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ struct sock *lsk = READ_ONCE(msk->first);
+
+ if (lsk) {
+ /* override with settings from tcp listener,
+ * so Send-Q will show accept queue.
+ */
+ r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
+ }
+ }
+
if (!info)
return;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b548cec86c9d..be3b918a6d15 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
- if (unlikely(__mptcp_check_fallback(msk)))
+ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
@@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
- READ_ONCE(msk->pm.server_side))
+ !subflow->request_join)
tcp_send_ack(ssk);
goto fully_established;
}
@@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
add_addr_hmac_valid(msk, &mp_opt)) {
if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+ mptcp_pm_add_addr_received(sk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else {
mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
@@ -1224,20 +1224,62 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(const struct tcp_sock *tp)
+static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
{
const struct sock *ssk = (const struct sock *)tp;
- const struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow;
+ u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
struct mptcp_sock *msk;
- u64 ack_seq;
+ u32 new_win;
+ u64 win;
subflow = mptcp_subflow_ctx(ssk);
msk = mptcp_sk(subflow->conn);
- ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+ ack_seq = READ_ONCE(msk->ack_seq);
+ rcv_wnd_new = ack_seq + tp->rcv_wnd;
+
+ rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
+ if (after64(rcv_wnd_new, rcv_wnd_old)) {
+ u64 rcv_wnd;
+
+ for (;;) {
+ rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new);
+
+ if (rcv_wnd == rcv_wnd_old)
+ break;
+ if (before64(rcv_wnd_new, rcv_wnd)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
+ goto raise_win;
+ }
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
+ rcv_wnd_old = rcv_wnd;
+ }
+ return;
+ }
+
+ if (rcv_wnd_new != rcv_wnd_old) {
+raise_win:
+ win = rcv_wnd_old - ack_seq;
+ tp->rcv_wnd = min_t(u64, win, U32_MAX);
+ new_win = tp->rcv_wnd;
- if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ /* Make sure we do not exceed the maximum possible
+ * scaled window.
+ */
+ if (unlikely(th->syn))
+ new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
+ if (!tp->rx_opt.rcv_wscale &&
+ sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ new_win = min(new_win, MAX_TCP_WINDOW);
+ else
+ new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
+
+ /* RFC1323 scaling applied */
+ new_win >>= tp->rx_opt.rcv_wscale;
+ th->window = htons(new_win);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
+ }
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
@@ -1275,7 +1317,7 @@ static void put_len_csum(u16 len, __sum16 csum, void *data)
put_unaligned(csum, sumptr);
}
-void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
const struct sock *ssk = (const struct sock *)tp;
@@ -1350,8 +1392,11 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
+ /* data_len == 0 is reserved for the infinite mapping,
+ * the checksum will also be set to 0.
+ */
put_len_csum(mpext->data_len,
- mptcp_make_csum(mpext),
+ (mpext->data_len ? mptcp_make_csum(mpext) : 0),
ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
@@ -1562,7 +1607,7 @@ mp_capable_done:
}
if (tp)
- mptcp_set_rwin(tp);
+ mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index aa51b100e033..59a85220edc9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max;
int ret = 0;
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_active(msk);
+
subflows_max = mptcp_pm_get_subflows_max(msk);
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
@@ -178,7 +181,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = subflow->request_join || subflow->mp_join;
+ update_subflows = (subflow->request_join || subflow->mp_join) &&
+ mptcp_pm_is_kernel(msk);
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
@@ -195,19 +199,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
- mptcp_event_addr_announced(msk, addr);
+ mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock);
- if (!READ_ONCE(pm->accept_addr)) {
+ if (mptcp_pm_is_userspace(msk)) {
+ if (mptcp_userspace_pm_active(msk)) {
+ mptcp_pm_announce_addr(msk, addr, true);
+ mptcp_pm_add_addr_send_ack(msk);
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
+ }
+ } else if (!READ_ONCE(pm->accept_addr)) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
@@ -261,19 +274,49 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
- subflow->backup = bkup;
+ msk = mptcp_sk(sk);
+ if (subflow->backup != bkup) {
+ subflow->backup = bkup;
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ msk->last_snd = NULL;
+ else
+ __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
+ mptcp_data_unlock(sk);
+ }
- mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct sock *s = (struct sock *)msk;
+
pr_debug("fail_seq=%llu", fail_seq);
+
+ if (!READ_ONCE(msk->allow_infinite_fallback))
+ return;
+
+ if (!READ_ONCE(subflow->mp_fail_response_expect)) {
+ pr_debug("send MP_FAIL response and infinite map");
+
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
+ subflow->send_infinite_map = 1;
+ } else if (!sock_flag(sk, SOCK_DEAD)) {
+ pr_debug("MP_FAIL response received");
+
+ sk_stop_timer(s, &s->sk_timer);
+ }
}
/* path manager helpers */
@@ -381,27 +424,48 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
- msk->pm.add_addr_signaled = 0;
- msk->pm.add_addr_accepted = 0;
- msk->pm.local_addr_used = 0;
- msk->pm.subflows = 0;
- msk->pm.rm_list_tx.nr = 0;
- msk->pm.rm_list_rx.nr = 0;
- WRITE_ONCE(msk->pm.work_pending, false);
- WRITE_ONCE(msk->pm.addr_signal, 0);
- WRITE_ONCE(msk->pm.accept_addr, false);
- WRITE_ONCE(msk->pm.accept_subflow, false);
- WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
- msk->pm.status = 0;
- bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
+ struct mptcp_pm_data *pm = &msk->pm;
- mptcp_pm_nl_data_init(msk);
+ pm->add_addr_signaled = 0;
+ pm->add_addr_accepted = 0;
+ pm->local_addr_used = 0;
+ pm->subflows = 0;
+ pm->rm_list_tx.nr = 0;
+ pm->rm_list_rx.nr = 0;
+ WRITE_ONCE(pm->pm_type, pm_type);
+
+ if (pm_type == MPTCP_PM_TYPE_KERNEL) {
+ bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);
+
+ /* pm->work_pending must be only be set to 'true' when
+ * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
+ */
+ WRITE_ONCE(pm->work_pending,
+ (!!mptcp_pm_get_local_addr_max(msk) &&
+ subflows_allowed) ||
+ !!mptcp_pm_get_add_addr_signal_max(msk));
+ WRITE_ONCE(pm->accept_addr,
+ !!mptcp_pm_get_add_addr_accept_max(msk) &&
+ subflows_allowed);
+ WRITE_ONCE(pm->accept_subflow, subflows_allowed);
+ } else {
+ WRITE_ONCE(pm->work_pending, 0);
+ WRITE_ONCE(pm->accept_addr, 0);
+ WRITE_ONCE(pm->accept_subflow, 0);
+ }
+
+ WRITE_ONCE(pm->addr_signal, 0);
+ WRITE_ONCE(pm->remote_deny_join_id0, false);
+ pm->status = 0;
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
+ INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
mptcp_pm_data_reset(msk);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b5e8de6f7507..e099f2a12504 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -22,14 +22,6 @@ static struct genl_family mptcp_genl_family;
static int pm_nl_pernet_id;
-struct mptcp_pm_addr_entry {
- struct list_head list;
- struct mptcp_addr_info addr;
- u8 flags;
- int ifindex;
- struct socket *lsk;
-};
-
struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -55,8 +47,19 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
-static bool addresses_equal(const struct mptcp_addr_info *a,
- const struct mptcp_addr_info *b, bool use_port)
+static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
+{
+ return net_generic(net, pm_nl_pernet_id);
+}
+
+static struct pm_nl_pernet *
+pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
+{
+ return pm_nl_get_pernet(sock_net((struct sock *)msk));
+}
+
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
@@ -120,7 +123,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
local_address(skc, &cur);
- if (addresses_equal(&cur, saddr, saddr->port))
+ if (mptcp_addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -138,7 +141,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
remote_address(skc, &cur);
- if (addresses_equal(&cur, daddr, daddr->port))
+ if (mptcp_addresses_equal(&cur, daddr, daddr->port))
return true;
}
@@ -206,43 +209,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
- const struct pm_nl_pernet *pernet;
+ const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
@@ -262,7 +261,7 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, addr, true))
+ if (mptcp_addresses_equal(&entry->addr, addr, true))
return entry;
}
@@ -279,7 +278,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, &saddr, true)) {
+ if (mptcp_addresses_equal(&entry->addr, &saddr, true)) {
ret = true;
goto out;
}
@@ -353,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
return entry;
}
-static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry)
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -362,8 +361,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr))
- return false;
+ add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
+
+ if (add_entry) {
+ if (mptcp_pm_is_kernel(msk))
+ return false;
+
+ sk_reset_timer(sk, &add_entry->add_timer,
+ jiffies + mptcp_get_add_addr_timeout(net));
+ return true;
+ }
add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
if (!add_entry)
@@ -406,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
int i;
for (i = 0; i < nr; i++) {
- if (addresses_equal(&addrs[i], addr, addr->port))
+ if (mptcp_addresses_equal(&addrs[i], addr, addr->port))
return true;
}
@@ -442,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
+ if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false))
continue;
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
@@ -475,7 +482,7 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
+ if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) ||
(lookup_by_id && entry->addr.id == info->id))
return entry;
}
@@ -490,7 +497,7 @@ lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_inf
rcu_read_lock();
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(sock_net(sk));
add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
local_addr_max = mptcp_pm_get_local_addr_max(msk);
@@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
unsigned int subflows_max;
int i = 0;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
@@ -724,9 +731,11 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info local;
local_address((struct sock_common *)ssk, &local);
- if (!addresses_equal(&local, addr, addr->port))
+ if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
+ if (subflow->backup != bkup)
+ msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
@@ -796,6 +805,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
if (!removed)
continue;
+ if (!mptcp_pm_is_kernel(msk))
+ continue;
+
if (rm_type == MPTCP_MIB_RMADDR) {
msk->pm.add_addr_accepted--;
WRITE_ONCE(msk->pm.accept_addr, true);
@@ -889,9 +901,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
* singled addresses
*/
list_for_each_entry(cur, &pernet->local_addr_list, list) {
- if (addresses_equal(&cur->addr, &entry->addr,
- address_use_port(entry) &&
- address_use_port(cur))) {
+ if (mptcp_addresses_equal(&cur->addr, &entry->addr,
+ address_use_port(entry) &&
+ address_use_port(cur))) {
/* allow replacing the exiting endpoint only if such
* endpoint is an implicit one and the user-space
* did not provide an endpoint id
@@ -1018,14 +1030,17 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
*/
local_address((struct sock_common *)msk, &msk_local);
local_address((struct sock_common *)skc, &skc_local);
- if (addresses_equal(&msk_local, &skc_local, false))
+ if (mptcp_addresses_equal(&msk_local, &skc_local, false))
return 0;
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_local_id(msk, &skc_local);
+
+ pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -1052,18 +1067,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return ret;
}
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
-{
- struct mptcp_pm_data *pm = &msk->pm;
- bool subflows;
-
- subflows = !!mptcp_pm_get_subflows_max(msk);
- WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
- !!mptcp_pm_get_add_addr_signal_max(msk));
- WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
- WRITE_ONCE(pm->accept_subflow, subflows);
-}
-
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
@@ -1091,6 +1094,10 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
NLA_POLICY_NESTED(mptcp_pm_addr_policy),
[MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] =
+ NLA_POLICY_NESTED(mptcp_pm_addr_policy),
};
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
@@ -1139,11 +1146,12 @@ static int mptcp_pm_family_to_addr(int family)
return MPTCP_PM_ADDR_ATTR_ADDR4;
}
-static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
- bool require_family,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
+ const struct nlattr *attr,
+ struct genl_info *info,
+ struct mptcp_addr_info *addr,
+ bool require_family)
{
- struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
int err, addr_addr;
if (!attr) {
@@ -1157,27 +1165,29 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
if (err)
return err;
- memset(entry, 0, sizeof(*entry));
+ if (tb[MPTCP_PM_ADDR_ATTR_ID])
+ addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
+
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- goto skip_family;
+ return err;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
return -EINVAL;
}
- entry->addr.family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
- if (entry->addr.family != AF_INET
+ addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
+ if (addr->family != AF_INET
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- && entry->addr.family != AF_INET6
+ && addr->family != AF_INET6
#endif
) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"unknown address family");
return -EINVAL;
}
- addr_addr = mptcp_pm_family_to_addr(entry->addr.family);
+ addr_addr = mptcp_pm_family_to_addr(addr->family);
if (!tb[addr_addr]) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing address data");
@@ -1185,22 +1195,47 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (entry->addr.family == AF_INET6)
- entry->addr.addr6 = nla_get_in6_addr(tb[addr_addr]);
+ if (addr->family == AF_INET6)
+ addr->addr6 = nla_get_in6_addr(tb[addr_addr]);
else
#endif
- entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+ addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+
+ return err;
+}
+
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ memset(addr, 0, sizeof(*addr));
+
+ return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true);
+}
+
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+ int err;
+
+ memset(entry, 0, sizeof(*entry));
+
+ err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family);
+ if (err)
+ return err;
-skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
entry->ifindex = val;
}
- if (tb[MPTCP_PM_ADDR_ATTR_ID])
- entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
-
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
@@ -1212,7 +1247,7 @@ skip_family:
static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
- return net_generic(genl_info_net(info), pm_nl_pernet_id);
+ return pm_nl_get_pernet(genl_info_net(info));
}
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
@@ -1223,7 +1258,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (!READ_ONCE(msk->fully_established))
+ if (!READ_ONCE(msk->fully_established) ||
+ mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
@@ -1247,7 +1283,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
struct mptcp_pm_addr_entry addr, *entry;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, true, &addr);
if (ret < 0)
return ret;
@@ -1296,17 +1332,25 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
struct mptcp_pm_addr_entry *entry;
+ struct sock *sk = (struct sock *)msk;
+ struct net *net = sock_net(sk);
*flags = 0;
*ifindex = 0;
if (id) {
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
+ id,
+ flags,
+ ifindex);
+
rcu_read_lock();
- entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
*flags = entry->flags;
*ifindex = entry->ifindex;
@@ -1366,6 +1410,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
struct sock *sk = (struct sock *)msk;
bool remove_subflow;
+ if (mptcp_pm_is_userspace(msk))
+ goto next;
+
if (list_empty(&msk->conn_list)) {
mptcp_pm_remove_anno_addr(msk, addr, false);
goto next;
@@ -1400,11 +1447,11 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info msk_local;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
local_address((struct sock_common *)msk, &msk_local);
- if (!addresses_equal(&msk_local, addr, addr->port))
+ if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
goto next;
lock_sock(sk);
@@ -1430,7 +1477,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
unsigned int addr_max;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1470,8 +1517,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1507,9 +1554,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
- release_sock(sk);
+ if (!mptcp_pm_is_userspace(msk)) {
+ lock_sock(sk);
+ mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ release_sock(sk);
+ }
sock_put(sk);
cond_resched();
@@ -1602,7 +1651,7 @@ static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
void *reply;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1653,7 +1702,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
void *hdr;
int i;
- pernet = net_generic(net, pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(net);
spin_lock_bh(&pernet->lock);
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
@@ -1782,7 +1831,7 @@ static int mptcp_nl_set_flags(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
@@ -1813,7 +1862,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
u8 bkup = 0, lookup_by_id = 0;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1852,6 +1901,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf
nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk)
+{
+ return genl_has_listeners(&mptcp_genl_family,
+ sock_net((const struct sock *)msk),
+ MPTCP_PM_EV_GRP_OFFSET);
+}
+
static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
const struct inet_sock *issk = inet_sk(ssk);
@@ -1972,6 +2028,9 @@ static int mptcp_event_created(struct sk_buff *skb,
if (err)
return err;
+ if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
@@ -2006,10 +2065,12 @@ nla_put_failure:
kfree_skb(skb);
}
-void mptcp_event_addr_announced(const struct mptcp_sock *msk,
+void mptcp_event_addr_announced(const struct sock *ssk,
const struct mptcp_addr_info *info)
{
- struct net *net = sock_net((const struct sock *)msk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct net *net = sock_net(ssk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
@@ -2031,7 +2092,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk,
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
goto nla_put_failure;
- if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT,
+ info->port == 0 ?
+ inet_sk(ssk)->inet_dport :
+ info->port))
goto nla_put_failure;
switch (info->family) {
@@ -2148,6 +2212,26 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
.doit = mptcp_nl_cmd_set_flags,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = MPTCP_PM_CMD_ANNOUNCE,
+ .doit = mptcp_nl_cmd_announce,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_REMOVE,
+ .doit = mptcp_nl_cmd_remove,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
+ .doit = mptcp_nl_cmd_sf_create,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ .doit = mptcp_nl_cmd_sf_destroy,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family mptcp_genl_family __ro_after_init = {
@@ -2165,7 +2249,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = {
static int __net_init pm_nl_init_net(struct net *net)
{
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
@@ -2187,7 +2271,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;
list_for_each_entry(net, net_list, exit_list) {
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
/* net is removed from namespace list, can't race with
* other modifiers, also netns core already waited for a
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
new file mode 100644
index 000000000000..f56378e4f597
--- /dev/null
+++ b/net/mptcp/pm_userspace.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, Intel Corporation.
+ */
+
+#include "protocol.h"
+
+void mptcp_free_local_addr_list(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_addr_entry *entry, *tmp;
+ struct sock *sk = (struct sock *)msk;
+ LIST_HEAD(free_list);
+
+ if (!mptcp_pm_is_userspace(msk))
+ return;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
+
+ list_for_each_entry_safe(entry, tmp, &free_list, list) {
+ sock_kfree_s(sk, entry, sizeof(*entry));
+ }
+}
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
+{
+ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *e;
+ bool addr_match = false;
+ bool id_match = false;
+ int ret = -EINVAL;
+
+ bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
+ if (addr_match && entry->addr.id == 0)
+ entry->addr.id = e->addr.id;
+ id_match = (e->addr.id == entry->addr.id);
+ if (addr_match && id_match) {
+ match = e;
+ break;
+ } else if (addr_match || id_match) {
+ break;
+ }
+ __set_bit(e->addr.id, id_bitmap);
+ }
+
+ if (!match && !addr_match && !id_match) {
+ /* Memory for the entry is allocated from the
+ * sock option buffer.
+ */
+ e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
+ if (!e) {
+ spin_unlock_bh(&msk->pm.lock);
+ return -ENOMEM;
+ }
+
+ *e = *entry;
+ if (!e->addr.id)
+ e->addr.id = find_next_zero_bit(id_bitmap,
+ MPTCP_PM_MAX_ADDR_ID + 1,
+ 1);
+ list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
+ ret = e->addr.id;
+ } else if (match) {
+ ret = entry->addr.id;
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ return ret;
+}
+
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry, *match = NULL;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (id == entry->addr.id) {
+ match = entry;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (match) {
+ *flags = match->flags;
+ *ifindex = match->ifindex;
+ }
+
+ return 0;
+}
+
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry new_entry;
+ __be16 msk_sport = ((struct inet_sock *)
+ inet_sk((struct sock *)msk))->inet_sport;
+
+ memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
+ new_entry.addr = *skc;
+ new_entry.addr.id = 0;
+ new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
+
+ if (new_entry.addr.port == msk_sport)
+ new_entry.addr.port = 0;
+
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+}
+
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_pm_addr_entry addr_val;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!addr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto announce_err;
+ }
+
+ err = mptcp_pm_parse_entry(addr, info, true, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "error parsing local address");
+ goto announce_err;
+ }
+
+ if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "invalid addr id or flags");
+ goto announce_err;
+ }
+
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "did not match address and id");
+ goto announce_err;
+ }
+
+ lock_sock((struct sock *)msk);
+ spin_lock_bh(&msk->pm.lock);
+
+ if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ mptcp_pm_announce_addr(msk, &addr_val.addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock((struct sock *)msk);
+
+ err = 0;
+ announce_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_sock *msk;
+ LIST_HEAD(free_list);
+ int err = -EINVAL;
+ u32 token_val;
+ u8 id_val;
+
+ if (!id || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ id_val = nla_get_u8(id);
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto remove_err;
+ }
+
+ lock_sock((struct sock *)msk);
+
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (entry->addr.id == id_val) {
+ match = entry;
+ break;
+ }
+ }
+
+ if (!match) {
+ GENL_SET_ERR_MSG(info, "address with specified id not found");
+ release_sock((struct sock *)msk);
+ goto remove_err;
+ }
+
+ list_move(&match->list, &free_list);
+
+ mptcp_pm_remove_addrs_and_subflows(msk, &free_list);
+
+ release_sock((struct sock *)msk);
+
+ list_for_each_entry_safe(match, entry, &free_list, list) {
+ sock_kfree_s((struct sock *)msk, match, sizeof(*match));
+ }
+
+ err = 0;
+ remove_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_r;
+ struct mptcp_addr_info addr_l;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ struct sock *sk;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto create_err;
+ }
+
+ if (addr_l.id == 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto create_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
+
+ err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
+
+ release_sock(sk);
+
+ create_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *local,
+ const struct mptcp_addr_info *remote)
+{
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct mptcp_subflow_context *subflow;
+ struct sock *found = NULL;
+
+ if (local->family != remote->family)
+ return NULL;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct inet_sock *issk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if (local->family != ssk->sk_family)
+ continue;
+
+ issk = inet_sk(ssk);
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (issk->inet_saddr != local->addr.s_addr ||
+ issk->inet_daddr != remote->addr.s_addr)
+ continue;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *pinfo = inet6_sk(ssk);
+
+ if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) ||
+ !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr))
+ continue;
+ break;
+ }
+#endif
+ default:
+ continue;
+ }
+
+ if (issk->inet_sport == local->port &&
+ issk->inet_dport == remote->port) {
+ found = ssk;
+ goto found;
+ }
+ }
+
+found:
+ release_sock(sk);
+
+ return found;
+}
+
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_l;
+ struct mptcp_addr_info addr_r;
+ struct mptcp_sock *msk;
+ struct sock *sk, *ssk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto destroy_err;
+ }
+
+ if (addr_l.family != addr_r.family) {
+ GENL_SET_ERR_MSG(info, "address families do not match");
+ goto destroy_err;
+ }
+
+ if (!addr_l.port || !addr_r.port) {
+ GENL_SET_ERR_MSG(info, "missing local or remote port");
+ goto destroy_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
+ if (ssk) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+ mptcp_close_ssk(sk, ssk, subflow);
+ err = 0;
+ } else {
+ err = -ESRCH;
+ }
+
+ destroy_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0cbea3b6d0a4..17e13396024a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -216,7 +216,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
seq = MPTCP_SKB_CB(skb)->map_seq;
end_seq = MPTCP_SKB_CB(skb)->end_seq;
- max_seq = READ_ONCE(msk->rcv_wnd_sent);
+ max_seq = atomic64_read(&msk->rcv_wnd_sent);
pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue));
@@ -225,7 +225,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
mptcp_drop(sk, skb);
pr_debug("oow by %lld, rcv_wnd_sent %llu\n",
(unsigned long long)end_seq - (unsigned long)max_seq,
- (unsigned long long)msk->rcv_wnd_sent);
+ (unsigned long long)atomic64_read(&msk->rcv_wnd_sent));
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
return;
}
@@ -1141,18 +1141,21 @@ struct mptcp_sendmsg_info {
bool data_lock_held;
};
-static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
- int avail_size)
+static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
+ u64 data_seq, int avail_size)
{
u64 window_end = mptcp_wnd_end(msk);
+ u64 mptcp_snd_wnd;
if (__mptcp_check_fallback(msk))
return avail_size;
- if (!before64(data_seq + avail_size, window_end)) {
- u64 allowed_size = window_end - data_seq;
+ mptcp_snd_wnd = window_end - data_seq;
+ avail_size = min_t(unsigned int, mptcp_snd_wnd, avail_size);
- return min_t(unsigned int, allowed_size, avail_size);
+ if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd)) {
+ tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_SNDWNDSHARED);
}
return avail_size;
@@ -1229,6 +1232,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
+static void mptcp_update_infinite_map(struct mptcp_sock *msk,
+ struct sock *ssk,
+ struct mptcp_ext *mpext)
+{
+ if (!mpext)
+ return;
+
+ mpext->infinite_map = 1;
+ mpext->data_len = 0;
+
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
+ mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1289,7 +1308,7 @@ alloc_skb:
}
/* Zero window and all data acked? Probe. */
- copy = mptcp_check_allowed_size(msk, data_seq, copy);
+ copy = mptcp_check_allowed_size(msk, ssk, data_seq, copy);
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
@@ -1360,6 +1379,8 @@ alloc_skb:
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
+ if (mptcp_subflow_ctx(ssk)->send_infinite_map)
+ mptcp_update_infinite_map(msk, ssk, mpext);
trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
@@ -1480,11 +1501,16 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* to check that subflow has a non empty cwin.
*/
ssk = send_info[SSK_MODE_ACTIVE].ssk;
- if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd)
+ if (!ssk || !sk_stream_memory_free(ssk))
return NULL;
- burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd);
+ burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
+ if (!burst) {
+ msk->last_snd = NULL;
+ return ssk;
+ }
+
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
@@ -2012,7 +2038,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
}
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct scm_timestamping_internal tss;
@@ -2030,7 +2056,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2149,6 +2175,21 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
+static struct mptcp_subflow_context *
+mp_fail_response_expect_subflow(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow, *ret = NULL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->mp_fail_response_expect)) {
+ ret = subflow;
+ break;
+ }
+ }
+
+ return ret;
+}
+
static void mptcp_timeout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2465,6 +2506,7 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
}
release_sock(ssk);
@@ -2476,6 +2518,23 @@ reset_timer:
mptcp_reset_timer(sk);
}
+static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk;
+ bool slow;
+
+ subflow = mp_fail_response_expect_subflow(msk);
+ if (subflow) {
+ pr_debug("MP_FAIL doesn't respond, reset the subflow");
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ slow = lock_sock_fast(ssk);
+ mptcp_subflow_reset(ssk);
+ unlock_sock_fast(ssk, slow);
+ }
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -2516,6 +2575,8 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
+ mptcp_mp_fail_no_response(msk);
+
unlock:
release_sock(sk);
sock_put(sk);
@@ -2539,6 +2600,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -2733,7 +2795,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
/* join list will be eventually flushed (with rst) at sock lock release time*/
list_splice_init(&msk->conn_list, &conn_list);
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
@@ -2841,7 +2903,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
}
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
if (mptcp_sk(sk)->token)
@@ -2916,7 +2978,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
}
sock_reset_flag(nsk, SOCK_RCU_FREE);
@@ -3017,6 +3079,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
+ mptcp_free_local_addr_list(msk);
}
static void mptcp_destroy(struct sock *sk)
@@ -3092,15 +3155,19 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state
- */
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
- __mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
- __mptcp_error_report(sk);
+ if (unlikely(&msk->cb_flags)) {
+ /* be sure to set the current sk state before tacking actions
+ * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+ */
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+ __mptcp_set_connected(sk);
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
+ __mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
+ msk->last_snd = NULL;
+ }
__mptcp_update_rmem(sk);
}
@@ -3204,9 +3271,9 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3237,15 +3304,12 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
}
- if (!msk->pm.server_side)
+ if (!list_empty(&subflow->node))
goto out;
if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited;
- if (WARN_ON_ONCE(!list_empty(&subflow->node)))
- goto err_prohibited;
-
/* active connections are already on conn_list.
* If we can't acquire msk socket lock here, let the release callback
* handle it
@@ -3271,6 +3335,7 @@ err_prohibited:
}
subflow->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
@@ -3703,8 +3768,8 @@ void __init mptcp_proto_init(void)
for_each_possible_cpu(cpu) {
delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
INIT_LIST_HEAD(&delegated->head);
- netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi,
+ mptcp_napi_poll);
napi_enable(&delegated->napi);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5655a63aa6a8..200f89f6d62f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -11,6 +11,7 @@
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
+#include <net/genetlink.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -124,6 +125,7 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
+#define MPTCP_RESET_SCHEDULER 7
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -182,6 +184,14 @@ enum mptcp_pm_status {
*/
};
+enum mptcp_pm_type {
+ MPTCP_PM_TYPE_KERNEL = 0,
+ MPTCP_PM_TYPE_USERSPACE,
+
+ __MPTCP_PM_TYPE_NR,
+ __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
+};
+
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
@@ -198,6 +208,7 @@ struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
struct list_head anno_list;
+ struct list_head userspace_pm_local_addr_list;
spinlock_t lock; /*protects the whole PM data */
@@ -210,6 +221,7 @@ struct mptcp_pm_data {
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
+ u8 pm_type;
u8 subflows;
u8 status;
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
@@ -217,6 +229,14 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_addr_entry {
+ struct list_head list;
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+ struct socket *lsk;
+};
+
struct mptcp_data_frag {
struct list_head list;
u64 data_seq;
@@ -236,7 +256,7 @@ struct mptcp_sock {
u64 write_seq;
u64 snd_nxt;
u64 ack_seq;
- u64 rcv_wnd_sent;
+ atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
int rmem_fwd_alloc;
struct sock *last_snd;
@@ -262,6 +282,7 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ bool allow_infinite_fallback;
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
@@ -439,6 +460,7 @@ struct mptcp_subflow_context {
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
+ send_infinite_map : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -446,6 +468,7 @@ struct mptcp_subflow_context {
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
+ bool mp_fail_response_expect;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -572,6 +595,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
+int mptcp_get_pm_type(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -587,6 +611,9 @@ void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port);
+
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
@@ -622,19 +649,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
-static inline bool mptcp_has_another_subflow(struct sock *ssk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
- mptcp_for_each_subflow(msk, tmp) {
- if (tmp != subflow)
- return true;
- }
-
- return false;
-}
-
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -729,6 +743,11 @@ __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum su
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr);
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
@@ -739,7 +758,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow);
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
@@ -749,6 +768,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -757,19 +778,34 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list);
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry);
+void mptcp_free_local_addr_list(struct mptcp_sock *msk);
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
-void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
+void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
@@ -792,6 +828,16 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}
+static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
+}
+
+static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
+}
+
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
@@ -822,9 +868,9 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
void __init mptcp_pm_nl_init(void);
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
@@ -890,13 +936,28 @@ static inline void mptcp_do_fallback(struct sock *sk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
+{
+ struct mptcp_ext *mpext;
+
+ mpext = skb ? mptcp_get_ext(skb) : NULL;
+ if (mpext && mpext->infinite_map)
+ return true;
+
+ return false;
+}
+
+static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
+{
+ return (subflow->request_mptcp || subflow->request_join);
+}
+
static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *parent = subflow->conn;
return sk->sk_state == TCP_ESTABLISHED &&
- !mptcp_sk(parent)->pm.server_side &&
+ is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f949d22f52bd..423d3826ca1e 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -756,6 +756,18 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct socket *listener;
+
+ listener = __mptcp_nmpc_socket(msk);
+ if (!listener)
+ return 0; /* TCP_DEFER_ACCEPT does not fail */
+
+ return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen);
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -782,6 +794,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
case TCP_NODELAY:
return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
+ case TCP_DEFER_ACCEPT:
+ return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen);
}
return -EOPNOTSUPP;
@@ -853,15 +867,11 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
u32 flags = 0;
- bool slow;
u8 val;
memset(info, 0, sizeof(*info));
- slow = lock_sock_fast(sk);
-
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -882,8 +892,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
-
- unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1148,6 +1156,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_CONGESTION:
case TCP_INFO:
case TCP_CC_INFO:
+ case TCP_DEFER_ACCEPT:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
case TCP_INQ:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be76ada89d96..8841e8cd9ad8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
return mptcp_is_fully_established((void *)msk) &&
- READ_ONCE(msk->pm.accept_subflow);
+ ((mptcp_pm_is_userspace(msk) &&
+ mptcp_userspace_pm_active(msk)) ||
+ READ_ONCE(msk->pm.accept_subflow));
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
@@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
+ subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -971,6 +974,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool csum_reqd = READ_ONCE(msk->csum_enabled);
+ struct sock *sk = (struct sock *)msk;
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -1009,7 +1013,12 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
+ if (!sock_flag(ssk, SOCK_DEAD))
+ sk_stop_timer(sk, &sk->sk_timer);
+
return MAPPING_INVALID;
}
@@ -1218,35 +1227,43 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
- while ((skb = skb_peek(&ssk->sk_receive_queue)))
- sk_eat_skb(ssk, skb);
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ } else if (!sock_flag(ssk, SOCK_DEAD)) {
+ WRITE_ONCE(subflow->mp_fail_response_expect, true);
+ sk_reset_timer((struct sock *)msk,
+ &((struct sock *)msk)->sk_timer,
+ jiffies + TCP_RTO_MAX);
+ }
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return true;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return true;
- }
- if (!subflow_can_fallback(subflow)) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return false;
+ if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1461,7 +1478,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (local_id)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
@@ -1498,6 +1515,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
return err;
failed_unlink:
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7f645328b47f..efab2b06d373 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1767,8 +1767,6 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
-static int three = 3;
-
static int
proc_do_defense_mode(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -1977,7 +1975,7 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "nat_icmp_send",
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 82f36beb2e76..5d8ed6c90b7e 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -132,6 +132,9 @@ static int __nf_conncount_add(struct net *net,
struct nf_conn *found_ct;
unsigned int collect = 0;
+ if (time_is_after_eq_jiffies((unsigned long)list->last_gc))
+ goto add_new_node;
+
/* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES)
@@ -177,6 +180,7 @@ static int __nf_conncount_add(struct net *net,
nf_ct_put(found_ct);
}
+add_new_node:
if (WARN_ON_ONCE(list->count > INT_MAX))
return -EOVERFLOW;
@@ -190,6 +194,7 @@ static int __nf_conncount_add(struct net *net,
conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head);
list->count++;
+ list->last_gc = (u32)jiffies;
return 0;
}
@@ -214,6 +219,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
list->count = 0;
+ list->last_gc = (u32)jiffies;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -227,6 +233,10 @@ bool nf_conncount_gc_list(struct net *net,
unsigned int collected = 0;
bool ret = false;
+ /* don't bother if we just did GC */
+ if (time_is_after_eq_jiffies((unsigned long)READ_ONCE(list->last_gc)))
+ return false;
+
/* don't bother if other cpu is already doing GC */
if (!spin_trylock(&list->list_lock))
return false;
@@ -258,6 +268,7 @@ bool nf_conncount_gc_list(struct net *net,
if (!list->count)
ret = true;
+ list->last_gc = (u32)jiffies;
spin_unlock(&list->list_lock);
return ret;
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index fe98673dd5ac..bc4d5cd63a94 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -38,6 +38,7 @@
* @l4proto - Layer 4 protocol
* Values:
* IPPROTO_TCP, IPPROTO_UDP
+ * @dir: - connection tracking tuple direction.
* @reserved - Reserved member, will be reused for more options in future
* Values:
* 0
@@ -46,7 +47,8 @@ struct bpf_ct_opts {
s32 netns_id;
s32 error;
u8 l4proto;
- u8 reserved[3];
+ u8 dir;
+ u8 reserved[2];
};
enum {
@@ -56,10 +58,11 @@ enum {
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
struct bpf_sock_tuple *bpf_tuple,
u32 tuple_len, u8 protonum,
- s32 netns_id)
+ s32 netns_id, u8 *dir)
{
struct nf_conntrack_tuple_hash *hash;
struct nf_conntrack_tuple tuple;
+ struct nf_conn *ct;
if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
return ERR_PTR(-EPROTO);
@@ -99,7 +102,12 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
put_net(net);
if (!hash)
return ERR_PTR(-ENOENT);
- return nf_ct_tuplehash_to_ctrack(hash);
+
+ ct = nf_ct_tuplehash_to_ctrack(hash);
+ if (dir)
+ *dir = NF_CT_DIRECTION(hash);
+
+ return ct;
}
__diag_push();
@@ -135,13 +143,13 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
if (!opts)
return NULL;
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts__sz != NF_BPF_CT_OPTS_SZ) {
opts->error = -EINVAL;
return NULL;
}
caller_net = dev_net(ctx->rxq->dev);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id);
+ opts->netns_id, &opts->dir);
if (IS_ERR(nfct)) {
opts->error = PTR_ERR(nfct);
return NULL;
@@ -178,13 +186,13 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
if (!opts)
return NULL;
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
- opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
+ opts__sz != NF_BPF_CT_OPTS_SZ) {
opts->error = -EINVAL;
return NULL;
}
caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
- opts->netns_id);
+ opts->netns_id, &opts->dir);
if (IS_ERR(nfct)) {
opts->error = PTR_ERR(nfct);
return NULL;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0164e5f522e8..082a2fd8d85b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -525,50 +525,6 @@ clean_from_lists(struct nf_conn *ct)
nf_ct_remove_expectations(ct);
}
-/* must be called with local_bh_disable */
-static void nf_ct_add_to_dying_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* add this conntrack to the (per cpu) dying list */
- ct->cpu = smp_processor_id();
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &pcpu->dying);
- spin_unlock(&pcpu->lock);
-}
-
-/* must be called with local_bh_disable */
-static void nf_ct_add_to_unconfirmed_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* add this conntrack to the (per cpu) unconfirmed list */
- ct->cpu = smp_processor_id();
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &pcpu->unconfirmed);
- spin_unlock(&pcpu->lock);
-}
-
-/* must be called with local_bh_disable */
-static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
-{
- struct ct_pcpu *pcpu;
-
- /* We overload first tuple to link into unconfirmed or dying list.*/
- pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
-
- spin_lock(&pcpu->lock);
- BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
- hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
- spin_unlock(&pcpu->lock);
-}
-
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
/* Released via nf_ct_destroy() */
@@ -640,7 +596,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
destroy_gre_conntrack(ct);
- local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
@@ -648,10 +603,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
*/
nf_ct_remove_expectations(ct);
- nf_ct_del_from_dying_or_unconfirmed_list(ct);
-
- local_bh_enable();
-
if (ct->master)
nf_ct_put(ct->master);
@@ -660,15 +611,12 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_ct_destroy);
-static void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void __nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
unsigned int sequence;
- nf_ct_helper_destroy(ct);
-
- local_bh_disable();
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
@@ -681,12 +629,30 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
clean_from_lists(ct);
nf_conntrack_double_unlock(hash, reply_hash);
+}
+
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
+{
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
- nf_ct_add_to_dying_list(ct);
+ __nf_ct_delete_from_lists(ct);
local_bh_enable();
}
+static void nf_ct_add_to_ecache_list(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct));
+
+ spin_lock(&cnet->ecache.dying_lock);
+ hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &cnet->ecache.dying_list);
+ spin_unlock(&cnet->ecache.dying_lock);
+#endif
+}
+
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
struct nf_conn_tstamp *tstamp;
@@ -709,7 +675,12 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
/* destroy event was not delivered. nf_ct_put will
* be done by event cache worker on redelivery.
*/
- nf_ct_delete_from_lists(ct);
+ nf_ct_helper_destroy(ct);
+ local_bh_disable();
+ __nf_ct_delete_from_lists(ct);
+ nf_ct_add_to_ecache_list(ct);
+ local_bh_enable();
+
nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL);
return false;
}
@@ -870,6 +841,33 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
&nf_conntrack_hash[reply_hash]);
}
+static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext)
+{
+ /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions
+ * may contain stale pointers to e.g. helper that has been removed.
+ *
+ * The helper can't clear this because the nf_conn object isn't in
+ * any hash and synchronize_rcu() isn't enough because associated skb
+ * might sit in a queue.
+ */
+ return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid);
+}
+
+static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext)
+{
+ if (!ext)
+ return true;
+
+ if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid))
+ return false;
+
+ /* inserted into conntrack table, nf_ct_iterate_cleanup()
+ * will find it. Disable nf_ct_ext_find() id check.
+ */
+ WRITE_ONCE(ext->gen_id, 0);
+ return true;
+}
+
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
@@ -885,6 +883,11 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
+ if (!nf_ct_ext_valid_pre(ct->ext)) {
+ NF_CT_STAT_INC(net, insert_failed);
+ return -ETIMEDOUT;
+ }
+
local_bh_disable();
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
@@ -925,6 +928,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
local_bh_enable();
+
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ nf_ct_kill(ct);
+ NF_CT_STAT_INC(net, drop);
+ return -ETIMEDOUT;
+ }
+
return 0;
chaintoolong:
NF_CT_STAT_INC(net, chaintoolong);
@@ -972,7 +982,6 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
struct nf_conn_tstamp *tstamp;
refcount_inc(&ct->ct_general.use);
- ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
tstamp = nf_conn_tstamp_find(ct);
@@ -1001,7 +1010,6 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_conntrack_get(&ct->ct_general);
nf_ct_acct_merge(ct, ctinfo, loser_ct);
- nf_ct_add_to_dying_list(loser_ct);
nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
@@ -1134,7 +1142,6 @@ nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
return ret;
drop:
- nf_ct_add_to_dying_list(loser_ct);
NF_CT_STAT_INC(net, drop);
NF_CT_STAT_INC(net, insert_failed);
return NF_DROP;
@@ -1195,16 +1202,20 @@ __nf_conntrack_confirm(struct sk_buff *skb)
return NF_DROP;
}
+ if (!nf_ct_ext_valid_pre(ct->ext)) {
+ NF_CT_STAT_INC(net, insert_failed);
+ goto dying;
+ }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- nf_ct_del_from_dying_or_unconfirmed_list(ct);
+ ct->status |= IPS_CONFIRMED;
if (unlikely(nf_ct_is_dying(ct))) {
- nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, insert_failed);
goto dying;
}
@@ -1228,7 +1239,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto out;
if (chainlen++ > max_chainlen) {
chaintoolong:
- nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
NF_CT_STAT_INC(net, insert_failed);
ret = NF_DROP;
@@ -1252,6 +1262,16 @@ chaintoolong:
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
+ /* ext area is still valid (rcu read lock is held,
+ * but will go out of scope soon, we need to remove
+ * this conntrack again.
+ */
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ nf_ct_kill(ct);
+ NF_CT_STAT_INC(net, drop);
+ return NF_DROP;
+ }
+
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
@@ -1678,7 +1698,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conn *ct;
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
+#endif
struct nf_conntrack_expect *exp = NULL;
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
@@ -1711,15 +1733,21 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_labels_ext_add(ct);
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
- nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
- ecache ? ecache->expmask : 0,
- GFP_ATOMIC);
- local_bh_disable();
+ if ((ecache || net->ct.sysctl_events) &&
+ !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+ ecache ? ecache->expmask : 0,
+ GFP_ATOMIC)) {
+ nf_conntrack_free(ct);
+ return ERR_PTR(-ENOMEM);
+ }
+#endif
+
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
- spin_lock(&nf_conntrack_expect_lock);
+ spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
pr_debug("expectation arrives ct=%p exp=%p\n",
@@ -1742,16 +1770,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
#endif
NF_CT_STAT_INC(net, expect_new);
}
- spin_unlock(&nf_conntrack_expect_lock);
+ spin_unlock_bh(&nf_conntrack_expect_lock);
}
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+ /* Now it is going to be associated with an sk_buff, set refcount to 1. */
refcount_set(&ct->ct_general.use, 1);
- nf_ct_add_to_unconfirmed_list(ct);
-
- local_bh_enable();
if (exp) {
if (exp->expectfn)
@@ -2319,7 +2344,7 @@ static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
- void *data, unsigned int *bucket)
+ const struct nf_ct_iter_data *iter_data, unsigned int *bucket)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -2350,7 +2375,12 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
* tuple while iterating.
*/
ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
+
+ if (iter_data->net &&
+ !net_eq(iter_data->net, nf_ct_net(ct)))
+ continue;
+
+ if (iter(ct, iter_data->data))
goto found;
}
spin_unlock(lockp);
@@ -2367,7 +2397,7 @@ found:
}
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report)
+ const struct nf_ct_iter_data *iter_data)
{
unsigned int bucket = 0;
struct nf_conn *ct;
@@ -2375,91 +2405,28 @@ static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
might_sleep();
mutex_lock(&nf_conntrack_mutex);
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) {
/* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
+ nf_ct_delete(ct, iter_data->portid, iter_data->report);
nf_ct_put(ct);
cond_resched();
}
mutex_unlock(&nf_conntrack_mutex);
}
-struct iter_data {
- int (*iter)(struct nf_conn *i, void *data);
- void *data;
- struct net *net;
-};
-
-static int iter_net_only(struct nf_conn *i, void *data)
-{
- struct iter_data *d = data;
-
- if (!net_eq(d->net, nf_ct_net(i)))
- return 0;
-
- return d->iter(i, d->data);
-}
-
-static void
-__nf_ct_unconfirmed_destroy(struct net *net)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct nf_conntrack_tuple_hash *h;
- struct hlist_nulls_node *n;
- struct ct_pcpu *pcpu;
-
- pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_bh(&pcpu->lock);
- hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
- struct nf_conn *ct;
-
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* we cannot call iter() on unconfirmed list, the
- * owning cpu can reallocate ct->ext at any time.
- */
- set_bit(IPS_DYING_BIT, &ct->status);
- }
- spin_unlock_bh(&pcpu->lock);
- cond_resched();
- }
-}
-
-void nf_ct_unconfirmed_destroy(struct net *net)
+void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data),
+ const struct nf_ct_iter_data *iter_data)
{
+ struct net *net = iter_data->net;
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
might_sleep();
- if (atomic_read(&cnet->count) > 0) {
- __nf_ct_unconfirmed_destroy(net);
- nf_queue_nf_hook_drop(net);
- synchronize_net();
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
-
-void nf_ct_iterate_cleanup_net(struct net *net,
- int (*iter)(struct nf_conn *i, void *data),
- void *data, u32 portid, int report)
-{
- struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- struct iter_data d;
-
- might_sleep();
-
if (atomic_read(&cnet->count) == 0)
return;
- d.iter = iter;
- d.data = data;
- d.net = net;
-
- nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
+ nf_ct_iterate_cleanup(iter, iter_data);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -2477,6 +2444,7 @@ EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
void
nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{
+ struct nf_ct_iter_data iter_data = {};
struct net *net;
down_read(&net_rwsem);
@@ -2485,31 +2453,41 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&cnet->count) == 0)
continue;
- __nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net);
}
up_read(&net_rwsem);
/* Need to wait for netns cleanup worker to finish, if its
* running -- it might have deleted a net namespace from
- * the global list, so our __nf_ct_unconfirmed_destroy() might
- * not have affected all namespaces.
+ * the global list, so hook drop above might not have
+ * affected all namespaces.
*/
net_ns_barrier();
- /* a conntrack could have been unlinked from unconfirmed list
- * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
+ /* a skb w. unconfirmed conntrack could have been reinjected just
+ * before we called nf_queue_nf_hook_drop().
+ *
* This makes sure its inserted into conntrack table.
*/
synchronize_net();
- nf_ct_iterate_cleanup(iter, data, 0, 0);
+ nf_ct_ext_bump_genid();
+ iter_data.data = data;
+ nf_ct_iterate_cleanup(iter, &iter_data);
+
+ /* Another cpu might be in a rcu read section with
+ * rcu protected pointer cleared in iter callback
+ * or hidden via nf_ct_ext_bump_genid() above.
+ *
+ * Wait until those are done.
+ */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
static int kill_all(struct nf_conn *i, void *data)
{
- return net_eq(nf_ct_net(i), data);
+ return 1;
}
void nf_conntrack_cleanup_start(void)
@@ -2544,8 +2522,9 @@ void nf_conntrack_cleanup_net(struct net *net)
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
- int busy;
+ struct nf_ct_iter_data iter_data = {};
struct net *net;
+ int busy;
/*
* This makes sure all current packets have passed through
@@ -2558,7 +2537,8 @@ i_see_dead_people:
list_for_each_entry(net, net_exit_list, exit_list) {
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- nf_ct_iterate_cleanup(kill_all, net, 0, 0);
+ iter_data.net = net;
+ nf_ct_iterate_cleanup_net(kill_all, &iter_data);
if (atomic_read(&cnet->count) != 0)
busy = 1;
}
@@ -2571,7 +2551,6 @@ i_see_dead_people:
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
- free_percpu(net->ct.pcpu_lists);
}
}
@@ -2777,33 +2756,19 @@ void nf_conntrack_init_end(void)
* We need to use special "null" values, not used in hash table
*/
#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
-#define DYING_NULLS_VAL ((1<<30)+1)
int nf_conntrack_init_net(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
int ret = -ENOMEM;
- int cpu;
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
atomic_set(&cnet->count, 0);
- net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
- if (!net->ct.pcpu_lists)
- goto err_stat;
-
- for_each_possible_cpu(cpu) {
- struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
- spin_lock_init(&pcpu->lock);
- INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
- INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
- }
-
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat)
- goto err_pcpu_lists;
+ return ret;
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
@@ -2819,8 +2784,5 @@ int nf_conntrack_init_net(struct net *net)
err_expect:
free_percpu(net->ct.stat);
-err_pcpu_lists:
- free_percpu(net->ct.pcpu_lists);
-err_stat:
return ret;
}
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 07e65b4e92f8..8698b3424646 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,7 +16,6 @@
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/err.h>
-#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
@@ -29,8 +28,9 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
-#define ECACHE_RETRY_WAIT (HZ/10)
-#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
+#define DYING_NULLS_VAL ((1 << 30) + 1)
+#define ECACHE_MAX_JIFFIES msecs_to_jiffies(10)
+#define ECACHE_RETRY_JIFFIES msecs_to_jiffies(10)
enum retry_state {
STATE_CONGESTED,
@@ -38,96 +38,90 @@ enum retry_state {
STATE_DONE,
};
-static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
+struct nf_conntrack_net_ecache *nf_conn_pernet_ecache(const struct net *net)
{
- struct nf_conn *refs[ECACHE_STACK_ALLOC];
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
+
+ return &cnet->ecache;
+}
+#if IS_MODULE(CONFIG_NF_CT_NETLINK)
+EXPORT_SYMBOL_GPL(nf_conn_pernet_ecache);
+#endif
+
+static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
+{
+ unsigned long stop = jiffies + ECACHE_MAX_JIFFIES;
+ struct hlist_nulls_head evicted_list;
enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int evicted = 0;
+ unsigned int sent;
- spin_lock(&pcpu->lock);
+ INIT_HLIST_NULLS_HEAD(&evicted_list, DYING_NULLS_VAL);
- hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
+next:
+ sent = 0;
+ spin_lock_bh(&cnet->ecache.dying_lock);
+
+ hlist_nulls_for_each_entry_safe(h, n, &cnet->ecache.dying_list, hnnode) {
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_ecache *e;
-
- if (!nf_ct_is_confirmed(ct))
- continue;
-
- /* This ecache access is safe because the ct is on the
- * pcpu dying list and we hold the spinlock -- the entry
- * cannot be free'd until after the lock is released.
- *
- * This is true even if ct has a refcount of 0: the
- * cpu that is about to free the entry must remove it
- * from the dying list and needs the lock to do so.
- */
- e = nf_ct_ecache_find(ct);
- if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
- continue;
- /* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
- * the worker owns this entry: the ct will remain valid
- * until the worker puts its ct reference.
+ /* The worker owns all entries, ct remains valid until nf_ct_put
+ * in the loop below.
*/
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
}
- e->state = NFCT_ECACHE_DESTROY_SENT;
- refs[evicted] = ct;
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &evicted_list);
- if (++evicted >= ARRAY_SIZE(refs)) {
+ if (time_after(stop, jiffies)) {
ret = STATE_RESTART;
break;
}
+
+ if (sent++ > 16) {
+ spin_unlock_bh(&cnet->ecache.dying_lock);
+ cond_resched();
+ goto next;
+ }
}
- spin_unlock(&pcpu->lock);
+ spin_unlock_bh(&cnet->ecache.dying_lock);
- /* can't _put while holding lock */
- while (evicted)
- nf_ct_put(refs[--evicted]);
+ hlist_nulls_for_each_entry_safe(h, n, &evicted_list, hnnode) {
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
+ nf_ct_put(ct);
+
+ cond_resched();
+ }
return ret;
}
static void ecache_work(struct work_struct *work)
{
- struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
- struct netns_ct *ctnet = cnet->ct_net;
- int cpu, delay = -1;
- struct ct_pcpu *pcpu;
-
- local_bh_disable();
-
- for_each_possible_cpu(cpu) {
- enum retry_state ret;
-
- pcpu = per_cpu_ptr(ctnet->pcpu_lists, cpu);
-
- ret = ecache_work_evict_list(pcpu);
-
- switch (ret) {
- case STATE_CONGESTED:
- delay = ECACHE_RETRY_WAIT;
- goto out;
- case STATE_RESTART:
- delay = 0;
- break;
- case STATE_DONE:
- break;
- }
+ struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+ int ret, delay = -1;
+
+ ret = ecache_work_evict_list(cnet);
+ switch (ret) {
+ case STATE_CONGESTED:
+ delay = ECACHE_RETRY_JIFFIES;
+ break;
+ case STATE_RESTART:
+ delay = 0;
+ break;
+ case STATE_DONE:
+ break;
}
- out:
- local_bh_enable();
-
- ctnet->ecache_dwork_pending = delay > 0;
if (delay >= 0)
- schedule_delayed_work(&cnet->ecache_dwork, delay);
+ schedule_delayed_work(&cnet->ecache.dwork, delay);
}
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
@@ -199,7 +193,6 @@ int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
*/
if (e->portid == 0 && portid != 0)
e->portid = portid;
- e->state = NFCT_ECACHE_DESTROY_FAIL;
}
return ret;
@@ -293,16 +286,55 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
- !delayed_work_pending(&cnet->ecache_dwork)) {
- schedule_delayed_work(&cnet->ecache_dwork, HZ);
+ !delayed_work_pending(&cnet->ecache.dwork)) {
+ schedule_delayed_work(&cnet->ecache.dwork, HZ);
net->ct.ecache_dwork_pending = true;
} else if (state == NFCT_ECACHE_DESTROY_SENT) {
- net->ct.ecache_dwork_pending = false;
- mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+ if (!hlist_nulls_empty(&cnet->ecache.dying_list))
+ mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
+ else
+ net->ct.ecache_dwork_pending = false;
}
}
-#define NF_CT_EVENTS_DEFAULT 1
+bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
+{
+ struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *e;
+
+ switch (net->ct.sysctl_events) {
+ case 0:
+ /* assignment via template / ruleset? ignore sysctl. */
+ if (ctmask || expmask)
+ break;
+ return true;
+ case 2: /* autodetect: no event listener, don't allocate extension. */
+ if (!READ_ONCE(net->ct.ctnetlink_has_listener))
+ return true;
+ fallthrough;
+ case 1:
+ /* always allocate an extension. */
+ if (!ctmask && !expmask) {
+ ctmask = ~0;
+ expmask = ~0;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return true;
+ }
+
+ e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
+ if (e) {
+ e->ctmask = ctmask;
+ e->expmask = expmask;
+ }
+
+ return e != NULL;
+}
+EXPORT_SYMBOL_GPL(nf_ct_ecache_ext_add);
+
+#define NF_CT_EVENTS_DEFAULT 2
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
void nf_conntrack_ecache_pernet_init(struct net *net)
@@ -310,8 +342,10 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
net->ct.sysctl_events = nf_ct_events;
- cnet->ct_net = &net->ct;
- INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+
+ INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
+ INIT_HLIST_NULLS_HEAD(&cnet->ecache.dying_list, DYING_NULLS_VAL);
+ spin_lock_init(&cnet->ecache.dying_lock);
BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
}
@@ -320,5 +354,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
- cancel_delayed_work_sync(&cnet->ecache_dwork);
+ cancel_delayed_work_sync(&cnet->ecache.dwork);
}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 1296fda54ac6..0b513f7bf9f3 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -27,6 +27,8 @@
#define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */
+atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1);
+
static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
[NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help),
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -116,8 +118,10 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
if (!new)
return NULL;
- if (!ct->ext)
+ if (!ct->ext) {
memset(new->offset, 0, sizeof(new->offset));
+ new->gen_id = atomic_read(&nf_conntrack_ext_genid);
+ }
new->offset[id] = newoff;
new->len = newlen;
@@ -127,3 +131,29 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
+
+/* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. */
+void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id)
+{
+ unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid);
+ unsigned int this_id = READ_ONCE(ext->gen_id);
+
+ if (!__nf_ct_ext_exist(ext, id))
+ return NULL;
+
+ if (this_id == 0 || ext->gen_id == gen_id)
+ return (void *)ext + ext->offset[id];
+
+ return NULL;
+}
+EXPORT_SYMBOL(__nf_ct_ext_find);
+
+void nf_ct_ext_bump_genid(void)
+{
+ unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid);
+
+ if (value == UINT_MAX)
+ atomic_set(&nf_conntrack_ext_genid, 1);
+
+ msleep(HZ);
+}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8dec42ec603e..c12a87ebc3ee 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -468,11 +468,6 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
-
- /* Maybe someone has gotten the helper already when unhelp above.
- * So need to wait it.
- */
- synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1ea2ad732d57..722af5e309ba 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1559,6 +1559,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
u32 portid, int report, u8 family)
{
struct ctnetlink_filter *filter = NULL;
+ struct nf_ct_iter_data iter = {
+ .net = net,
+ .portid = portid,
+ .report = report,
+ };
if (ctnetlink_needs_filter(family, cda)) {
if (cda[CTA_FILTER])
@@ -1567,10 +1572,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
+
+ iter.data = filter;
}
- nf_ct_iterate_cleanup_net(net, ctnetlink_flush_iterate, filter,
- portid, report);
+ nf_ct_iterate_cleanup_net(ctnetlink_flush_iterate, &iter);
kfree(filter);
return 0;
@@ -1708,85 +1714,103 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
return 0;
}
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nf_conn *ct,
+ bool dying)
+{
+ struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ u8 l3proto = nfmsg->nfgen_family;
+ int res;
+
+ if (l3proto && nf_ct_l3num(ct) != l3proto)
+ return 0;
+
+ if (ctx->last) {
+ if (ct != ctx->last)
+ return 0;
+
+ ctx->last = NULL;
+ }
+
+ /* We can't dump extension info for the unconfirmed
+ * list because unconfirmed conntracks can have
+ * ct->ext reallocated (and thus freed).
+ *
+ * In the dying list case ct->ext can't be free'd
+ * until after we drop pcpu->lock.
+ */
+ res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+ ct, dying, 0);
+ if (res < 0) {
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return 0;
+
+ ctx->last = ct;
+ }
+
+ return res;
+}
+#endif
+
static int
-ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
+ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ return 0;
+}
+
+static int
+ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
- struct nf_conn *ct, *last;
+ struct nf_conn *last = ctx->last;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ const struct net *net = sock_net(skb->sk);
+ struct nf_conntrack_net_ecache *ecache_net;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
- u_int8_t l3proto = nfmsg->nfgen_family;
- int res;
- int cpu;
- struct hlist_nulls_head *list;
- struct net *net = sock_net(skb->sk);
+#endif
if (ctx->done)
return 0;
- last = ctx->last;
+ ctx->last = NULL;
- for (cpu = ctx->cpu; cpu < nr_cpu_ids; cpu++) {
- struct ct_pcpu *pcpu;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ ecache_net = nf_conn_pernet_ecache(net);
+ spin_lock_bh(&ecache_net->dying_lock);
- if (!cpu_possible(cpu))
- continue;
+ hlist_nulls_for_each_entry(h, n, &ecache_net->dying_list, hnnode) {
+ struct nf_conn *ct;
+ int res;
- pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
- spin_lock_bh(&pcpu->lock);
- list = dying ? &pcpu->dying : &pcpu->unconfirmed;
-restart:
- hlist_nulls_for_each_entry(h, n, list, hnnode) {
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (l3proto && nf_ct_l3num(ct) != l3proto)
- continue;
- if (ctx->last) {
- if (ct != last)
- continue;
- ctx->last = NULL;
- }
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (last && last != ct)
+ continue;
- /* We can't dump extension info for the unconfirmed
- * list because unconfirmed conntracks can have
- * ct->ext reallocated (and thus freed).
- *
- * In the dying list case ct->ext can't be free'd
- * until after we drop pcpu->lock.
- */
- res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct, dying, 0);
- if (res < 0) {
- if (!refcount_inc_not_zero(&ct->ct_general.use))
- continue;
- ctx->cpu = cpu;
- ctx->last = ct;
- spin_unlock_bh(&pcpu->lock);
- goto out;
- }
- }
- if (ctx->last) {
- ctx->last = NULL;
- goto restart;
+ res = ctnetlink_dump_one_entry(skb, cb, ct, true);
+ if (res < 0) {
+ spin_unlock_bh(&ecache_net->dying_lock);
+ nf_ct_put(last);
+ return skb->len;
}
- spin_unlock_bh(&pcpu->lock);
+
+ nf_ct_put(last);
+ last = NULL;
}
+
+ spin_unlock_bh(&ecache_net->dying_lock);
+#endif
ctx->done = true;
-out:
- if (last)
- nf_ct_put(last);
+ nf_ct_put(last);
return skb->len;
}
-static int
-ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return ctnetlink_dump_list(skb, cb, true);
-}
-
static int ctnetlink_get_ct_dying(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
@@ -1802,12 +1826,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int
-ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
-{
- return ctnetlink_dump_list(skb, cb, false);
-}
-
static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index d1f2d3c8d2b1..895b09cbd7cf 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -538,9 +538,13 @@ retry:
out_unlock:
mutex_unlock(&nf_ct_proto_mutex);
- if (fixup_needed)
- nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
- (void *)(unsigned long)nfproto, 0, 0);
+ if (fixup_needed) {
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = (void *)(unsigned long)nfproto,
+ };
+ nf_ct_iterate_cleanup_net(nf_ct_tcp_fixup, &iter_data);
+ }
return err;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 204a5cdff5b1..a63b51dceaf2 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -485,7 +485,6 @@ static bool tcp_in_window(struct nf_conn *ct,
struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
u16 win_raw;
s32 receiver_offset;
@@ -508,18 +507,6 @@ static bool tcp_in_window(struct nf_conn *ct,
ack -= receiver_offset;
sack -= receiver_offset;
- pr_debug("tcp_in_window: START\n");
- pr_debug("tcp_in_window: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
- seq, ack, receiver_offset, sack, receiver_offset, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
if (sender->td_maxwin == 0) {
/*
* Initialize sender data.
@@ -597,27 +584,10 @@ static bool tcp_in_window(struct nf_conn *ct,
*/
seq = end = sender->td_end;
- pr_debug("tcp_in_window: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
- seq, ack, receiver_offset, sack, receiver_offset, win, end);
- pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
/* Is the ending sequence in the receive window (if available)? */
in_recv_win = !receiver->td_maxwin ||
after(end, sender->td_end - receiver->td_maxwin - 1);
- pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- (in_recv_win ? 1 : 0),
- before(sack, receiver->td_end + 1),
- after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
-
if (before(seq, sender->td_maxend + 1) &&
in_recv_win &&
before(sack, receiver->td_end + 1) &&
@@ -698,11 +668,6 @@ static bool tcp_in_window(struct nf_conn *ct,
}
}
- pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
return res;
}
@@ -772,8 +737,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
enum tcp_conntrack new_state;
struct net *net = nf_ct_net(ct);
const struct nf_tcp_net *tn = nf_tcp_pernet(net);
- const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
- const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
/* Don't need lock here: this conntrack not in circulation yet */
new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
@@ -826,14 +789,6 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* tcp_packet will set them */
ct->proto.tcp.last_index = TCP_NONE_SET;
-
- pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- __func__,
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
return true;
}
@@ -1032,10 +987,11 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
}
/* Invalid packet */
- pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
- dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "packet (index %d) in dir %d invalid, state %s",
+ index, dir,
+ tcp_conntrack_names[old_state]);
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 55aa55b252b2..6ad7bbc90d38 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -693,7 +693,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = SYSCTL_TWO,
},
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index cec166ecba77..0f828d05ea60 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -38,7 +38,12 @@ static int untimeout(struct nf_conn *ct, void *timeout)
void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout)
{
- nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
+ struct nf_ct_iter_data iter_data = {
+ .net = net,
+ .data = timeout,
+ };
+
+ nf_ct_iterate_cleanup_net(untimeout, &iter_data);
}
EXPORT_SYMBOL_GPL(nf_ct_untimeout);
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 13234641cdb3..77bcb10fc586 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -40,6 +40,12 @@ struct arppayload {
unsigned char ip_dst[4];
};
+/* Guard against containers flooding syslog. */
+static bool nf_log_allowed(const struct net *net)
+{
+ return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
u16 vid;
@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
}
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
+static void dump_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
unsigned int logflags = 0;
@@ -798,9 +803,26 @@ fallback:
const unsigned char *p = skb_mac_header(skb);
unsigned int i;
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < dev->hard_header_len; i++, p++)
- nf_log_buf_add(m, ":%02x", *p);
+ if (dev->type == ARPHRD_SIT) {
+ p -= ETH_HLEN;
+
+ if (p < skb->head)
+ p = NULL;
+ }
+
+ if (p) {
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++)
+ nf_log_buf_add(m, ":%02x", *p++);
+ }
+
+ if (dev->type == ARPHRD_SIT) {
+ const struct iphdr *iph =
+ (struct iphdr *)skb_mac_header(skb);
+
+ nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+ &iph->daddr);
+ }
}
nf_log_buf_add(m, " ");
}
@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
out, loginfo, prefix);
if (in)
- dump_ipv4_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv4_packet(net, m, loginfo, skb, 0);
@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
.me = THIS_MODULE,
};
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
- const struct nf_loginfo *info,
- const struct sk_buff *skb)
-{
- struct net_device *dev = skb->dev;
- unsigned int logflags = 0;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
-
- if (!(logflags & NF_LOG_MACDECODE))
- goto fallback;
-
- switch (dev->type) {
- case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
- nf_log_dump_vlan(m, skb);
- nf_log_buf_add(m, "MACPROTO=%04x ",
- ntohs(eth_hdr(skb)->h_proto));
- return;
- default:
- break;
- }
-
-fallback:
- nf_log_buf_add(m, "MAC=");
- if (dev->hard_header_len &&
- skb->mac_header != skb->network_header) {
- const unsigned char *p = skb_mac_header(skb);
- unsigned int len = dev->hard_header_len;
- unsigned int i;
-
- if (dev->type == ARPHRD_SIT) {
- p -= ETH_HLEN;
-
- if (p < skb->head)
- p = NULL;
- }
-
- if (p) {
- nf_log_buf_add(m, "%02x", *p++);
- for (i = 1; i < len; i++)
- nf_log_buf_add(m, ":%02x", *p++);
- }
- nf_log_buf_add(m, " ");
-
- if (dev->type == ARPHRD_SIT) {
- const struct iphdr *iph =
- (struct iphdr *)skb_mac_header(skb);
- nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
- &iph->daddr);
- }
- } else {
- nf_log_buf_add(m, " ");
- }
-}
-
static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
{
struct nf_log_buf *m;
- /* FIXME: Disabled from containers until syslog ns is supported */
- if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+ if (!nf_log_allowed(net))
return;
m = nf_log_buf_open();
@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo, prefix);
if (in)
- dump_ipv6_mac_header(m, loginfo, skb);
+ dump_mac_header(m, loginfo, skb);
dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
.me = THIS_MODULE,
};
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ if (!nf_log_allowed(net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+
+ dump_mac_header(m, loginfo, skb);
+
+ nf_log_buf_close(m);
+}
+
static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
case htons(ETH_P_RARP):
nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
break;
+ default:
+ nf_log_unknown_packet(net, pf, hooknum, skb,
+ in, out, loginfo, prefix);
+ break;
}
}
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index e32fac374608..1a506b0c6511 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -77,11 +77,14 @@ EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
static void iterate_cleanup_work(struct work_struct *work)
{
+ struct nf_ct_iter_data iter_data = {};
struct masq_dev_work *w;
w = container_of(work, struct masq_dev_work, work);
- nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+ iter_data.net = w->net;
+ iter_data.data = (void *)w;
+ nf_ct_iterate_cleanup_net(w->iter, &iter_data);
put_net_track(w->net, &w->ns_tracker);
kfree(w);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a096b9fbbbdf..12fc9cda4a2c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8358,10 +8358,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
- rule = list_entry(&chain->rules, struct nft_rule, list);
-
data_size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (nft_is_active_next(net, rule)) {
data_size += sizeof(*prule) + rule->dlen;
if (data_size > INT_MAX)
@@ -8378,7 +8376,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
data_boundary = data + data_size;
size = 0;
- list_for_each_entry_continue(rule, &chain->rules, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(net, rule))
continue;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7e2c8dd01408..ad3bbe34ca88 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -45,6 +45,7 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
static unsigned int nfnetlink_pernet_id __read_mostly;
struct nfnl_net {
+ unsigned int ctnetlink_listeners;
struct sock *nfnl;
};
@@ -654,7 +655,6 @@ static void nfnetlink_rcv(struct sk_buff *skb)
netlink_rcv_skb(skb, nfnetlink_rcv_msg);
}
-#ifdef CONFIG_MODULES
static int nfnetlink_bind(struct net *net, int group)
{
const struct nfnetlink_subsystem *ss;
@@ -670,9 +670,44 @@ static int nfnetlink_bind(struct net *net, int group)
rcu_read_unlock();
if (!ss)
request_module_nowait("nfnetlink-subsys-%d", type);
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ if (type == NFNL_SUBSYS_CTNETLINK) {
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+
+ if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) {
+ nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+ return -EOVERFLOW;
+ }
+
+ nfnlnet->ctnetlink_listeners++;
+ if (nfnlnet->ctnetlink_listeners == 1)
+ WRITE_ONCE(net->ct.ctnetlink_has_listener, true);
+ nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+ }
+#endif
return 0;
}
+
+static void nfnetlink_unbind(struct net *net, int group)
+{
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ int type = nfnl_group2type[group];
+
+ if (type == NFNL_SUBSYS_CTNETLINK) {
+ struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+ nfnl_lock(NFNL_SUBSYS_CTNETLINK);
+ WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0);
+ nfnlnet->ctnetlink_listeners--;
+ if (nfnlnet->ctnetlink_listeners == 0)
+ WRITE_ONCE(net->ct.ctnetlink_has_listener, false);
+ nfnl_unlock(NFNL_SUBSYS_CTNETLINK);
+ }
#endif
+}
static int __net_init nfnetlink_net_init(struct net *net)
{
@@ -680,9 +715,8 @@ static int __net_init nfnetlink_net_init(struct net *net)
struct netlink_kernel_cfg cfg = {
.groups = NFNLGRP_MAX,
.input = nfnetlink_rcv,
-#ifdef CONFIG_MODULES
.bind = nfnetlink_bind,
-#endif
+ .unbind = nfnetlink_unbind,
};
nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index b0d8888a539b..f069c24c6146 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -33,8 +33,19 @@
static unsigned int nfct_timeout_id __read_mostly;
+struct ctnl_timeout {
+ struct list_head head;
+ struct rcu_head rcu_head;
+ refcount_t refcnt;
+ char name[CTNL_TIMEOUT_NAME_MAX];
+ struct nf_ct_timeout timeout;
+
+ struct list_head free_head;
+};
+
struct nfct_timeout_pernet {
struct list_head nfct_timeout_list;
+ struct list_head nfct_timeout_freelist;
};
MODULE_LICENSE("GPL");
@@ -158,6 +169,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
timeout->timeout.l3num = l3num;
timeout->timeout.l4proto = l4proto;
refcount_set(&timeout->refcnt, 1);
+ __module_get(THIS_MODULE);
list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
return 0;
@@ -506,13 +518,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- if (!try_module_get(THIS_MODULE))
- goto err;
-
- if (!refcount_inc_not_zero(&timeout->refcnt)) {
- module_put(THIS_MODULE);
+ if (!refcount_inc_not_zero(&timeout->refcnt))
goto err;
- }
matching = timeout;
break;
}
@@ -525,10 +532,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
struct ctnl_timeout *timeout =
container_of(t, struct ctnl_timeout, timeout);
- if (refcount_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt)) {
kfree_rcu(timeout, rcu_head);
-
- module_put(THIS_MODULE);
+ module_put(THIS_MODULE);
+ }
}
static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
@@ -578,20 +585,36 @@ static int __net_init cttimeout_net_init(struct net *net)
struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
INIT_LIST_HEAD(&pernet->nfct_timeout_list);
+ INIT_LIST_HEAD(&pernet->nfct_timeout_freelist);
return 0;
}
+static void __net_exit cttimeout_net_pre_exit(struct net *net)
+{
+ struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+ struct ctnl_timeout *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ list_add(&cur->free_head, &pernet->nfct_timeout_freelist);
+ }
+
+ /* core calls synchronize_rcu() after this */
+}
+
static void __net_exit cttimeout_net_exit(struct net *net)
{
struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
struct ctnl_timeout *cur, *tmp;
- nf_ct_unconfirmed_destroy(net);
+ if (list_empty(&pernet->nfct_timeout_freelist))
+ return;
+
nf_ct_untimeout(net, NULL);
- list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
- list_del_rcu(&cur->head);
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, head) {
+ list_del(&cur->free_head);
if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
@@ -600,6 +623,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
static struct pernet_operations cttimeout_ops = {
.init = cttimeout_net_init,
+ .pre_exit = cttimeout_net_pre_exit,
.exit = cttimeout_net_exit,
.id = &nfct_timeout_id,
.size = sizeof(struct nfct_timeout_pernet),
@@ -632,13 +656,24 @@ err_out:
return ret;
}
+static int untimeout(struct nf_conn *ct, void *timeout)
+{
+ struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+ if (timeout_ext)
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+ return 0;
+}
+
static void __exit cttimeout_exit(void)
{
nfnetlink_subsys_unregister(&cttimeout_subsys);
unregister_pernet_subsys(&cttimeout_ops);
RCU_INIT_POINTER(nf_ct_timeout_hook, NULL);
- synchronize_rcu();
+
+ nf_ct_iterate_destroy(untimeout, NULL);
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index f590ee1c8a1b..83590afe3768 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
{
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
return err;
if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
err = -EINVAL;
- goto err1;
+ goto err_mask_release;
}
err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
- goto err1;
+ goto err_mask_release;
if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
err = -EINVAL;
- goto err2;
+ goto err_xor_release;
}
return 0;
-err2:
+
+err_xor_release:
nft_data_release(&priv->xor, xor.type);
-err1:
+err_mask_release:
nft_data_release(&priv->mask, mask.type);
return err;
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index f198f2d9ef90..1f12d7ade606 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
case NFT_FIB_RESULT_OIF:
case NFT_FIB_RESULT_OIFNAME:
hooks = (1 << NF_INET_PRE_ROUTING);
+ if (priv->flags & NFTA_FIB_F_IIF) {
+ hooks |= (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ }
break;
case NFT_FIB_RESULT_ADDRTYPE:
if (priv->flags & NFTA_FIB_F_IIF)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 6f0b07fe648d..a16cf47199b7 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -232,11 +232,19 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
+ fl.u.ip4.saddr = ct->tuplehash[dir].tuple.dst.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
+ fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
+ fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_mark = pkt->skb->mark;
break;
case NFPROTO_IPV6:
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
+ fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6;
fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
+ fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
+ fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
+ fl.u.ip6.flowi6_mark = pkt->skb->mark;
break;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 73e9c0a9c187..0cd91f813a3b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1931,7 +1931,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- int noblock = flags & MSG_DONTWAIT;
size_t copied;
struct sk_buff *skb, *data_skb;
int err, ret;
@@ -1941,7 +1940,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = 0;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (skb == NULL)
goto out;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fa9dc2ba3941..6f7f4392cffb 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1159,7 +1159,8 @@ static int nr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
}
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL) {
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb) {
release_sock(sk);
return er;
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5b286e1e0a6f..6ff3e10ff8e3 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1166,6 +1166,7 @@ void nfc_unregister_device(struct nfc_dev *dev)
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
}
dev->shutting_down = true;
device_unlock(&dev->dev);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 4ca35791c93b..77642d18a3b4 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -821,7 +821,6 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
unsigned int copied, rlen;
struct sk_buff *skb, *cskb;
@@ -842,7 +841,7 @@ static int llcp_sock_recvmsg(struct socket *sock, struct msghdr *msg,
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb) {
pr_err("Recv datagram failed state %d %d %d",
sk->sk_state, err, sock_error(sk));
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 0ca214ab5aef..8dd569765f96 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -238,7 +238,6 @@ static int rawsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
- int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied;
@@ -246,7 +245,7 @@ static int rawsock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
pr_debug("sock=%p sk=%p len=%zu flags=%d\n", sock, sk, len, flags);
- skb = skb_recv_datagram(sk, flags, noblock, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb)
return rc;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 002d2b9c69dd..677f9cfa9660 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1924,12 +1924,20 @@ oom:
static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
+ int depth;
+
if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
sock->type == SOCK_RAW) {
skb_reset_mac_header(skb);
skb->protocol = dev_parse_header_protocol(skb);
}
+ /* Move network header to the right position for VLAN tagged packets */
+ if (likely(skb->dev->type == ARPHRD_ETHER) &&
+ eth_type_vlan(skb->protocol) &&
+ __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
+ skb_set_network_header(skb, depth);
+
skb_probe_transport_header(skb);
}
@@ -3047,6 +3055,11 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
+ if (unlikely(extra_len == 4))
+ skb->no_fcs = 1;
+
+ packet_parse_headers(skb, sock);
+
if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
@@ -3055,11 +3068,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
virtio_net_hdr_set_proto(skb, &vnet_hdr);
}
- packet_parse_headers(skb, sock);
-
- if (unlikely(extra_len == 4))
- skb->no_fcs = 1;
-
err = po->xmit(skb);
if (unlikely(err != 0)) {
if (err > 0)
@@ -3426,7 +3434,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
* but then it will block.
*/
- skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
/*
* An error occurred so return it. Because skb_recv_datagram()
@@ -3469,7 +3477,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sll->sll_protocol = skb->protocol;
}
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
if (msg->msg_name) {
const size_t max_len = min(sizeof(skb->cb),
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 393e6aa7a592..ff5f49ab236e 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -112,7 +112,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb = NULL;
struct sockaddr_pn sa;
@@ -123,7 +123,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
MSG_CMSG_COMPAT))
goto out_nofree;
- skb = skb_recv_datagram(sk, flags, noblock, &rval);
+ skb = skb_recv_datagram(sk, flags, &rval);
if (skb == NULL)
goto out_nofree;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 65d463ad8770..83ea13a50690 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -772,7 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
u8 pipe_handle, enabled, n_sb;
u8 aligned = 0;
- skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ errp);
if (!skb)
return NULL;
@@ -1238,7 +1239,7 @@ struct sk_buff *pep_read(struct sock *sk)
}
static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_buff *skb;
int err;
@@ -1267,7 +1268,7 @@ static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return -EINVAL;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
lock_sock(sk);
if (skb == NULL) {
if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT)
diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index ec2322529727..5c2fb992803b 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -1035,8 +1035,7 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return -EADDRNOTAVAIL;
}
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
if (!skb) {
release_sock(sk);
return rc;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 30a1cf4c16c6..bf2d986a6bc3 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1230,7 +1230,8 @@ static int rose_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
return -ENOTCONN;
/* Now we can treat all alike */
- if ((skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &er)) == NULL)
+ skb = skb_recv_datagram(sk, flags, &er);
+ if (!skb)
return er;
qbit = (skb->data[0] & ROSE_Q_BIT) == ROSE_Q_BIT;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index e2e6b6b78578..fee6409c2bb3 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1128,22 +1128,15 @@ static int rose_node_show(struct seq_file *seq, void *v)
seq_puts(seq, "address mask n neigh neigh neigh\n");
else {
const struct rose_node *rose_node = v;
- /* if (rose_node->loopback) {
- seq_printf(seq, "%-10s %04d 1 loopback\n",
- rose2asc(rsbuf, &rose_node->address),
- rose_node->mask);
- } else { */
- seq_printf(seq, "%-10s %04d %d",
- rose2asc(rsbuf, &rose_node->address),
- rose_node->mask,
- rose_node->count);
-
- for (i = 0; i < rose_node->count; i++)
- seq_printf(seq, " %05d",
- rose_node->neighbour[i]->number);
-
- seq_puts(seq, "\n");
- /* } */
+ seq_printf(seq, "%-10s %04d %d",
+ rose2asc(rsbuf, &rose_node->address),
+ rose_node->mask,
+ rose_node->count);
+
+ for (i = 0; i < rose_node->count; i++)
+ seq_printf(seq, " %05d", rose_node->neighbour[i]->number);
+
+ seq_puts(seq, "\n");
}
return 0;
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2b5f89713e36..ceba28e9dce6 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -351,7 +351,7 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
*/
void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
- _enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 969e532f77a9..571436064cd6 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -15,14 +15,6 @@
#include <keys/rxrpc-type.h>
#include "protocol.h"
-#if 0
-#define CHECK_SLAB_OKAY(X) \
- BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
- (POISON_FREE << 8 | POISON_FREE))
-#else
-#define CHECK_SLAB_OKAY(X) do {} while (0)
-#endif
-
#define FCRYPT_BSIZE 8
struct rxrpc_crypt {
union {
@@ -68,7 +60,7 @@ struct rxrpc_net {
struct proc_dir_entry *proc_net; /* Subdir in /proc/net */
u32 epoch; /* Local epoch for detecting local-end reset */
struct list_head calls; /* List of calls active in this namespace */
- rwlock_t call_lock; /* Lock for ->calls */
+ spinlock_t call_lock; /* Lock for ->calls */
atomic_t nr_calls; /* Count of allocated calls */
atomic_t nr_conns;
@@ -88,7 +80,7 @@ struct rxrpc_net {
struct work_struct client_conn_reaper;
struct timer_list client_conn_reap_timer;
- struct list_head local_endpoints;
+ struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
DECLARE_HASHTABLE (peer_hash, 10);
@@ -279,9 +271,9 @@ struct rxrpc_security {
struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
- atomic_t usage; /* Number of references to the structure */
+ refcount_t ref; /* Number of references to the structure */
struct rxrpc_net *rxnet; /* The network ns in which this resides */
- struct list_head link;
+ struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
@@ -304,7 +296,7 @@ struct rxrpc_local {
*/
struct rxrpc_peer {
struct rcu_head rcu; /* This must be first */
- atomic_t usage;
+ refcount_t ref;
unsigned long hash_key;
struct hlist_node hash_link;
struct rxrpc_local *local;
@@ -406,7 +398,7 @@ enum rxrpc_conn_proto_state {
*/
struct rxrpc_bundle {
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
unsigned int debug_id;
bool try_upgrade; /* True if the bundle is attempting upgrade */
bool alloc_conn; /* True if someone's getting a conn */
@@ -427,7 +419,7 @@ struct rxrpc_connection {
struct rxrpc_conn_proto proto;
struct rxrpc_conn_parameters params;
- atomic_t usage;
+ refcount_t ref;
struct rcu_head rcu;
struct list_head cache_link;
@@ -609,7 +601,7 @@ struct rxrpc_call {
int error; /* Local error incurred */
enum rxrpc_call_state state; /* current state of call */
enum rxrpc_call_completion completion; /* Call completion condition */
- atomic_t usage;
+ refcount_t ref;
u16 service_id; /* service ID */
u8 security_ix; /* Security type */
enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
@@ -676,13 +668,12 @@ struct rxrpc_call {
spinlock_t input_lock; /* Lock for packet input to this call */
- /* receive-phase ACK management */
+ /* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- rxrpc_serial_t ackr_first_seq; /* first sequence number received */
- rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
- rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
- rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
+ rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */
+ atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
/* RTT management */
rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
@@ -692,8 +683,10 @@ struct rxrpc_call {
#define RXRPC_CALL_RTT_AVAIL_MASK 0xf
#define RXRPC_CALL_RTT_PEND_SHIFT 8
- /* transmission-phase ACK management */
+ /* Transmission-phase ACK management (ACKs we've received). */
ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
+ rxrpc_seq_t acks_first_seq; /* first sequence number received */
+ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */
rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */
@@ -1014,6 +1007,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *);
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
+extern const struct seq_operations rxrpc_local_seq_ops;
/*
* recvmsg.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 1ae90fb97936..99e10eea3732 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -91,7 +91,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
(head + 1) & (size - 1));
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage), here);
+ refcount_read(&conn->ref), here);
}
/* Now it gets complicated, because calls get registered with the
@@ -104,7 +104,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->state = RXRPC_CALL_SERVER_PREALLOC;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)user_call_ID);
write_lock(&rx->call_lock);
@@ -140,9 +140,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- write_lock(&rxnet->call_lock);
- list_add_tail(&call->link, &rxnet->calls);
- write_unlock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock_bh(&rxnet->call_lock);
b->call_backlog[call_head] = call;
smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 22e05de5d1ca..f8ecad2b730e 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -377,9 +377,9 @@ recheck_state:
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
}
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
@@ -406,7 +406,8 @@ recheck_state:
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
+ call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_resend(call, now);
goto recheck_state;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 043508fd8d8a..84d0a4109645 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -112,7 +112,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
found_extant_call:
rxrpc_get_call(call, rxrpc_call_got);
read_unlock(&rx->call_lock);
- _leave(" = %p [%d]", call, atomic_read(&call->usage));
+ _leave(" = %p [%d]", call, refcount_read(&call->ref));
return call;
}
@@ -160,7 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->input_lock);
rwlock_init(&call->state_lock);
- atomic_set(&call->usage, 1);
+ refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
@@ -299,7 +299,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)p->user_call_ID);
if (p->kernel)
__set_bit(RXRPC_CALL_KERNEL, &call->flags);
@@ -337,9 +337,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxnet = call->rxnet;
- write_lock(&rxnet->call_lock);
- list_add_tail(&call->link, &rxnet->calls);
- write_unlock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock_bh(&rxnet->call_lock);
/* From this point on, the call is protected by its own lock. */
release_sock(&rx->sk);
@@ -352,7 +352,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
goto error_attached_to_socket;
trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
- atomic_read(&call->usage), here, NULL);
+ refcount_read(&call->ref), here, NULL);
rxrpc_start_call_timer(call);
@@ -372,7 +372,7 @@ error_dup_user_ID:
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, -EEXIST);
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
+ refcount_read(&call->ref), here, ERR_PTR(-EEXIST));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
@@ -386,7 +386,7 @@ error_dup_user_ID:
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, rxrpc_call_error,
- atomic_read(&call->usage), here, ERR_PTR(ret));
+ refcount_read(&call->ref), here, ERR_PTR(ret));
set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
@@ -442,8 +442,9 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
bool rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&call->usage, 1, 0);
- if (n == 0)
+ int n;
+
+ if (!__refcount_inc_not_zero(&call->ref, &n))
return false;
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
@@ -459,7 +460,7 @@ bool rxrpc_queue_call(struct rxrpc_call *call)
bool __rxrpc_queue_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
ASSERTCMP(n, >=, 1);
if (rxrpc_queue_work(&call->processor))
trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
@@ -476,7 +477,7 @@ void rxrpc_see_call(struct rxrpc_call *call)
{
const void *here = __builtin_return_address(0);
if (call) {
- int n = atomic_read(&call->usage);
+ int n = refcount_read(&call->ref);
trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
here, NULL);
@@ -486,11 +487,11 @@ void rxrpc_see_call(struct rxrpc_call *call)
bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&call->usage, 1, 0);
+ int n;
- if (n == 0)
+ if (!__refcount_inc_not_zero(&call->ref, &n))
return false;
- trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
return true;
}
@@ -500,9 +501,10 @@ bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&call->usage);
+ int n;
- trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+ __refcount_inc(&call->ref, &n);
+ trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL);
}
/*
@@ -527,10 +529,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
struct rxrpc_connection *conn = call->conn;
bool put = false;
- _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, rxrpc_call_release,
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
here, (const void *)call->flags);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -619,21 +621,21 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
unsigned int debug_id = call->debug_id;
+ bool dead;
int n;
ASSERT(call != NULL);
- n = atomic_dec_return(&call->usage);
+ dead = __refcount_dec_and_test(&call->ref, &n);
trace_rxrpc_call(debug_id, op, n, here, NULL);
- ASSERTCMP(n, >=, 0);
- if (n == 0) {
+ if (dead) {
_debug("call %d dead", call->debug_id);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
list_del_init(&call->link);
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
}
rxrpc_cleanup_call(call);
@@ -705,7 +707,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
_enter("");
if (!list_empty(&rxnet->calls)) {
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
while (!list_empty(&rxnet->calls)) {
call = list_entry(rxnet->calls.next,
@@ -716,16 +718,16 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
list_del_init(&call->link);
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
+ call, refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->flags, call->events);
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
cond_resched();
- write_lock(&rxnet->call_lock);
+ spin_lock_bh(&rxnet->call_lock);
}
- write_unlock(&rxnet->call_lock);
+ spin_unlock_bh(&rxnet->call_lock);
}
atomic_dec(&rxnet->nr_calls);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 8120138dac01..3c9eeb5b750c 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -102,7 +102,7 @@ void rxrpc_destroy_client_conn_ids(void)
if (!idr_is_empty(&rxrpc_client_conn_ids)) {
idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
}
BUG();
}
@@ -122,7 +122,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
if (bundle) {
bundle->params = *cp;
rxrpc_get_peer(bundle->params.peer);
- atomic_set(&bundle->usage, 1);
+ refcount_set(&bundle->ref, 1);
spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
}
@@ -131,7 +131,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
{
- atomic_inc(&bundle->usage);
+ refcount_inc(&bundle->ref);
return bundle;
}
@@ -144,10 +144,13 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
{
unsigned int d = bundle->debug_id;
- unsigned int u = atomic_dec_return(&bundle->usage);
+ bool dead;
+ int r;
- _debug("PUT B=%x %u", d, u);
- if (u == 0)
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+
+ _debug("PUT B=%x %d", d, r);
+ if (dead)
rxrpc_free_bundle(bundle);
}
@@ -169,7 +172,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
return ERR_PTR(-ENOMEM);
}
- atomic_set(&conn->usage, 1);
+ refcount_set(&conn->ref, 1);
conn->bundle = bundle;
conn->params = bundle->params;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
@@ -195,7 +198,7 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
key_get(conn->params.key);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
atomic_inc(&rxnet->nr_client_conns);
@@ -966,14 +969,13 @@ void rxrpc_put_client_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
- if (n <= 0) {
- ASSERTCMP(n, >=, 0);
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here);
+ if (dead)
rxrpc_kill_client_conn(conn);
- }
}
/*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index b2159dbf5412..22089e37e97f 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -104,7 +104,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
goto not_found;
*_peer = peer;
conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (!conn || atomic_read(&conn->usage) == 0)
+ if (!conn || refcount_read(&conn->ref) == 0)
goto not_found;
_leave(" = %p", conn);
return conn;
@@ -114,7 +114,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
*/
conn = idr_find(&rxrpc_client_conn_ids,
sp->hdr.cid >> RXRPC_CIDSHIFT);
- if (!conn || atomic_read(&conn->usage) == 0) {
+ if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
}
@@ -183,7 +183,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
default:
- chan->last_abort = RX_USER_ABORT;
+ chan->last_abort = RX_CALL_DEAD;
chan->last_type = RXRPC_PACKET_TYPE_ABORT;
break;
}
@@ -263,11 +263,12 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
bool rxrpc_queue_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n == 0)
+ int r;
+
+ if (!__refcount_inc_not_zero(&conn->ref, &r))
return false;
if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here);
else
rxrpc_put_connection(conn);
return true;
@@ -280,7 +281,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
if (conn) {
- int n = atomic_read(&conn->usage);
+ int n = refcount_read(&conn->ref);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
}
@@ -292,9 +293,10 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
- int n = atomic_inc_return(&conn->usage);
+ int r;
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
+ __refcount_inc(&conn->ref, &r);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here);
return conn;
}
@@ -305,11 +307,11 @@ struct rxrpc_connection *
rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ int r;
if (conn) {
- int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
+ if (__refcount_inc_not_zero(&conn->ref, &r))
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here);
else
conn = NULL;
}
@@ -333,12 +335,11 @@ void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = conn->debug_id;
- int n;
+ int r;
- n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
- ASSERTCMP(n, >=, 0);
- if (n == 1)
+ __refcount_dec(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here);
+ if (r - 1 == 1)
rxrpc_set_service_reap_timer(conn->params.local->rxnet,
jiffies + rxrpc_connection_expiry);
}
@@ -351,9 +352,9 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
struct rxrpc_connection *conn =
container_of(rcu, struct rxrpc_connection, rcu);
- _enter("{%d,u=%d}", conn->debug_id, atomic_read(&conn->usage));
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
_net("DESTROY CONN %d", conn->debug_id);
@@ -392,8 +393,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
- ASSERTCMP(atomic_read(&conn->usage), >, 0);
- if (likely(atomic_read(&conn->usage) > 1))
+ ASSERTCMP(refcount_read(&conn->ref), >, 0);
+ if (likely(refcount_read(&conn->ref) > 1))
continue;
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
continue;
@@ -405,7 +406,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
_debug("reap CONN %d { u=%d,t=%ld }",
- conn->debug_id, atomic_read(&conn->usage),
+ conn->debug_id, refcount_read(&conn->ref),
(long)expire_at - (long)now);
if (time_before(now, expire_at)) {
@@ -418,7 +419,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
/* The usage count sits at 1 whilst the object is unused on the
* list; we reduce that to 0 to make the object unavailable.
*/
- if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
+ if (!refcount_dec_if_one(&conn->ref))
continue;
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
@@ -442,7 +443,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
link);
list_del_init(&conn->link);
- ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+ ASSERTCMP(refcount_read(&conn->ref), ==, 0);
rxrpc_kill_connection(conn);
}
@@ -470,7 +471,7 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
write_lock(&rxnet->conn_lock);
list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
- conn, atomic_read(&conn->usage));
+ conn, refcount_read(&conn->ref));
leak = true;
}
write_unlock(&rxnet->conn_lock);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index e1966dfc9152..6e6aa02c6f9e 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -9,7 +9,7 @@
#include "ar-internal.h"
static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .usage = ATOMIC_INIT(1),
+ .ref = REFCOUNT_INIT(1),
.debug_id = UINT_MAX,
.channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
};
@@ -99,7 +99,7 @@ conn_published:
return;
found_extant_conn:
- if (atomic_read(&cursor->usage) == 0)
+ if (refcount_read(&cursor->ref) == 0)
goto replace_old_connection;
write_sequnlock_bh(&peer->service_conn_lock);
/* We should not be able to get here. rxrpc_incoming_connection() is
@@ -132,7 +132,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
* the rxrpc_connections list.
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
- atomic_set(&conn->usage, 2);
+ refcount_set(&conn->ref, 2);
conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle);
atomic_inc(&rxnet->nr_conns);
@@ -142,7 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
write_unlock(&rxnet->conn_lock);
trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
__builtin_return_address(0));
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index dc201363f2c4..721d847ba92b 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -412,8 +412,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
enum rxrpc_call_state state;
- unsigned int j, nr_subpackets;
- rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+ unsigned int j, nr_subpackets, nr_unacked = 0;
+ rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial;
rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack;
bool immediate_ack = false, jumbo_bad = false;
u8 ack = 0;
@@ -453,7 +453,6 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
!rxrpc_receiving_reply(call))
goto unlock;
- call->ackr_prev_seq = seq0;
hard_ack = READ_ONCE(call->rx_hard_ack);
nr_subpackets = sp->nr_subpackets;
@@ -534,6 +533,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
ack_serial = serial;
}
+ if (after(seq0, call->ackr_highest_seq))
+ call->ackr_highest_seq = seq0;
+
/* Queue the packet. We use a couple of memory barriers here as need
* to make sure that rx_top is perceived to be set after the buffer
* pointer and that the buffer pointer is set after the annotation and
@@ -567,6 +569,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
sp = NULL;
}
+ nr_unacked++;
+
if (last) {
set_bit(RXRPC_CALL_RX_LAST, &call->flags);
if (!ack) {
@@ -586,9 +590,14 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
}
call->rx_expect_next = seq + 1;
}
+ if (!ack)
+ ack_serial = serial;
}
ack:
+ if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack)
+ ack = RXRPC_ACK_IDLE;
+
if (ack)
rxrpc_propose_ACK(call, ack, ack_serial,
immediate_ack, true,
@@ -812,7 +821,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
{
- rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
+ rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
if (after(first_pkt, base))
return true; /* The window advanced */
@@ -820,7 +829,7 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
if (before(first_pkt, base))
return false; /* firstPacket regressed */
- if (after_eq(prev_pkt, call->ackr_prev_seq))
+ if (after_eq(prev_pkt, call->acks_prev_seq))
return true; /* previousPacket hasn't regressed. */
/* Some rx implementations put a serial number in previousPacket. */
@@ -903,11 +912,38 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_propose_ack_respond_to_ack);
}
+ /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
+ * indicates that the client address changed due to NAT. The server
+ * lost the call because it switched to a different peer.
+ */
+ if (unlikely(buf.ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ return;
+ }
+
+ /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
+ * indicate a change of address. However, we can retransmit the call
+ * if we still have it buffered to the beginning.
+ */
+ if (unlikely(buf.ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ call->tx_hard_ack == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ return;
+ }
+
/* Discard any out-of-order or duplicate ACKs (outside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
return;
}
@@ -922,14 +958,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
/* Discard any out-of-order or duplicate ACKs (inside lock). */
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
- first_soft_ack, call->ackr_first_seq,
- prev_pkt, call->ackr_prev_seq);
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
goto out;
}
call->acks_latest_ts = skb->tstamp;
- call->ackr_first_seq = first_soft_ack;
- call->ackr_prev_seq = prev_pkt;
+ call->acks_first_seq = first_soft_ack;
+ call->acks_prev_seq = prev_pkt;
/* Parse rwind and mtu sizes if provided. */
if (buf.info.rxMTU)
@@ -1154,8 +1190,6 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
*/
static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
- CHECK_SLAB_OKAY(&local->usage);
-
if (rxrpc_get_local_maybe(local)) {
skb_queue_tail(&local->reject_queue, skb);
rxrpc_queue_local(local);
@@ -1413,7 +1447,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
}
}
- if (!call || atomic_read(&call->usage) == 0) {
+ if (!call || refcount_read(&call->ref) == 0) {
if (rxrpc_to_client(sp) ||
sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
goto bad_message;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 6a1611b0e303..96ecb7356c0f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -79,10 +79,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
- atomic_set(&local->usage, 1);
+ refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
local->rxnet = rxnet;
- INIT_LIST_HEAD(&local->link);
+ INIT_HLIST_NODE(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->reject_queue);
@@ -180,7 +180,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
{
struct rxrpc_local *local;
struct rxrpc_net *rxnet = rxrpc_net(net);
- struct list_head *cursor;
+ struct hlist_node *cursor;
const char *age;
long diff;
int ret;
@@ -190,16 +190,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
mutex_lock(&rxnet->local_mutex);
- for (cursor = rxnet->local_endpoints.next;
- cursor != &rxnet->local_endpoints;
- cursor = cursor->next) {
- local = list_entry(cursor, struct rxrpc_local, link);
+ hlist_for_each(cursor, &rxnet->local_endpoints) {
+ local = hlist_entry(cursor, struct rxrpc_local, link);
diff = rxrpc_local_cmp_key(local, srx);
- if (diff < 0)
+ if (diff != 0)
continue;
- if (diff > 0)
- break;
/* Services aren't allowed to share transport sockets, so
* reject that here. It is possible that the object is dying -
@@ -211,9 +207,10 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
goto addr_in_use;
}
- /* Found a match. We replace a dying object. Attempting to
- * bind the transport socket may still fail if we're attempting
- * to use a local address that the dying object is still using.
+ /* Found a match. We want to replace a dying object.
+ * Attempting to bind the transport socket may still fail if
+ * we're attempting to use a local address that the dying
+ * object is still using.
*/
if (!rxrpc_use_local(local))
break;
@@ -230,10 +227,12 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
if (ret < 0)
goto sock_error;
- if (cursor != &rxnet->local_endpoints)
- list_replace_init(cursor, &local->link);
- else
- list_add_tail(&local->link, cursor);
+ if (cursor) {
+ hlist_replace_rcu(cursor, &local->link);
+ cursor->pprev = NULL;
+ } else {
+ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
+ }
age = "new";
found:
@@ -266,10 +265,10 @@ addr_in_use:
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&local->usage);
- trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
+ __refcount_inc(&local->ref, &r);
+ trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here);
return local;
}
@@ -279,12 +278,12 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
+ int r;
if (local) {
- int n = atomic_fetch_add_unless(&local->usage, 1, 0);
- if (n > 0)
+ if (__refcount_inc_not_zero(&local->ref, &r))
trace_rxrpc_local(local->debug_id, rxrpc_local_got,
- n + 1, here);
+ r + 1, here);
else
local = NULL;
}
@@ -298,10 +297,10 @@ void rxrpc_queue_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = local->debug_id;
- int n = atomic_read(&local->usage);
+ int r = refcount_read(&local->ref);
if (rxrpc_queue_work(&local->processor))
- trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
+ trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here);
else
rxrpc_put_local(local);
}
@@ -313,15 +312,16 @@ void rxrpc_put_local(struct rxrpc_local *local)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (local) {
debug_id = local->debug_id;
- n = atomic_dec_return(&local->usage);
- trace_rxrpc_local(debug_id, rxrpc_local_put, n, here);
+ dead = __refcount_dec_and_test(&local->ref, &r);
+ trace_rxrpc_local(debug_id, rxrpc_local_put, r, here);
- if (n == 0)
+ if (dead)
call_rcu(&local->rcu, rxrpc_local_rcu);
}
}
@@ -374,7 +374,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
local->dead = true;
mutex_lock(&rxnet->local_mutex);
- list_del_init(&local->link);
+ hlist_del_init_rcu(&local->link);
mutex_unlock(&rxnet->local_mutex);
rxrpc_clean_up_local_conns(local);
@@ -406,7 +406,7 @@ static void rxrpc_local_processor(struct work_struct *work)
bool again;
trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
- atomic_read(&local->usage), NULL);
+ refcount_read(&local->ref), NULL);
do {
again = false;
@@ -458,11 +458,11 @@ void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
flush_workqueue(rxrpc_workqueue);
- if (!list_empty(&rxnet->local_endpoints)) {
+ if (!hlist_empty(&rxnet->local_endpoints)) {
mutex_lock(&rxnet->local_mutex);
- list_for_each_entry(local, &rxnet->local_endpoints, link) {
+ hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
pr_err("AF_RXRPC: Leaked local %p {%d}\n",
- local, atomic_read(&local->usage));
+ local, refcount_read(&local->ref));
}
mutex_unlock(&rxnet->local_mutex);
BUG();
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index cc7e30733feb..bb4c25d6df64 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -50,7 +50,7 @@ static __net_init int rxrpc_init_net(struct net *net)
rxnet->epoch |= RXRPC_RANDOM_EPOCH;
INIT_LIST_HEAD(&rxnet->calls);
- rwlock_init(&rxnet->call_lock);
+ spin_lock_init(&rxnet->call_lock);
atomic_set(&rxnet->nr_calls, 1);
atomic_set(&rxnet->nr_conns, 1);
@@ -72,7 +72,7 @@ static __net_init int rxrpc_init_net(struct net *net)
timer_setup(&rxnet->client_conn_reap_timer,
rxrpc_client_conn_reap_timeout, 0);
- INIT_LIST_HEAD(&rxnet->local_endpoints);
+ INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
hash_init(rxnet->peer_hash);
@@ -98,6 +98,9 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("locals", 0444, rxnet->proc_net,
+ &rxrpc_local_seq_ops,
+ sizeof(struct seq_net_private));
return 0;
err_proc:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a45c83f22236..9683617db704 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -74,11 +74,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
u8 reason)
{
rxrpc_serial_t serial;
+ unsigned int tmp;
rxrpc_seq_t hard_ack, top, seq;
int ix;
u32 mtu, jmax;
u8 *ackp = pkt->acks;
+ tmp = atomic_xchg(&call->ackr_nr_unacked, 0);
+ tmp |= atomic_xchg(&call->ackr_nr_consumed, 0);
+ if (!tmp && (reason == RXRPC_ACK_DELAY ||
+ reason == RXRPC_ACK_IDLE))
+ return 0;
+
/* Barrier against rxrpc_input_data(). */
serial = call->ackr_serial;
hard_ack = READ_ONCE(call->rx_hard_ack);
@@ -89,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
pkt->ack.bufferSpace = htons(8);
pkt->ack.maxSkew = htons(0);
pkt->ack.firstPacket = htonl(hard_ack + 1);
- pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
+ pkt->ack.previousPacket = htonl(call->ackr_highest_seq);
pkt->ack.serial = htonl(serial);
pkt->ack.reason = reason;
pkt->ack.nAcks = top - hard_ack;
@@ -223,6 +230,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason);
spin_unlock_bh(&call->lock);
+ if (n == 0) {
+ kfree(pkt);
+ return 0;
+ }
iov[0].iov_base = pkt;
iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n;
@@ -259,13 +270,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
ntohl(pkt->ack.serial),
false, true,
rxrpc_propose_ack_retry_tx);
- } else {
- spin_lock_bh(&call->lock);
- if (after(hard_ack, call->ackr_consumed))
- call->ackr_consumed = hard_ack;
- if (after(top, call->ackr_seen))
- call->ackr_seen = top;
- spin_unlock_bh(&call->lock);
}
rxrpc_set_keepalive(call);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 0298fe2ad6d3..26d2ae9baaf2 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -121,7 +121,7 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 &&
- atomic_read(&peer->usage) > 0)
+ refcount_read(&peer->ref) > 0)
return peer;
}
@@ -140,7 +140,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer) {
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
}
return peer;
}
@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
- atomic_set(&peer->usage, 1);
+ refcount_set(&peer->ref, 1);
peer->local = rxrpc_get_local(local);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
@@ -378,7 +378,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
- _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
return peer;
}
@@ -388,10 +388,10 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
- int n;
+ int r;
- n = atomic_inc_return(&peer->usage);
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
+ __refcount_inc(&peer->ref, &r);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
return peer;
}
@@ -401,11 +401,11 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ int r;
if (peer) {
- int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
- if (n > 0)
- trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
+ if (__refcount_inc_not_zero(&peer->ref, &r))
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here);
else
peer = NULL;
}
@@ -436,13 +436,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id;
- int n;
+ bool dead;
+ int r;
if (peer) {
debug_id = peer->debug_id;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0)
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead)
__rxrpc_put_peer(peer);
}
}
@@ -455,11 +456,12 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
unsigned int debug_id = peer->debug_id;
- int n;
+ bool dead;
+ int r;
- n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
- if (n == 0) {
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here);
+ if (dead) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
rxrpc_free_peer(peer);
@@ -481,7 +483,7 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
pr_err("Leaked peer %u {%u} %pISp\n",
peer->debug_id,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
&peer->srx.transport);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index e2f990754f88..245418943e01 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -26,29 +26,23 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rcu)
- __acquires(rxnet->call_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
rcu_read_lock();
- read_lock(&rxnet->call_lock);
- return seq_list_start_head(&rxnet->calls, *_pos);
+ return seq_list_start_head_rcu(&rxnet->calls, *_pos);
}
static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
- return seq_list_next(v, &rxnet->calls, pos);
+ return seq_list_next_rcu(v, &rxnet->calls, pos);
}
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
- __releases(rxnet->call_lock)
__releases(rcu)
{
- struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
-
- read_unlock(&rxnet->call_lock);
rcu_read_unlock();
}
@@ -107,7 +101,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
- atomic_read(&call->usage),
+ refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->abort_code,
call->debug_id,
@@ -189,7 +183,7 @@ print:
conn->service_id,
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
- atomic_read(&conn->usage),
+ refcount_read(&conn->ref),
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
@@ -239,7 +233,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
" %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
- atomic_read(&peer->usage),
+ refcount_read(&peer->ref),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
@@ -334,3 +328,72 @@ const struct seq_operations rxrpc_peer_seq_ops = {
.stop = rxrpc_peer_seq_stop,
.show = rxrpc_peer_seq_show,
};
+
+/*
+ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
+ */
+static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_local *local;
+ char lbuff[50];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local "
+ " Use Act\n");
+ return 0;
+ }
+
+ local = hlist_entry(v, struct rxrpc_local, link);
+
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+
+ seq_printf(seq,
+ "UDP %-47.47s %3u %3u\n",
+ lbuff,
+ refcount_read(&local->ref),
+ atomic_read(&local->active_users));
+
+ return 0;
+}
+
+static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int n;
+
+ rcu_read_lock();
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ n = *_pos;
+ if (n == 0)
+ return SEQ_START_TOKEN;
+
+ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
+}
+
+static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
+}
+
+static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+const struct seq_operations rxrpc_local_seq_ops = {
+ .start = rxrpc_local_seq_start,
+ .next = rxrpc_local_seq_next,
+ .stop = rxrpc_local_seq_stop,
+ .show = rxrpc_local_seq_show,
+};
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index eca6dda26c77..250f23bc1c07 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -260,11 +260,9 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
rxrpc_end_rx_phase(call, serial);
} else {
/* Check to see if there's an ACK that needs sending. */
- if (after_eq(hard_ack, call->ackr_consumed + 2) ||
- after_eq(top, call->ackr_seen + 2) ||
- (hard_ack == top && after(hard_ack, call->ackr_consumed)))
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial,
- true, true,
+ if (atomic_inc_return(&call->ackr_nr_consumed) > 2)
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial,
+ true, false,
rxrpc_propose_ack_rotate_rx);
if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY)
rxrpc_send_ack_packet(call, false, NULL);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index af8ad6c30b9f..1d38e279e2ef 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -444,6 +444,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
success:
ret = copied;
+ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
+ read_lock_bh(&call->state_lock);
+ if (call->error < 0)
+ ret = call->error;
+ read_unlock_bh(&call->state_lock);
+ }
out:
call->tx_pending = skb;
_leave(" = %d", ret);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 0348d2bf6f7d..580a5acffee7 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -71,7 +71,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
const void *here = __builtin_return_address(0);
if (skb) {
int n;
- CHECK_SLAB_OKAY(&skb->users);
n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
rxrpc_skb(skb)->rx_flags, here);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 540351d6a5f4..555e0910786b 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -12,7 +12,7 @@
static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int four = 4;
-static const unsigned int thirtytwo = 32;
+static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
static const unsigned long one_jiffy = 1;
@@ -89,7 +89,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&four,
- .extra2 = (void *)&thirtytwo,
+ .extra2 = (void *)&max_backlog,
},
{
.procname = "rx_window_size",
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4f51094da9da..da9733da9868 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -195,7 +195,7 @@ static int offload_action_init(struct flow_offload_action *fl_action,
if (act->ops->offload_act_setup) {
spin_lock_bh(&act->tcfa_lock);
err = act->ops->offload_act_setup(act, fl_action, NULL,
- false);
+ false, extack);
spin_unlock_bh(&act->tcfa_lock);
return err;
}
@@ -271,7 +271,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
if (err)
goto fl_err;
- err = tc_setup_action(&fl_action->action, actions);
+ err = tc_setup_action(&fl_action->action, actions, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to setup tc actions for offload");
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index e0f515b774ca..22847ee009ef 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -696,7 +696,8 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
}
static int tcf_csum_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b1f502fce595..8af9d6e5ba61 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1584,7 +1584,8 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets,
}
static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index bde6a6c01e64..ac29d1065232 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -253,7 +253,8 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
}
static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -267,7 +268,17 @@ static int tcf_gact_offload_act_setup(struct tc_action *act, void *entry_data,
} else if (is_tcf_gact_goto_chain(act)) {
entry->id = FLOW_ACTION_GOTO;
entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_gact_continue(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"continue\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_reclassify(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"reclassify\" action is not supported");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_gact_pipe(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload of \"pipe\" action is not supported");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported generic action offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index d56e73843a4b..fd5155274733 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -619,7 +619,8 @@ static int tcf_gate_get_entries(struct flow_action_entry *entry,
}
static int tcf_gate_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 39acd1d18609..ebb92fb072ab 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -460,7 +460,8 @@ static void tcf_offload_mirred_get_dev(struct flow_action_entry *entry,
}
static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -478,6 +479,7 @@ static int tcf_mirred_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_MIRRED_INGRESS;
tcf_offload_mirred_get_dev(entry, act);
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported mirred offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index b9ff3459fdab..adabeccb63e1 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -385,7 +385,8 @@ static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index)
}
static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -410,7 +411,14 @@ static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data,
entry->mpls_mangle.bos = tcf_mpls_bos(act);
entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
break;
+ case TCA_MPLS_ACT_DEC_TTL:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"dec_ttl\" option is used");
+ return -EOPNOTSUPP;
+ case TCA_MPLS_ACT_MAC_PUSH:
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"mac_push\" option is used");
+ return -EOPNOTSUPP;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported MPLS mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 211c757bfc3c..823ee643371c 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -510,7 +510,8 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
}
static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -525,6 +526,7 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
entry->id = FLOW_ACTION_ADD;
break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
return -EOPNOTSUPP;
}
entry->mangle.htype = tcf_pedit_htype(act, k);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f4d917705263..79c8901f66ab 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -419,7 +419,8 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index)
return tcf_idr_search(tn, a, index);
}
-static int tcf_police_act_to_flow_act(int tc_act, u32 *extval)
+static int tcf_police_act_to_flow_act(int tc_act, u32 *extval,
+ struct netlink_ext_ack *extack)
{
int act_id = -EOPNOTSUPP;
@@ -430,19 +431,28 @@ static int tcf_police_act_to_flow_act(int tc_act, u32 *extval)
act_id = FLOW_ACTION_DROP;
else if (tc_act == TC_ACT_PIPE)
act_id = FLOW_ACTION_PIPE;
+ else if (tc_act == TC_ACT_RECLASSIFY)
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"reclassify\"");
+ else
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
} else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_GOTO_CHAIN)) {
act_id = FLOW_ACTION_GOTO;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
} else if (TC_ACT_EXT_CMP(tc_act, TC_ACT_JUMP)) {
act_id = FLOW_ACTION_JUMP;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
+ } else if (tc_act == TC_ACT_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\"");
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
}
return act_id;
}
static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -466,14 +476,16 @@ static int tcf_police_offload_act_setup(struct tc_action *act, void *entry_data,
entry->police.mtu = tcf_police_tcfp_mtu(act);
act_id = tcf_police_act_to_flow_act(police->tcf_action,
- &entry->police.exceed.extval);
+ &entry->police.exceed.extval,
+ extack);
if (act_id < 0)
return act_id;
entry->police.exceed.act_id = act_id;
act_id = tcf_police_act_to_flow_act(p->tcfp_result,
- &entry->police.notexceed.extval);
+ &entry->police.notexceed.extval,
+ extack);
if (act_id < 0)
return act_id;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 9a22cdda6bbd..2f7f5e44d28c 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -291,7 +291,8 @@ static void tcf_offload_sample_get_group(struct flow_action_entry *entry,
}
static int tcf_sample_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ceba11b198bb..e3bd11dfe1ca 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,20 @@
static unsigned int skbedit_net_id;
static struct tc_action_ops act_skbedit_ops;
+static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
+ struct sk_buff *skb)
+{
+ u16 queue_mapping = params->queue_mapping;
+
+ if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+ u32 hash = skb_get_hash(skb);
+
+ queue_mapping += hash % params->mapping_mod;
+ }
+
+ return netdev_cap_txqueue(skb->dev, queue_mapping);
+}
+
static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
@@ -58,8 +72,12 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
}
}
if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
- skb->dev->real_num_tx_queues > params->queue_mapping)
- skb_set_queue_mapping(skb, params->queue_mapping);
+ skb->dev->real_num_tx_queues > params->queue_mapping) {
+#ifdef CONFIG_NET_EGRESS
+ netdev_xmit_skip_txqueue(true);
+#endif
+ skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
+ }
if (params->flags & SKBEDIT_F_MARK) {
skb->mark &= ~params->mask;
skb->mark |= params->mark & params->mask;
@@ -92,6 +110,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
[TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) },
[TCA_SKBEDIT_MASK] = { .len = sizeof(u32) },
[TCA_SKBEDIT_FLAGS] = { .len = sizeof(u64) },
+ [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) },
};
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -108,6 +127,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
u16 *queue_mapping = NULL, *ptype = NULL;
+ u16 mapping_mod = 1;
bool exists = false;
int ret = 0, err;
u32 index;
@@ -153,6 +173,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
+ if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
+ u16 *queue_mapping_max;
+
+ if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
+ !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
+ return -EINVAL;
+ }
+
+ queue_mapping_max =
+ nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
+ if (*queue_mapping_max < *queue_mapping) {
+ NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
+ return -EINVAL;
+ }
+
+ mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+ flags |= SKBEDIT_F_TXQ_SKBHASH;
+ }
if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
flags |= SKBEDIT_F_INHERITDSFIELD;
}
@@ -204,8 +243,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->flags = flags;
if (flags & SKBEDIT_F_PRIORITY)
params_new->priority = *priority;
- if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ if (flags & SKBEDIT_F_QUEUE_MAPPING) {
params_new->queue_mapping = *queue_mapping;
+ params_new->mapping_mod = mapping_mod;
+ }
if (flags & SKBEDIT_F_MARK)
params_new->mark = *mark;
if (flags & SKBEDIT_F_PTYPE)
@@ -272,6 +313,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
goto nla_put_failure;
if (params->flags & SKBEDIT_F_INHERITDSFIELD)
pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+ if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+ if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
+ params->queue_mapping + params->mapping_mod - 1))
+ goto nla_put_failure;
+
+ pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
+ }
if (pure_flags != 0 &&
nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
goto nla_put_failure;
@@ -321,6 +369,7 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
return nla_total_size(sizeof(struct tc_skbedit))
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
+ + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
@@ -328,7 +377,8 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
}
static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -342,7 +392,14 @@ static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_skbedit_queue_mapping(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used");
+ return -EOPNOTSUPP;
+ } else if (is_tcf_skbedit_inheritdsfield(act)) {
+ NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used");
+ return -EOPNOTSUPP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 23aba03d26a8..856dc23cef8c 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -808,7 +808,8 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
void *entry_data,
u32 *index_inc,
- bool bind)
+ bool bind,
+ struct netlink_ext_ack *extack)
{
int err;
@@ -823,6 +824,7 @@ static int tcf_tunnel_key_offload_act_setup(struct tc_action *act,
} else if (is_tcf_tunnel_release(act)) {
entry->id = FLOW_ACTION_TUNNEL_DECAP;
} else {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel key mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 883454c4f921..68b5e772386a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -369,7 +369,8 @@ static size_t tcf_vlan_get_fill_size(const struct tc_action *act)
}
static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
- u32 *index_inc, bool bind)
+ u32 *index_inc, bool bind,
+ struct netlink_ext_ack *extack)
{
if (bind) {
struct flow_action_entry *entry = entry_data;
@@ -398,6 +399,7 @@ static int tcf_vlan_offload_act_setup(struct tc_action *act, void *entry_data,
tcf_vlan_push_eth(entry->vlan_push_eth.src, entry->vlan_push_eth.dst, act);
break;
default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported vlan action mode offload");
return -EOPNOTSUPP;
}
*index_inc = 1;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f0699f39afdb..9bb4d3dcc994 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3513,20 +3513,25 @@ EXPORT_SYMBOL(tc_cleanup_offload_action);
static int tc_setup_offload_act(struct tc_action *act,
struct flow_action_entry *entry,
- u32 *index_inc)
+ u32 *index_inc,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
- if (act->ops->offload_act_setup)
- return act->ops->offload_act_setup(act, entry, index_inc, true);
- else
+ if (act->ops->offload_act_setup) {
+ return act->ops->offload_act_setup(act, entry, index_inc, true,
+ extack);
+ } else {
+ NL_SET_ERR_MSG(extack, "Action does not support offload");
return -EOPNOTSUPP;
+ }
#else
return 0;
#endif
}
int tc_setup_action(struct flow_action *flow_action,
- struct tc_action *actions[])
+ struct tc_action *actions[],
+ struct netlink_ext_ack *extack)
{
int i, j, index, err = 0;
struct tc_action *act;
@@ -3551,7 +3556,7 @@ int tc_setup_action(struct flow_action *flow_action,
entry->hw_stats = tc_act_hw_stats(act->hw_stats);
entry->hw_index = act->tcfa_index;
index = 0;
- err = tc_setup_offload_act(act, entry, &index);
+ err = tc_setup_offload_act(act, entry, &index, extack);
if (!err)
j += index;
else
@@ -3570,13 +3575,14 @@ err_out_locked:
}
int tc_setup_offload_action(struct flow_action *flow_action,
- const struct tcf_exts *exts)
+ const struct tcf_exts *exts,
+ struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
if (!exts)
return 0;
- return tc_setup_action(flow_action, exts->actions);
+ return tc_setup_action(flow_action, exts->actions, extack);
#else
return 0;
#endif
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index ed5e6f08e74a..dcca70144dff 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -72,6 +72,7 @@ struct fl_flow_key {
} tp_range;
struct flow_dissector_key_ct ct;
struct flow_dissector_key_hash hash;
+ struct flow_dissector_key_num_of_vlans num_of_vlans;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -464,14 +465,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
- if (skip_sw) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
@@ -714,6 +713,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_HASH] = { .type = NLA_U32 },
[TCA_FLOWER_KEY_HASH_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 },
};
@@ -1030,8 +1030,10 @@ static void fl_set_key_vlan(struct nlattr **tb,
VLAN_PRIORITY_MASK;
key_mask->vlan_priority = VLAN_PRIORITY_MASK;
}
- key_val->vlan_tpid = ethertype;
- key_mask->vlan_tpid = cpu_to_be16(~0);
+ if (ethertype) {
+ key_val->vlan_tpid = ethertype;
+ key_mask->vlan_tpid = cpu_to_be16(~0);
+ }
if (tb[vlan_next_eth_type_key]) {
key_val->vlan_eth_type =
nla_get_be16(tb[vlan_next_eth_type_key]);
@@ -1581,6 +1583,26 @@ static int fl_set_key_ct(struct nlattr **tb,
return 0;
}
+static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype,
+ struct fl_flow_key *key, struct fl_flow_key *mask,
+ int vthresh)
+{
+ const bool good_num_of_vlans = key->num_of_vlans.num_of_vlans > vthresh;
+
+ if (!tb) {
+ *ethertype = 0;
+ return good_num_of_vlans;
+ }
+
+ *ethertype = nla_get_be16(tb);
+ if (good_num_of_vlans || eth_type_vlan(*ethertype))
+ return true;
+
+ key->basic.n_proto = *ethertype;
+ mask->basic.n_proto = cpu_to_be16(~0);
+ return false;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1602,37 +1624,30 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
sizeof(key->eth.src));
-
- if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
- ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
-
- if (eth_type_vlan(ethertype)) {
- fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
- TCA_FLOWER_KEY_VLAN_PRIO,
- TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- &key->vlan, &mask->vlan);
-
- if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
- ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
- if (eth_type_vlan(ethertype)) {
- fl_set_key_vlan(tb, ethertype,
- TCA_FLOWER_KEY_CVLAN_ID,
- TCA_FLOWER_KEY_CVLAN_PRIO,
- TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
- &key->cvlan, &mask->cvlan);
- fl_set_key_val(tb, &key->basic.n_proto,
- TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
- &mask->basic.n_proto,
- TCA_FLOWER_UNSPEC,
- sizeof(key->basic.n_proto));
- } else {
- key->basic.n_proto = ethertype;
- mask->basic.n_proto = cpu_to_be16(~0);
- }
- }
- } else {
- key->basic.n_proto = ethertype;
- mask->basic.n_proto = cpu_to_be16(~0);
+ fl_set_key_val(tb, &key->num_of_vlans,
+ TCA_FLOWER_KEY_NUM_OF_VLANS,
+ &mask->num_of_vlans,
+ TCA_FLOWER_UNSPEC,
+ sizeof(key->num_of_vlans));
+
+ if (is_vlan_key(tb[TCA_FLOWER_KEY_ETH_TYPE], &ethertype, key, mask, 0)) {
+ fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+ TCA_FLOWER_KEY_VLAN_PRIO,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ &key->vlan, &mask->vlan);
+
+ if (is_vlan_key(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE],
+ &ethertype, key, mask, 1)) {
+ fl_set_key_vlan(tb, ethertype,
+ TCA_FLOWER_KEY_CVLAN_ID,
+ TCA_FLOWER_KEY_CVLAN_PRIO,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ &key->cvlan, &mask->cvlan);
+ fl_set_key_val(tb, &key->basic.n_proto,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+ &mask->basic.n_proto,
+ TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
}
}
@@ -1906,6 +1921,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_CT, ct);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_HASH, hash);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -2362,11 +2379,11 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
cls_flower.rule->match.mask = &f->mask->key;
cls_flower.rule->match.key = &f->mkey;
- err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts);
+ err = tc_setup_offload_action(&cls_flower.rule->action, &f->exts,
+ cls_flower.common.extack);
if (err) {
kfree(cls_flower.rule);
if (tc_skip_sw(f->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
__fl_put(f);
return err;
}
@@ -2994,6 +3011,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->basic.n_proto)))
goto nla_put_failure;
+ if (mask->num_of_vlans.num_of_vlans) {
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_NUM_OF_VLANS, key->num_of_vlans.num_of_vlans))
+ goto nla_put_failure;
+ }
+
if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
goto nla_put_failure;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index ca5670fd5228..06cf22adbab7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -97,16 +97,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.cookie = cookie;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
mall_destroy_hw_filter(tp, head, cookie, NULL);
- if (skip_sw)
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- else
- err = 0;
- return err;
+ return skip_sw ? err : 0;
}
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
@@ -302,14 +299,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = (unsigned long)head;
- err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts);
+ err = tc_setup_offload_action(&cls_mall.rule->action, &head->exts,
+ cls_mall.common.extack);
if (err) {
kfree(cls_mall.rule);
- if (add && tc_skip_sw(head->flags)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
- return err;
- }
- return 0;
+
+ return add && tc_skip_sw(head->flags) ? err : 0;
}
err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 0a04468b7314..49bae3d5006b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -311,12 +311,15 @@ META_COLLECTOR(int_sk_bound_if)
META_COLLECTOR(var_sk_bound_if)
{
+ int bound_dev_if;
+
if (skip_nonlocal(skb)) {
*err = -1;
return;
}
- if (skb->sk->sk_bound_dev_if == 0) {
+ bound_dev_if = READ_ONCE(skb->sk->sk_bound_dev_if);
+ if (bound_dev_if == 0) {
dst->value = (unsigned long) "any";
dst->len = 3;
} else {
@@ -324,7 +327,7 @@ META_COLLECTOR(var_sk_bound_if)
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(skb->sk),
- skb->sk->sk_bound_dev_if);
+ bound_dev_if);
*err = var_dev(dev, dst);
rcu_read_unlock();
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5bab9f8b8f45..dba0b3e24af5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1019,22 +1019,14 @@ EXPORT_SYMBOL(qdisc_create_dflt);
void qdisc_reset(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
trace_qdisc_reset(qdisc);
if (ops->reset)
ops->reset(qdisc);
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ __skb_queue_purge(&qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->skb_bad_txq);
qdisc->q.qlen = 0;
qdisc->qstats.backlog = 0;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 90e12bafdd48..4f43afa8678f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -92,6 +92,7 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_chunk *chunk;
union sctp_addr src;
union sctp_addr dest;
+ int bound_dev_if;
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
@@ -169,7 +170,8 @@ int sctp_rcv(struct sk_buff *skb)
* If a frame arrives on an interface and the receiving socket is
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
- if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) {
if (transport) {
sctp_transport_put(transport);
asoc = NULL;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 470dbdc27d58..d081858c2d07 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -926,7 +926,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
return 1;
/* v4-mapped-v6 addresses */
case AF_INET:
- if (!__ipv6_only_sock(sctp_opt2sk(sp)))
+ if (!ipv6_only_sock(sctp_opt2sk(sp)))
return 1;
fallthrough;
default:
@@ -952,7 +952,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
return 0;
/* If the socket is IPv6 only, v4 addrs will not match */
- if (__ipv6_only_sock(sk) && af1 != af2)
+ if (ipv6_only_sock(sk) && af1 != af2)
return 0;
/* Today, wildcard AF_INET/AF_INET6. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 72fe6669c50d..a63df055ac57 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -134,7 +134,8 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
dst_hold(tp->dst);
sk_setup_caps(sk, tp->dst);
}
- packet->max_size = sk_can_gso(sk) ? READ_ONCE(tp->dst->dev->gso_max_size)
+ packet->max_size = sk_can_gso(sk) ? min(READ_ONCE(tp->dst->dev->gso_max_size),
+ GSO_LEGACY_MAX_SIZE)
: asoc->pathmtu;
rcu_read_unlock();
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7b0427658056..6d37d2dfb3da 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2084,7 +2084,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
* 5 for complete description of the flags.
*/
static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sctp_ulpevent *event = NULL;
struct sctp_sock *sp = sctp_sk(sk);
@@ -2093,9 +2093,8 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int err = 0;
int skb_len;
- pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, "
- "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags,
- addr_len);
+ pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n",
+ __func__, sk, msg, len, flags, addr_len);
lock_sock(sk);
@@ -2105,7 +2104,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = sctp_skb_recv_datagram(sk, flags, noblock, &err);
+ skb = sctp_skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -2129,7 +2128,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
head_skb = event->chunk->head_skb;
else
head_skb = skb;
- sock_recv_ts_and_drops(msg, sk, head_skb);
+ sock_recv_cmsgs(msg, sk, head_skb);
if (sctp_ulpevent_is_notification(event)) {
msg->msg_flags |= MSG_NOTIFICATION;
sp->pf->event_msgname(event, msg->msg_name, addr_len);
@@ -8978,14 +8977,13 @@ out:
* Note: This is pretty much the same routine as in core/datagram.c
* with a few changes to make lksctp work.
*/
-struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
- int noblock, int *err)
+struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err)
{
int error;
struct sk_buff *skb;
long timeo;
- timeo = sock_rcvtimeo(sk, noblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo,
MAX_SCHEDULE_TIMEOUT);
@@ -9018,7 +9016,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
break;
if (sk_can_busy_loop(sk)) {
- sk_busy_loop(sk, noblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
continue;
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 99e5f69fbb74..518b1b9bf89d 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -146,14 +146,11 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
/* Give the next scheduler a clean slate. */
for (i = 0; i < asoc->stream.outcnt; i++) {
- void *p = SCTP_SO(&asoc->stream, i)->ext;
+ struct sctp_stream_out_ext *ext = SCTP_SO(&asoc->stream, i)->ext;
- if (!p)
+ if (!ext)
continue;
-
- p += offsetofend(struct sctp_stream_out_ext, outq);
- memset(p, 0, sizeof(struct sctp_stream_out_ext) -
- offsetofend(struct sctp_stream_out_ext, outq));
+ memset_after(ext, 0, outq);
}
}
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 0c3d2b4d7321..8920ca92a011 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -1063,7 +1063,7 @@ void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
struct sk_buff *skb;
int err;
- skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err);
+ skb = sctp_skb_recv_datagram(sk, MSG_PEEK | MSG_DONTWAIT, &err);
if (skb != NULL) {
__sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb),
msghdr, skb);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fce16b9d6e1a..a201bf29af98 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
lock_sock(sk);
+ switch (sock->state) {
+ default:
+ rc = -EINVAL;
+ goto out;
+ case SS_CONNECTED:
+ rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
+ goto out;
+ case SS_CONNECTING:
+ if (sk->sk_state == SMC_ACTIVE)
+ goto connected;
+ break;
+ case SS_UNCONNECTED:
+ sock->state = SS_CONNECTING;
+ break;
+ }
+
switch (sk->sk_state) {
default:
goto out;
+ case SMC_CLOSED:
+ rc = sock_error(sk) ? : -ECONNABORTED;
+ sock->state = SS_UNCONNECTED;
+ goto out;
case SMC_ACTIVE:
rc = -EISCONN;
goto out;
@@ -1564,21 +1584,25 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
if (rc && rc != -EINPROGRESS)
goto out;
- sock_hold(&smc->sk); /* sock put in passive closing */
- if (smc->use_fallback)
+ if (smc->use_fallback) {
+ sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
goto out;
+ }
+ sock_hold(&smc->sk); /* sock put in passive closing */
if (flags & O_NONBLOCK) {
if (queue_work(smc_hs_wq, &smc->connect_work))
smc->connect_nonblock = 1;
rc = -EINPROGRESS;
+ goto out;
} else {
rc = __smc_connect(smc);
if (rc < 0)
goto out;
- else
- rc = 0; /* success cases including fallback */
}
+connected:
+ rc = 0;
+ sock->state = SS_CONNECTED;
out:
release_sock(sk);
out_err:
@@ -1693,6 +1717,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
}
if (new_sock) {
sock_graft(new_sk, new_sock);
+ new_sock->state = SS_CONNECTED;
if (isk->use_fallback) {
smc_sk(new_sk)->clcsock->file = new_sock->file;
isk->clcsock->file->private_data = isk->clcsock;
@@ -2424,7 +2449,7 @@ static int smc_listen(struct socket *sock, int backlog)
rc = -EINVAL;
if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
- smc->connect_nonblock)
+ smc->connect_nonblock || sock->state != SS_UNCONNECTED)
goto out;
rc = 0;
@@ -2716,6 +2741,17 @@ static int smc_shutdown(struct socket *sock, int how)
lock_sock(sk);
+ if (sock->state == SS_CONNECTING) {
+ if (sk->sk_state == SMC_ACTIVE)
+ sock->state = SS_CONNECTED;
+ else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
+ sk->sk_state == SMC_PEERCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPCLOSEWAIT1 ||
+ sk->sk_state == SMC_APPCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPFINCLOSEWAIT)
+ sock->state = SS_DISCONNECTING;
+ }
+
rc = -ENOTCONN;
if ((sk->sk_state != SMC_ACTIVE) &&
(sk->sk_state != SMC_PEERCLOSEWAIT1) &&
@@ -2729,6 +2765,7 @@ static int smc_shutdown(struct socket *sock, int how)
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
if (sk->sk_shutdown == SHUTDOWN_MASK) {
sk->sk_state = SMC_CLOSED;
+ sk->sk_socket->state = SS_UNCONNECTED;
sock_put(sk);
}
goto out;
@@ -2754,6 +2791,10 @@ static int smc_shutdown(struct socket *sock, int how)
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
+ if (sk->sk_state == SMC_CLOSED)
+ sock->state = SS_UNCONNECTED;
+ else
+ sock->state = SS_DISCONNECTING;
out:
release_sock(sk);
return rc ? rc : rc1;
@@ -3139,6 +3180,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
+ sock->state = SS_UNCONNECTED;
sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index a3e2d3b89568..dcda4165d107 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -671,6 +671,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
.max_recv_wr = SMC_WR_BUF_CNT * 3,
.max_send_sge = SMC_IB_MAX_SEND_SGE,
.max_recv_sge = sges_per_buf,
+ .max_inline_data = 0,
},
.sq_sig_type = IB_SIGNAL_REQ_WR,
.qp_type = IB_QPT_RC,
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 98ca9229fe87..805a546e8c04 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -391,12 +391,20 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
int rc;
for (dstchunk = 0; dstchunk < 2; dstchunk++) {
- struct ib_sge *sge =
- wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+ struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
+ struct ib_sge *sge = wr->wr.sg_list;
+ u64 base_addr = dma_addr;
+
+ if (dst_len < link->qp_attr.cap.max_inline_data) {
+ base_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
+ wr->wr.send_flags |= IB_SEND_INLINE;
+ } else {
+ wr->wr.send_flags &= ~IB_SEND_INLINE;
+ }
num_sges = 0;
for (srcchunk = 0; srcchunk < 2; srcchunk++) {
- sge[srcchunk].addr = dma_addr + src_off;
+ sge[srcchunk].addr = base_addr + src_off;
sge[srcchunk].length = src_len;
num_sges++;
@@ -410,8 +418,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
src_len = dst_len - src_len; /* remainder */
src_len_sum += src_len;
}
- rc = smc_tx_rdma_write(conn, dst_off, num_sges,
- &wr_rdma_buf->wr_tx_rdma[dstchunk]);
+ rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
if (rc)
return rc;
if (dst_len_sum == len)
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 24be1d03fef9..26f8f240d9e8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -554,10 +554,11 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
static void smc_wr_init_sge(struct smc_link *lnk)
{
int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1;
+ bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE);
u32 i;
for (i = 0; i < lnk->wr_tx_cnt; i++) {
- lnk->wr_tx_sges[i].addr =
+ lnk->wr_tx_sges[i].addr = send_inline ? (uintptr_t)(&lnk->wr_tx_bufs[i]) :
lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
@@ -575,6 +576,8 @@ static void smc_wr_init_sge(struct smc_link *lnk)
lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
lnk->wr_tx_ibs[i].send_flags =
IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ if (send_inline)
+ lnk->wr_tx_ibs[i].send_flags |= IB_SEND_INLINE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
diff --git a/net/socket.c b/net/socket.c
index bb6a1a12fbde..2bc8773d9dc5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -683,9 +683,18 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
- if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
flags |= SKBTX_HW_TSTAMP;
+ /* PTP hardware clocks can provide a free running cycle counter
+ * as a time base for virtual clocks. Tell driver to use the
+ * free running cycle counter for timestamp if socket is bound
+ * to virtual clock.
+ */
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
+ }
+
if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
flags |= SKBTX_SW_TSTAMP;
@@ -796,7 +805,28 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
}
-static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
+{
+ bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+ struct net_device *orig_dev;
+ ktime_t hwtstamp;
+
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev) {
+ *if_index = orig_dev->ifindex;
+ hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
+ } else {
+ hwtstamp = shhwtstamps->hwtstamp;
+ }
+ rcu_read_unlock();
+
+ return hwtstamp;
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
+ int if_index)
{
struct scm_ts_pktinfo ts_pktinfo;
struct net_device *orig_dev;
@@ -806,11 +836,14 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
- rcu_read_lock();
- orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
- if (orig_dev)
- ts_pktinfo.if_index = orig_dev->ifindex;
- rcu_read_unlock();
+ if (!if_index) {
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev)
+ if_index = orig_dev->ifindex;
+ rcu_read_unlock();
+ }
+ ts_pktinfo.if_index = if_index;
ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
@@ -830,6 +863,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
+ int if_index;
ktime_t hwtstamp;
/* Race occurred between timestamp enabling and packet
@@ -878,18 +912,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
- hwtstamp = ptp_convert_timestamp(shhwtstamps,
- sk->sk_bind_phc);
+ if_index = 0;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+ hwtstamp = get_timestamp(sk, skb, &if_index);
else
hwtstamp = shhwtstamps->hwtstamp;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ hwtstamp = ptp_convert_timestamp(&hwtstamp,
+ sk->sk_bind_phc);
+
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
- put_ts_pktinfo(msg, skb);
+ put_ts_pktinfo(msg, skb, if_index);
}
}
if (!empty) {
@@ -930,13 +968,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (sock_flag(sk, SOCK_RCVMARK) && skb)
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
+ &skb->mark);
+}
+
+void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
{
sock_recv_timestamp(msg, sk, skb);
sock_recv_drops(msg, sk, skb);
+ sock_recv_mark(msg, sk, skb);
}
-EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
size_t, int));
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cc35ec433400..45336e68bf79 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -464,7 +464,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
0, 0, MSG_PEEK | MSG_DONTWAIT);
if (err < 0)
goto out_recv_err;
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ skb = skb_recv_udp(svsk->sk_sk, MSG_DONTWAIT, &err);
if (!skb)
goto out_recv_err;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 650102a9c86a..fcdd0fca408e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1355,7 +1355,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
if (sk == NULL)
goto out;
for (;;) {
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (skb == NULL)
break;
xs_udp_data_read_skb(&transport->xprt, sk, skb);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 3919fe2c58c5..ec6f4b699a2b 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -411,10 +411,16 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
return 0;
}
+union tls_iter_offset {
+ struct iov_iter *msg_iter;
+ int offset;
+};
+
static int tls_push_data(struct sock *sk,
- struct iov_iter *msg_iter,
+ union tls_iter_offset iter_offset,
size_t size, int flags,
- unsigned char record_type)
+ unsigned char record_type,
+ struct page *zc_page)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -480,12 +486,21 @@ handle_error:
}
record = ctx->open_record;
- copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
- copy = min_t(size_t, copy, (max_open_record_len - record->len));
- if (copy) {
+ copy = min_t(size_t, size, max_open_record_len - record->len);
+ if (copy && zc_page) {
+ struct page_frag zc_pfrag;
+
+ zc_pfrag.page = zc_page;
+ zc_pfrag.offset = iter_offset.offset;
+ zc_pfrag.size = copy;
+ tls_append_frag(record, &zc_pfrag, copy);
+ } else if (copy) {
+ copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
+
rc = tls_device_copy_data(page_address(pfrag->page) +
- pfrag->offset, copy, msg_iter);
+ pfrag->offset, copy,
+ iter_offset.msg_iter);
if (rc)
goto handle_error;
tls_append_frag(record, pfrag, copy);
@@ -540,6 +555,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
+ union tls_iter_offset iter;
int rc;
mutex_lock(&tls_ctx->tx_lock);
@@ -551,8 +567,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out;
}
- rc = tls_push_data(sk, &msg->msg_iter, size,
- msg->msg_flags, record_type);
+ iter.msg_iter = &msg->msg_iter;
+ rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL);
out:
release_sock(sk);
@@ -564,7 +580,8 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct iov_iter msg_iter;
+ union tls_iter_offset iter_offset;
+ struct iov_iter msg_iter;
char *kaddr;
struct kvec iov;
int rc;
@@ -580,12 +597,20 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
goto out;
}
+ if (tls_ctx->zerocopy_sendfile) {
+ iter_offset.offset = offset;
+ rc = tls_push_data(sk, iter_offset, size,
+ flags, TLS_RECORD_TYPE_DATA, page);
+ goto out;
+ }
+
kaddr = kmap(page);
iov.iov_base = kaddr + offset;
iov.iov_len = size;
iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
- rc = tls_push_data(sk, &msg_iter, size,
- flags, TLS_RECORD_TYPE_DATA);
+ iter_offset.msg_iter = &msg_iter;
+ rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA,
+ NULL);
kunmap(page);
out:
@@ -656,10 +681,12 @@ EXPORT_SYMBOL(tls_get_record);
static int tls_device_push_pending_record(struct sock *sk, int flags)
{
- struct iov_iter msg_iter;
+ union tls_iter_offset iter;
+ struct iov_iter msg_iter;
iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
- return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+ iter.msg_iter = &msg_iter;
+ return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
}
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
@@ -964,11 +991,9 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
tls_ctx->rx.rec_seq, rxm->full_len,
is_encrypted, is_decrypted);
- ctx->sw.decrypted |= is_decrypted;
-
if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
if (likely(is_encrypted || is_decrypted))
- return 0;
+ return is_decrypted;
/* After tls_device_down disables the offload, the next SKB will
* likely have initial fragments decrypted, and final ones not
@@ -983,7 +1008,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
*/
if (is_decrypted) {
ctx->resync_nh_reset = 1;
- return 0;
+ return is_decrypted;
}
if (is_encrypted) {
tls_device_core_ctrl_rx_resync(tls_ctx, ctx, sk, skb);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 7b2b0e7ffee4..b91ddc110786 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -513,6 +513,26 @@ out:
return rc;
}
+static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval,
+ int __user *optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ unsigned int value;
+ int len;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len != sizeof(value))
+ return -EINVAL;
+
+ value = ctx->zerocopy_sendfile;
+ if (copy_to_user(optval, &value, sizeof(value)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int do_tls_getsockopt(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
@@ -524,6 +544,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
rc = do_tls_getsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
break;
+ case TLS_TX_ZEROCOPY_SENDFILE:
+ rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -675,6 +698,26 @@ err_crypto_info:
return rc;
}
+static int do_tls_setsockopt_tx_zc(struct sock *sk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+ unsigned int value;
+
+ if (sockptr_is_null(optval) || optlen != sizeof(value))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&value, optval, sizeof(value)))
+ return -EFAULT;
+
+ if (value > 1)
+ return -EINVAL;
+
+ ctx->zerocopy_sendfile = value;
+
+ return 0;
+}
+
static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
unsigned int optlen)
{
@@ -688,6 +731,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname, sockptr_t optval,
optname == TLS_TX);
release_sock(sk);
break;
+ case TLS_TX_ZEROCOPY_SENDFILE:
+ lock_sock(sk);
+ rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
+ release_sock(sk);
+ break;
default:
rc = -ENOPROTOOPT;
break;
@@ -921,6 +969,12 @@ static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
if (err)
goto nla_failure;
+ if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) {
+ err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE);
+ if (err)
+ goto nla_failure;
+ }
+
rcu_read_unlock();
nla_nest_end(skb, start);
return 0;
@@ -940,6 +994,7 @@ static size_t tls_get_info_size(const struct sock *sk)
nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
+ nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */
0;
return size;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a8976ef95528..0513f82b8537 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -44,6 +44,11 @@
#include <net/strparser.h>
#include <net/tls.h>
+struct tls_decrypt_arg {
+ bool zc;
+ bool async;
+};
+
noinline void tls_err_abort(struct sock *sk, int err)
{
WARN_ON_ONCE(err >= 0);
@@ -128,32 +133,31 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
return __skb_nsg(skb, offset, len, 0);
}
-static int padding_length(struct tls_sw_context_rx *ctx,
- struct tls_prot_info *prot, struct sk_buff *skb)
+static int padding_length(struct tls_prot_info *prot, struct sk_buff *skb)
{
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
int sub = 0;
/* Determine zero-padding length */
if (prot->version == TLS_1_3_VERSION) {
+ int offset = rxm->full_len - TLS_TAG_SIZE - 1;
char content_type = 0;
int err;
- int back = 17;
while (content_type == 0) {
- if (back > rxm->full_len - prot->prepend_size)
+ if (offset < prot->prepend_size)
return -EBADMSG;
- err = skb_copy_bits(skb,
- rxm->offset + rxm->full_len - back,
+ err = skb_copy_bits(skb, rxm->offset + offset,
&content_type, 1);
if (err)
return err;
if (content_type)
break;
sub++;
- back++;
+ offset--;
}
- ctx->control = content_type;
+ tlm->control = content_type;
}
return sub;
}
@@ -169,7 +173,6 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sg;
struct sk_buff *skb;
unsigned int pages;
- int pending;
skb = (struct sk_buff *)req->data;
tls_ctx = tls_get_ctx(skb->sk);
@@ -185,17 +188,12 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
tls_err_abort(skb->sk, err);
} else {
struct strp_msg *rxm = strp_msg(skb);
- int pad;
- pad = padding_length(ctx, prot, skb);
- if (pad < 0) {
- ctx->async_wait.err = pad;
- tls_err_abort(skb->sk, pad);
- } else {
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- }
+ /* No TLS 1.3 support with async crypto */
+ WARN_ON(prot->tail_size);
+
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
}
/* After using skb->sk to propagate sk through crypto async callback
@@ -217,9 +215,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
kfree(aead_req);
spin_lock_bh(&ctx->decrypt_compl_lock);
- pending = atomic_dec_return(&ctx->decrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (!atomic_dec_return(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->decrypt_compl_lock);
}
@@ -231,7 +227,7 @@ static int tls_do_decryption(struct sock *sk,
char *iv_recv,
size_t data_len,
struct aead_request *aead_req,
- bool async)
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -244,7 +240,7 @@ static int tls_do_decryption(struct sock *sk,
data_len + prot->tag_size,
(u8 *)iv_recv);
- if (async) {
+ if (darg->async) {
/* Using skb->sk to push sk through to crypto async callback
* handler. This allows propagating errors up to the socket
* if needed. It _must_ be cleared in the async handler
@@ -264,14 +260,15 @@ static int tls_do_decryption(struct sock *sk,
ret = crypto_aead_decrypt(aead_req);
if (ret == -EINPROGRESS) {
- if (async)
- return ret;
+ if (darg->async)
+ return 0;
ret = crypto_wait_req(ret, &ctx->async_wait);
}
+ darg->async = false;
- if (async)
- atomic_dec(&ctx->decrypt_pending);
+ if (ret == -EBADMSG)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
return ret;
}
@@ -1346,15 +1343,14 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
return skb;
}
-static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+static int tls_setup_from_iter(struct iov_iter *from,
int length, int *pages_used,
- unsigned int *size_used,
struct scatterlist *to,
int to_max_pages)
{
int rc = 0, i = 0, num_elem = *pages_used, maxpages;
struct page *pages[MAX_SKB_FRAGS];
- unsigned int size = *size_used;
+ unsigned int size = 0;
ssize_t copied, use;
size_t offset;
@@ -1397,8 +1393,7 @@ static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
sg_mark_end(&to[num_elem - 1]);
out:
if (rc)
- iov_iter_revert(from, size - *size_used);
- *size_used = size;
+ iov_iter_revert(from, size);
*pages_used = num_elem;
return rc;
@@ -1415,12 +1410,13 @@ out:
static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
struct iov_iter *out_iov,
struct scatterlist *out_sg,
- int *chunk, bool *zc, bool async)
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
struct aead_request *aead_req;
struct sk_buff *unused;
@@ -1431,7 +1427,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
prot->tail_size;
int iv_offset = 0;
- if (*zc && (out_iov || out_sg)) {
+ if (darg->zc && (out_iov || out_sg)) {
if (out_iov)
n_sgout = 1 +
iov_iter_npages_cap(out_iov, INT_MAX, data_len);
@@ -1441,7 +1437,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
rxm->full_len - prot->prepend_size);
} else {
n_sgout = 0;
- *zc = false;
+ darg->zc = false;
n_sgin = skb_cow_data(skb, 0, &unused);
}
@@ -1456,7 +1452,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
mem_size = aead_size + (nsg * sizeof(struct scatterlist));
mem_size = mem_size + prot->aad_size;
- mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
+ mem_size = mem_size + MAX_IV_SIZE;
/* Allocate a single block of memory which contains
* aead_req || sgin[] || sgout[] || aad || iv.
@@ -1486,26 +1482,26 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
}
/* Prepare IV */
- err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
- iv + iv_offset + prot->salt_size,
- prot->iv_size);
- if (err < 0) {
- kfree(mem);
- return err;
- }
if (prot->version == TLS_1_3_VERSION ||
- prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305)
+ prot->cipher_type == TLS_CIPHER_CHACHA20_POLY1305) {
memcpy(iv + iv_offset, tls_ctx->rx.iv,
prot->iv_size + prot->salt_size);
- else
+ } else {
+ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+ iv + iv_offset + prot->salt_size,
+ prot->iv_size);
+ if (err < 0) {
+ kfree(mem);
+ return err;
+ }
memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
-
+ }
xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);
/* Prepare AAD */
tls_make_aad(aad, rxm->full_len - prot->overhead_size +
prot->tail_size,
- tls_ctx->rx.rec_seq, ctx->control, prot);
+ tls_ctx->rx.rec_seq, tlm->control, prot);
/* Prepare sgin */
sg_init_table(sgin, n_sgin);
@@ -1523,9 +1519,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
sg_init_table(sgout, n_sgout);
sg_set_buf(&sgout[0], aad, prot->aad_size);
- *chunk = 0;
- err = tls_setup_from_iter(sk, out_iov, data_len,
- &pages, chunk, &sgout[1],
+ err = tls_setup_from_iter(out_iov, data_len,
+ &pages, &sgout[1],
(n_sgout - 1));
if (err < 0)
goto fallback_to_reg_recv;
@@ -1538,15 +1533,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
fallback_to_reg_recv:
sgout = sgin;
pages = 0;
- *chunk = data_len;
- *zc = false;
+ darg->zc = false;
}
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, skb, sgin, sgout, iv,
- data_len, aead_req, async);
- if (err == -EINPROGRESS)
- return err;
+ data_len, aead_req, darg);
+ if (darg->async)
+ return 0;
/* Release the pages in case iov was mapped to pages */
for (; pages > 0; pages--)
@@ -1557,87 +1551,83 @@ fallback_to_reg_recv:
}
static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *dest, int *chunk, bool *zc,
- bool async)
+ struct iov_iter *dest,
+ struct tls_decrypt_arg *darg)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct strp_msg *rxm = strp_msg(skb);
- int pad, err = 0;
+ struct tls_msg *tlm = tls_msg(skb);
+ int pad, err;
- if (!ctx->decrypted) {
- if (tls_ctx->rx_conf == TLS_HW) {
- err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
- if (err < 0)
- return err;
- }
+ if (tlm->decrypted) {
+ darg->zc = false;
+ darg->async = false;
+ return 0;
+ }
- /* Still not decrypted after tls_device */
- if (!ctx->decrypted) {
- err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
- async);
- if (err < 0) {
- if (err == -EINPROGRESS)
- tls_advance_record_sn(sk, prot,
- &tls_ctx->rx);
- else if (err == -EBADMSG)
- TLS_INC_STATS(sock_net(sk),
- LINUX_MIB_TLSDECRYPTERROR);
- return err;
- }
- } else {
- *zc = false;
+ if (tls_ctx->rx_conf == TLS_HW) {
+ err = tls_device_decrypted(sk, tls_ctx, skb, rxm);
+ if (err < 0)
+ return err;
+ if (err > 0) {
+ tlm->decrypted = 1;
+ darg->zc = false;
+ darg->async = false;
+ goto decrypt_done;
}
+ }
- pad = padding_length(ctx, prot, skb);
- if (pad < 0)
- return pad;
+ err = decrypt_internal(sk, skb, dest, NULL, darg);
+ if (err < 0)
+ return err;
+ if (darg->async)
+ goto decrypt_next;
- rxm->full_len -= pad;
- rxm->offset += prot->prepend_size;
- rxm->full_len -= prot->overhead_size;
- tls_advance_record_sn(sk, prot, &tls_ctx->rx);
- ctx->decrypted = 1;
- ctx->saved_data_ready(sk);
- } else {
- *zc = false;
- }
+decrypt_done:
+ pad = padding_length(prot, skb);
+ if (pad < 0)
+ return pad;
- return err;
+ rxm->full_len -= pad;
+ rxm->offset += prot->prepend_size;
+ rxm->full_len -= prot->overhead_size;
+ tlm->decrypted = 1;
+decrypt_next:
+ tls_advance_record_sn(sk, prot, &tls_ctx->rx);
+
+ return 0;
}
int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout)
{
- bool zc = true;
- int chunk;
+ struct tls_decrypt_arg darg = { .zc = true, };
- return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
+ return decrypt_internal(sk, skb, NULL, sgout, &darg);
}
-static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int len)
+static int tls_record_content_type(struct msghdr *msg, struct tls_msg *tlm,
+ u8 *control)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-
- if (skb) {
- struct strp_msg *rxm = strp_msg(skb);
-
- if (len < rxm->full_len) {
- rxm->offset += len;
- rxm->full_len -= len;
- return false;
+ int err;
+
+ if (!*control) {
+ *control = tlm->control;
+ if (!*control)
+ return -EBADMSG;
+
+ err = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+ sizeof(*control), control);
+ if (*control != TLS_RECORD_TYPE_DATA) {
+ if (err || msg->msg_flags & MSG_CTRUNC)
+ return -EIO;
}
- consume_skb(skb);
+ } else if (*control != tlm->control) {
+ return 0;
}
- /* Finished with message */
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
-
- return true;
+ return 1;
}
/* This function traverses the rx_list in tls receive context to copies the
@@ -1648,31 +1638,23 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
static int process_rx_list(struct tls_sw_context_rx *ctx,
struct msghdr *msg,
u8 *control,
- bool *cmsg,
size_t skip,
size_t len,
bool zc,
bool is_peek)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
- u8 ctrl = *control;
- u8 msgc = *cmsg;
struct tls_msg *tlm;
ssize_t copied = 0;
-
- /* Set the record type in 'control' if caller didn't pass it */
- if (!ctrl && skb) {
- tlm = tls_msg(skb);
- ctrl = tlm->control;
- }
+ int err;
while (skip && skb) {
struct strp_msg *rxm = strp_msg(skb);
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
if (skip < rxm->full_len)
break;
@@ -1688,30 +1670,15 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
tlm = tls_msg(skb);
- /* Cannot process a record of different type */
- if (ctrl != tlm->control)
- return 0;
-
- /* Set record type if not already done. For a non-data record,
- * do not proceed if record type could not be copied.
- */
- if (!msgc) {
- int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(ctrl), &ctrl);
- msgc = true;
- if (ctrl != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC)
- return -EIO;
-
- *cmsg = msgc;
- }
- }
+ err = tls_record_content_type(msg, tlm, control);
+ if (err <= 0)
+ goto out;
if (!zc || (rxm->full_len - skip) > len) {
- int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+ err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- return err;
+ goto out;
}
len = len - chunk;
@@ -1738,21 +1705,21 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
next_skb = skb_peek_next(skb, &ctx->rx_list);
if (!is_peek) {
- skb_unlink(skb, &ctx->rx_list);
+ __skb_unlink(skb, &ctx->rx_list);
consume_skb(skb);
}
skb = next_skb;
}
+ err = 0;
- *control = ctrl;
- return copied;
+out:
+ return copied ? : err;
}
int tls_sw_recvmsg(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -1766,16 +1733,13 @@ int tls_sw_recvmsg(struct sock *sk,
struct tls_msg *tlm;
struct sk_buff *skb;
ssize_t copied = 0;
- bool cmsg = false;
+ bool async = false;
int target, err = 0;
long timeo;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
bool bpf_strp_enabled;
- int num_async = 0;
- int pending;
-
- flags |= nonblock;
+ bool zc_capable;
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
@@ -1784,81 +1748,64 @@ int tls_sw_recvmsg(struct sock *sk,
lock_sock(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);
+ /* If crypto failed the connection is broken */
+ err = ctx->async_wait.err;
+ if (err)
+ goto end;
+
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
- is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
+ err = process_rx_list(ctx, msg, &control, 0, len, false, is_peek);
+ if (err < 0)
goto end;
- } else {
- copied = err;
- }
+ copied = err;
if (len <= copied)
- goto recv_end;
+ goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
len = len - copied;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ zc_capable = !bpf_strp_enabled && !is_kvec && !is_peek &&
+ prot->version != TLS_1_3_VERSION;
+ decrypted = 0;
while (len && (decrypted + copied < target || ctx->recv_pkt)) {
- bool retain_skb = false;
- bool zc = false;
- int to_decrypt;
- int chunk = 0;
- bool async_capable;
- bool async = false;
+ struct tls_decrypt_arg darg = {};
+ int to_decrypt, chunk;
skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
if (!skb) {
if (psock) {
- int ret = sk_msg_recvmsg(sk, psock, msg, len,
- flags);
-
- if (ret > 0) {
- decrypted += ret;
- len -= ret;
- continue;
- }
+ chunk = sk_msg_recvmsg(sk, psock, msg, len,
+ flags);
+ if (chunk > 0)
+ goto leave_on_list;
}
goto recv_end;
- } else {
- tlm = tls_msg(skb);
- if (prot->version == TLS_1_3_VERSION)
- tlm->control = 0;
- else
- tlm->control = ctx->control;
}
rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
to_decrypt = rxm->full_len - prot->overhead_size;
- if (to_decrypt <= len && !is_kvec && !is_peek &&
- ctx->control == TLS_RECORD_TYPE_DATA &&
- prot->version != TLS_1_3_VERSION &&
- !bpf_strp_enabled)
- zc = true;
+ if (zc_capable && to_decrypt <= len &&
+ tlm->control == TLS_RECORD_TYPE_DATA)
+ darg.zc = true;
/* Do not use async mode if record is non-data */
- if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
- async_capable = ctx->async_capable;
+ if (tlm->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
+ darg.async = ctx->async_capable;
else
- async_capable = false;
+ darg.async = false;
- err = decrypt_skb_update(sk, skb, &msg->msg_iter,
- &chunk, &zc, async_capable);
- if (err < 0 && err != -EINPROGRESS) {
+ err = decrypt_skb_update(sk, skb, &msg->msg_iter, &darg);
+ if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
- if (err == -EINPROGRESS) {
- async = true;
- num_async++;
- } else if (prot->version == TLS_1_3_VERSION) {
- tlm->control = ctx->control;
- }
+ async |= darg.async;
/* If the type of records being processed is not known yet,
* set it to record type just dequeued. If it is already known,
@@ -1867,131 +1814,107 @@ int tls_sw_recvmsg(struct sock *sk,
* is known just after record is dequeued from stream parser.
* For tls1.3, we disable async.
*/
-
- if (!control)
- control = tlm->control;
- else if (control != tlm->control)
+ err = tls_record_content_type(msg, tlm, &control);
+ if (err <= 0)
goto recv_end;
- if (!cmsg) {
- int cerr;
-
- cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
- sizeof(control), &control);
- cmsg = true;
- if (control != TLS_RECORD_TYPE_DATA) {
- if (cerr || msg->msg_flags & MSG_CTRUNC) {
- err = -EIO;
- goto recv_end;
- }
- }
+ ctx->recv_pkt = NULL;
+ __strp_unpause(&ctx->strp);
+ __skb_queue_tail(&ctx->rx_list, skb);
+
+ if (async) {
+ /* TLS 1.2-only, to_decrypt must be text length */
+ chunk = min_t(int, to_decrypt, len);
+leave_on_list:
+ decrypted += chunk;
+ len -= chunk;
+ continue;
}
+ /* TLS 1.3 may have updated the length by more than overhead */
+ chunk = rxm->full_len;
- if (async)
- goto pick_next_record;
+ if (!darg.zc) {
+ bool partially_consumed = chunk > len;
- if (!zc) {
if (bpf_strp_enabled) {
+ /* BPF may try to queue the skb */
+ __skb_unlink(skb, &ctx->rx_list);
err = sk_psock_tls_strp_read(psock, skb);
if (err != __SK_PASS) {
rxm->offset = rxm->offset + rxm->full_len;
rxm->full_len = 0;
if (err == __SK_DROP)
consume_skb(skb);
- ctx->recv_pkt = NULL;
- __strp_unpause(&ctx->strp);
continue;
}
+ __skb_queue_tail(&ctx->rx_list, skb);
}
- if (rxm->full_len > len) {
- retain_skb = true;
+ if (partially_consumed)
chunk = len;
- } else {
- chunk = rxm->full_len;
- }
err = skb_copy_datagram_msg(skb, rxm->offset,
msg, chunk);
if (err < 0)
goto recv_end;
- if (!is_peek) {
- rxm->offset = rxm->offset + chunk;
- rxm->full_len = rxm->full_len - chunk;
+ if (is_peek)
+ goto leave_on_list;
+
+ if (partially_consumed) {
+ rxm->offset += chunk;
+ rxm->full_len -= chunk;
+ goto leave_on_list;
}
}
-pick_next_record:
- if (chunk > len)
- chunk = len;
-
decrypted += chunk;
len -= chunk;
- /* For async or peek case, queue the current skb */
- if (async || is_peek || retain_skb) {
- skb_queue_tail(&ctx->rx_list, skb);
- skb = NULL;
- }
+ __skb_unlink(skb, &ctx->rx_list);
+ consume_skb(skb);
- if (tls_sw_advance_skb(sk, skb, chunk)) {
- /* Return full control message to
- * userspace before trying to parse
- * another message type
- */
- msg->msg_flags |= MSG_EOR;
- if (control != TLS_RECORD_TYPE_DATA)
- goto recv_end;
- } else {
+ /* Return full control message to userspace before trying
+ * to parse another message type
+ */
+ msg->msg_flags |= MSG_EOR;
+ if (control != TLS_RECORD_TYPE_DATA)
break;
- }
}
recv_end:
- if (num_async) {
+ if (async) {
+ int ret, pending;
+
/* Wait for all previously submitted records to be decrypted */
spin_lock_bh(&ctx->decrypt_compl_lock);
- ctx->async_notify = true;
+ reinit_completion(&ctx->async_wait.completion);
pending = atomic_read(&ctx->decrypt_pending);
spin_unlock_bh(&ctx->decrypt_compl_lock);
if (pending) {
- err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- if (err) {
- /* one of async decrypt failed */
- tls_err_abort(sk, err);
- copied = 0;
+ ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ if (ret) {
+ if (err >= 0 || err == -EINPROGRESS)
+ err = ret;
decrypted = 0;
goto end;
}
- } else {
- reinit_completion(&ctx->async_wait.completion);
}
- /* There can be no concurrent accesses, since we have no
- * pending decrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, &cmsg, copied,
+ err = process_rx_list(ctx, msg, &control, copied,
decrypted, false, is_peek);
else
- err = process_rx_list(ctx, msg, &control, &cmsg, 0,
+ err = process_rx_list(ctx, msg, &control, 0,
decrypted, true, is_peek);
- if (err < 0) {
- tls_err_abort(sk, err);
- copied = 0;
- goto end;
- }
+ decrypted = max(err, 0);
}
copied += decrypted;
end:
release_sock(sk);
- sk_defer_free_flush(sk);
if (psock)
sk_psock_put(sk, psock);
return copied ? : err;
@@ -2005,13 +1928,13 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct strp_msg *rxm = NULL;
struct sock *sk = sock->sk;
+ struct tls_msg *tlm;
struct sk_buff *skb;
ssize_t copied = 0;
bool from_queue;
int err = 0;
long timeo;
int chunk;
- bool zc = false;
lock_sock(sk);
@@ -2021,26 +1944,29 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
if (from_queue) {
skb = __skb_dequeue(&ctx->rx_list);
} else {
+ struct tls_decrypt_arg darg = {};
+
skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
&err);
if (!skb)
goto splice_read_end;
- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
+ err = decrypt_skb_update(sk, skb, NULL, &darg);
if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
}
+ rxm = strp_msg(skb);
+ tlm = tls_msg(skb);
+
/* splice does not support reading control messages */
- if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ if (tlm->control != TLS_RECORD_TYPE_DATA) {
err = -EINVAL;
goto splice_read_end;
}
- rxm = strp_msg(skb);
-
chunk = min_t(unsigned int, rxm->full_len, len);
copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
if (copied < 0)
@@ -2060,7 +1986,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
splice_read_end:
release_sock(sk);
- sk_defer_free_flush(sk);
return copied ? : err;
}
@@ -2084,10 +2009,10 @@ bool tls_sw_sock_is_readable(struct sock *sk)
static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
{
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
- struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
struct strp_msg *rxm = strp_msg(skb);
+ struct tls_msg *tlm = tls_msg(skb);
size_t cipher_overhead;
size_t data_len = 0;
int ret;
@@ -2104,11 +2029,11 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
/* Linearize header to local buffer */
ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
-
if (ret < 0)
goto read_failure;
- ctx->control = header[0];
+ tlm->decrypted = 0;
+ tlm->control = header[0];
data_len = ((header[4] & 0xFF) | (header[3] << 8));
@@ -2149,8 +2074,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
- ctx->decrypted = 0;
-
ctx->recv_pkt = skb;
strp_pause(strp);
@@ -2241,7 +2164,7 @@ void tls_sw_release_resources_rx(struct sock *sk)
if (ctx->aead_recv) {
kfree_skb(ctx->recv_pkt);
ctx->recv_pkt = NULL;
- skb_queue_purge(&ctx->rx_list);
+ __skb_queue_purge(&ctx->rx_list);
crypto_free_aead(ctx->aead_recv);
strp_stop(&ctx->strp);
/* If tls_sw_strparser_arm() was not called (cleanup paths)
@@ -2501,7 +2424,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
/* Sanity-check the sizes for stack allocations. */
if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE ||
- rec_seq_size > TLS_MAX_REC_SEQ_SIZE) {
+ rec_seq_size > TLS_MAX_REC_SEQ_SIZE || tag_size != TLS_TAG_SIZE) {
rc = -EINVAL;
goto free_priv;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 36367e7e3e0a..654dcef7cfb3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1643,7 +1643,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
* so that no locks are necessary.
*/
- skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
+ skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
+ &err);
if (!skb) {
/* This means receive shutdown. */
if (err == 0)
@@ -2481,8 +2482,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_dgram_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return __unix_dgram_recvmsg(sk, msg, size, flags);
}
@@ -2498,7 +2498,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
int used, err;
mutex_lock(&u->iolock);
- skb = skb_recv_datagram(sk, 0, 1, &err);
+ skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
mutex_unlock(&u->iolock);
if (!skb)
return err;
@@ -2914,8 +2914,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
const struct proto *prot = READ_ONCE(sk->sk_prot);
if (prot != &unix_stream_proto)
- return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, NULL);
+ return prot->recvmsg(sk, msg, size, flags, NULL);
#endif
return unix_stream_read_generic(&state, true);
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 452376c6f419..7cf14c6b1725 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -48,8 +48,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
}
static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags,
- int *addr_len)
+ size_t len, int flags, int *addr_len)
{
struct unix_sock *u = unix_sk(sk);
struct sk_psock *psock;
@@ -73,7 +72,7 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = unix_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ba1c8cc0c467..ad64f403536a 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -566,67 +566,28 @@ out:
mutex_unlock(&vsock->rx_lock);
}
-static int virtio_vsock_probe(struct virtio_device *vdev)
+static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
{
- vq_callback_t *callbacks[] = {
- virtio_vsock_rx_done,
- virtio_vsock_tx_done,
- virtio_vsock_event_done,
- };
+ struct virtio_device *vdev = vsock->vdev;
static const char * const names[] = {
"rx",
"tx",
"event",
};
- struct virtio_vsock *vsock = NULL;
+ vq_callback_t *callbacks[] = {
+ virtio_vsock_rx_done,
+ virtio_vsock_tx_done,
+ virtio_vsock_event_done,
+ };
int ret;
- ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
- if (ret)
- return ret;
-
- /* Only one virtio-vsock device per guest is supported */
- if (rcu_dereference_protected(the_virtio_vsock,
- lockdep_is_held(&the_virtio_vsock_mutex))) {
- ret = -EBUSY;
- goto out;
- }
-
- vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
- if (!vsock) {
- ret = -ENOMEM;
- goto out;
- }
-
- vsock->vdev = vdev;
-
- ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
+ ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names,
NULL);
if (ret < 0)
- goto out;
+ return ret;
virtio_vsock_update_guest_cid(vsock);
- vsock->rx_buf_nr = 0;
- vsock->rx_buf_max_nr = 0;
- atomic_set(&vsock->queued_replies, 0);
-
- mutex_init(&vsock->tx_lock);
- mutex_init(&vsock->rx_lock);
- mutex_init(&vsock->event_lock);
- spin_lock_init(&vsock->send_pkt_list_lock);
- INIT_LIST_HEAD(&vsock->send_pkt_list);
- INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
- INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
- INIT_WORK(&vsock->event_work, virtio_transport_event_work);
- INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
-
- if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
- vsock->seqpacket_allow = true;
-
- vdev->priv = vsock;
-
virtio_device_ready(vdev);
mutex_lock(&vsock->tx_lock);
@@ -643,30 +604,15 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
- rcu_assign_pointer(the_virtio_vsock, vsock);
-
- mutex_unlock(&the_virtio_vsock_mutex);
-
return 0;
-
-out:
- kfree(vsock);
- mutex_unlock(&the_virtio_vsock_mutex);
- return ret;
}
-static void virtio_vsock_remove(struct virtio_device *vdev)
+static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
{
- struct virtio_vsock *vsock = vdev->priv;
+ struct virtio_device *vdev = vsock->vdev;
struct virtio_vsock_pkt *pkt;
- mutex_lock(&the_virtio_vsock_mutex);
-
- vdev->priv = NULL;
- rcu_assign_pointer(the_virtio_vsock, NULL);
- synchronize_rcu();
-
- /* Reset all connected sockets when the device disappear */
+ /* Reset all connected sockets when the VQs disappear */
vsock_for_each_connected_socket(&virtio_transport.transport,
virtio_vsock_reset_sock);
@@ -711,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
+}
+
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = NULL;
+ int ret;
+
+ ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
+ if (ret)
+ return ret;
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (rcu_dereference_protected(the_virtio_vsock,
+ lockdep_is_held(&the_virtio_vsock_mutex))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+ if (!vsock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vsock->vdev = vdev;
+
+ vsock->rx_buf_nr = 0;
+ vsock->rx_buf_max_nr = 0;
+ atomic_set(&vsock->queued_replies, 0);
+
+ mutex_init(&vsock->tx_lock);
+ mutex_init(&vsock->rx_lock);
+ mutex_init(&vsock->event_lock);
+ spin_lock_init(&vsock->send_pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->send_pkt_list);
+ INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
+ INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
+ INIT_WORK(&vsock->event_work, virtio_transport_event_work);
+ INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+
+ if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+ vsock->seqpacket_allow = true;
+
+ vdev->priv = vsock;
+
+ ret = virtio_vsock_vqs_init(vsock);
+ if (ret < 0)
+ goto out;
+
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ return 0;
+
+out:
+ kfree(vsock);
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+
+static void virtio_vsock_remove(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ vdev->priv = NULL;
+ rcu_assign_pointer(the_virtio_vsock, NULL);
+ synchronize_rcu();
+
+ virtio_vsock_vqs_del(vsock);
/* Other works can be queued before 'config->del_vqs()', so we flush
* all works before to free the vsock object to avoid use after free.
@@ -725,6 +743,49 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
kfree(vsock);
}
+#ifdef CONFIG_PM_SLEEP
+static int virtio_vsock_freeze(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ rcu_assign_pointer(the_virtio_vsock, NULL);
+ synchronize_rcu();
+
+ virtio_vsock_vqs_del(vsock);
+
+ mutex_unlock(&the_virtio_vsock_mutex);
+
+ return 0;
+}
+
+static int virtio_vsock_restore(struct virtio_device *vdev)
+{
+ struct virtio_vsock *vsock = vdev->priv;
+ int ret;
+
+ mutex_lock(&the_virtio_vsock_mutex);
+
+ /* Only one virtio-vsock device per guest is supported */
+ if (rcu_dereference_protected(the_virtio_vsock,
+ lockdep_is_held(&the_virtio_vsock_mutex))) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = virtio_vsock_vqs_init(vsock);
+ if (ret < 0)
+ goto out;
+
+ rcu_assign_pointer(the_virtio_vsock, vsock);
+
+out:
+ mutex_unlock(&the_virtio_vsock_mutex);
+ return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -742,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = {
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
+#ifdef CONFIG_PM_SLEEP
+ .freeze = virtio_vsock_freeze,
+ .restore = virtio_vsock_restore,
+#endif
};
static int __init virtio_vsock_init(void)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b17dc9745188..b14f0ed7427b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1732,19 +1732,16 @@ static int vmci_transport_dgram_dequeue(struct vsock_sock *vsk,
int flags)
{
int err;
- int noblock;
struct vmci_datagram *dg;
size_t payload_len;
struct sk_buff *skb;
- noblock = flags & MSG_DONTWAIT;
-
if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
return -EOPNOTSUPP;
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
- skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
+ skb = skb_recv_datagram(&vsk->sk, flags, &err);
if (!skb)
return err;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 8b7fb4a9e07b..f74f176e0d9d 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2021 Intel Corporation
+ * Copyright 2018-2022 Intel Corporation
*/
#include <linux/export.h>
@@ -1344,97 +1344,6 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
return rdev_set_monitor_channel(rdev, chandef);
}
-void
-cfg80211_get_chan_state(struct wireless_dev *wdev,
- struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode,
- u8 *radar_detect)
-{
- int ret;
-
- *chan = NULL;
- *chanmode = CHAN_MODE_UNDEFINED;
-
- ASSERT_WDEV_LOCK(wdev);
-
- if (wdev->netdev && !netif_running(wdev->netdev))
- return;
-
- switch (wdev->iftype) {
- case NL80211_IFTYPE_ADHOC:
- if (wdev->current_bss) {
- *chan = wdev->current_bss->pub.channel;
- *chanmode = (wdev->ibss_fixed &&
- !wdev->ibss_dfs_possible)
- ? CHAN_MODE_SHARED
- : CHAN_MODE_EXCLUSIVE;
-
- /* consider worst-case - IBSS can try to return to the
- * original user-specified channel as creator */
- if (wdev->ibss_dfs_possible)
- *radar_detect |= BIT(wdev->chandef.width);
- return;
- }
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- if (wdev->current_bss) {
- *chan = wdev->current_bss->pub.channel;
- *chanmode = CHAN_MODE_SHARED;
- return;
- }
- break;
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_P2P_GO:
- if (wdev->cac_started) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
- *radar_detect |= BIT(wdev->chandef.width);
- } else if (wdev->beacon_interval) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
-
- ret = cfg80211_chandef_dfs_required(wdev->wiphy,
- &wdev->chandef,
- wdev->iftype);
- WARN_ON(ret < 0);
- if (ret > 0)
- *radar_detect |= BIT(wdev->chandef.width);
- }
- return;
- case NL80211_IFTYPE_MESH_POINT:
- if (wdev->mesh_id_len) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
-
- ret = cfg80211_chandef_dfs_required(wdev->wiphy,
- &wdev->chandef,
- wdev->iftype);
- WARN_ON(ret < 0);
- if (ret > 0)
- *radar_detect |= BIT(wdev->chandef.width);
- }
- return;
- case NL80211_IFTYPE_OCB:
- if (wdev->chandef.chan) {
- *chan = wdev->chandef.chan;
- *chanmode = CHAN_MODE_SHARED;
- return;
- }
- break;
- case NL80211_IFTYPE_MONITOR:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_DEVICE:
- case NL80211_IFTYPE_NAN:
- /* these interface types don't really have a channel */
- return;
- case NL80211_IFTYPE_UNSPECIFIED:
- case NL80211_IFTYPE_WDS:
- case NUM_NL80211_IFTYPES:
- WARN_ON(1);
- }
-}
-
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
unsigned long sband_mask,
u32 prohibited_flags)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3a7dbd63d8c6..5436ada91b1a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -281,12 +281,6 @@ struct cfg80211_cached_keys {
int def;
};
-enum cfg80211_chan_mode {
- CHAN_MODE_UNDEFINED,
- CHAN_MODE_SHARED,
- CHAN_MODE_EXCLUSIVE,
-};
-
struct cfg80211_beacon_registration {
struct list_head list;
u32 nlportid;
@@ -525,12 +519,6 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
return jiffies_to_msecs(end + (ULONG_MAX - start) + 1);
}
-void
-cfg80211_get_chan_state(struct wireless_dev *wdev,
- struct ieee80211_channel **chan,
- enum cfg80211_chan_mode *chanmode,
- u8 *radar_detect);
-
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef);
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 8f98e546becf..5d89eec2869a 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -3,7 +3,7 @@
* Some IBSS support code for cfg80211.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
*/
#include <linux/etherdevice.h>
@@ -131,8 +131,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
kfree_sensitive(wdev->connect_keys);
wdev->connect_keys = connkeys;
- wdev->ibss_fixed = params->channel_fixed;
- wdev->ibss_dfs_possible = params->userspace_handles_dfs;
wdev->chandef = params->chandef;
if (connkeys) {
params->wep_keys = connkeys->params;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1a3551b6d18b..740b29481bc6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -791,6 +791,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
NLA_POLICY_RANGE(NLA_BINARY,
NL80211_EHT_MIN_CAPABILITY_LEN,
NL80211_EHT_MAX_CAPABILITY_LEN),
+ [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG },
};
/* policy for the key attributes */
@@ -3719,6 +3720,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
goto nla_put_failure_locked;
@@ -5180,6 +5182,30 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
return elems;
}
+static int nl80211_parse_he_bss_color(struct nlattr *attrs,
+ struct cfg80211_he_bss_color *he_bss_color)
+{
+ struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs,
+ he_bss_color_policy, NULL);
+ if (err)
+ return err;
+
+ if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR])
+ return -EINVAL;
+
+ he_bss_color->color =
+ nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]);
+ he_bss_color->enabled =
+ !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
+ he_bss_color->partial =
+ nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]);
+
+ return 0;
+}
+
static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
struct nlattr *attrs[],
struct cfg80211_beacon_data *bcn)
@@ -5260,6 +5286,14 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
bcn->ftm_responder = -1;
}
+ if (attrs[NL80211_ATTR_HE_BSS_COLOR]) {
+ err = nl80211_parse_he_bss_color(attrs[NL80211_ATTR_HE_BSS_COLOR],
+ &bcn->he_bss_color);
+ if (err)
+ return err;
+ bcn->he_bss_color_valid = true;
+ }
+
if (attrs[NL80211_ATTR_MBSSID_ELEMS]) {
struct cfg80211_mbssid_elems *mbssid =
nl80211_parse_mbssid_elems(&rdev->wiphy,
@@ -5318,30 +5352,6 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
return 0;
}
-static int nl80211_parse_he_bss_color(struct nlattr *attrs,
- struct cfg80211_he_bss_color *he_bss_color)
-{
- struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1];
- int err;
-
- err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs,
- he_bss_color_policy, NULL);
- if (err)
- return err;
-
- if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR])
- return -EINVAL;
-
- he_bss_color->color =
- nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]);
- he_bss_color->enabled =
- !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]);
- he_bss_color->partial =
- nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]);
-
- return 0;
-}
-
static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
struct nlattr *attrs,
struct cfg80211_ap_settings *params)
@@ -5733,14 +5743,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) {
- err = nl80211_parse_he_bss_color(
- info->attrs[NL80211_ATTR_HE_BSS_COLOR],
- &params->he_bss_color);
- if (err)
- goto out;
- }
-
if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
err = nl80211_parse_fils_discovery(rdev,
info->attrs[NL80211_ATTR_FILS_DISCOVERY],
@@ -10386,6 +10388,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
req.flags |= ASSOC_REQ_DISABLE_HE;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
+ req.flags |= ASSOC_REQ_DISABLE_EHT;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&req.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -11174,6 +11179,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
connect.flags |= ASSOC_REQ_DISABLE_HE;
+ if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
+ connect.flags |= ASSOC_REQ_DISABLE_EHT;
+
if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
memcpy(&connect.vht_capa_mask,
nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
@@ -15300,23 +15308,79 @@ static int nl80211_set_fils_aad(struct sk_buff *skb,
#define NL80211_FLAG_CLEAR_SKB 0x20
#define NL80211_FLAG_NO_WIPHY_MTX 0x40
+#define INTERNAL_FLAG_SELECTORS(__sel) \
+ SELECTOR(__sel, NONE, 0) /* must be first */ \
+ SELECTOR(__sel, WIPHY, \
+ NL80211_FLAG_NEED_WIPHY) \
+ SELECTOR(__sel, WDEV, \
+ NL80211_FLAG_NEED_WDEV) \
+ SELECTOR(__sel, NETDEV, \
+ NL80211_FLAG_NEED_NETDEV) \
+ SELECTOR(__sel, WIPHY_RTNL, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, WIPHY_RTNL_NOMTX, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_NEED_RTNL | \
+ NL80211_FLAG_NO_WIPHY_MTX) \
+ SELECTOR(__sel, WDEV_RTNL, \
+ NL80211_FLAG_NEED_WDEV | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, NETDEV_RTNL, \
+ NL80211_FLAG_NEED_NETDEV | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, NETDEV_UP, \
+ NL80211_FLAG_NEED_NETDEV_UP) \
+ SELECTOR(__sel, NETDEV_UP_NOTMX, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_NO_WIPHY_MTX) \
+ SELECTOR(__sel, NETDEV_UP_CLEAR, \
+ NL80211_FLAG_NEED_NETDEV_UP | \
+ NL80211_FLAG_CLEAR_SKB) \
+ SELECTOR(__sel, WDEV_UP, \
+ NL80211_FLAG_NEED_WDEV_UP) \
+ SELECTOR(__sel, WDEV_UP_RTNL, \
+ NL80211_FLAG_NEED_WDEV_UP | \
+ NL80211_FLAG_NEED_RTNL) \
+ SELECTOR(__sel, WIPHY_CLEAR, \
+ NL80211_FLAG_NEED_WIPHY | \
+ NL80211_FLAG_CLEAR_SKB)
+
+enum nl80211_internal_flags_selector {
+#define SELECTOR(_, name, value) NL80211_IFL_SEL_##name,
+ INTERNAL_FLAG_SELECTORS(_)
+#undef SELECTOR
+};
+
+static u32 nl80211_internal_flags[] = {
+#define SELECTOR(_, name, value) [NL80211_IFL_SEL_##name] = value,
+ INTERNAL_FLAG_SELECTORS(_)
+#undef SELECTOR
+};
+
static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
struct cfg80211_registered_device *rdev = NULL;
struct wireless_dev *wdev;
struct net_device *dev;
+ u32 internal_flags;
+
+ if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags)))
+ return -EINVAL;
+
+ internal_flags = nl80211_internal_flags[ops->internal_flags];
rtnl_lock();
- if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) {
+ if (internal_flags & NL80211_FLAG_NEED_WIPHY) {
rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
if (IS_ERR(rdev)) {
rtnl_unlock();
return PTR_ERR(rdev);
}
info->user_ptr[0] = rdev;
- } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV ||
- ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
+ } else if (internal_flags & NL80211_FLAG_NEED_NETDEV ||
+ internal_flags & NL80211_FLAG_NEED_WDEV) {
wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info),
info->attrs);
if (IS_ERR(wdev)) {
@@ -15327,7 +15391,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
dev = wdev->netdev;
rdev = wiphy_to_rdev(wdev->wiphy);
- if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
+ if (internal_flags & NL80211_FLAG_NEED_NETDEV) {
if (!dev) {
rtnl_unlock();
return -EINVAL;
@@ -15338,7 +15402,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
info->user_ptr[1] = wdev;
}
- if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
+ if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
!wdev_running(wdev)) {
rtnl_unlock();
return -ENETDOWN;
@@ -15348,12 +15412,12 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
info->user_ptr[0] = rdev;
}
- if (rdev && !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
+ if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
wiphy_lock(&rdev->wiphy);
/* we keep the mutex locked until post_doit */
__release(&rdev->wiphy.mtx);
}
- if (!(ops->internal_flags & NL80211_FLAG_NEED_RTNL))
+ if (!(internal_flags & NL80211_FLAG_NEED_RTNL))
rtnl_unlock();
return 0;
@@ -15362,8 +15426,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
struct genl_info *info)
{
+ u32 internal_flags = nl80211_internal_flags[ops->internal_flags];
+
if (info->user_ptr[1]) {
- if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
+ if (internal_flags & NL80211_FLAG_NEED_WDEV) {
struct wireless_dev *wdev = info->user_ptr[1];
dev_put(wdev->netdev);
@@ -15373,7 +15439,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
}
if (info->user_ptr[0] &&
- !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
+ !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
struct cfg80211_registered_device *rdev = info->user_ptr[0];
/* we kept the mutex locked since pre_doit */
@@ -15381,7 +15447,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
wiphy_unlock(&rdev->wiphy);
}
- if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
+ if (internal_flags & NL80211_FLAG_NEED_RTNL)
rtnl_unlock();
/* If needed, clear the netlink message payload from the SKB
@@ -15389,7 +15455,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
* the heap after the SKB is freed. The netlink message header
* is still needed for further processing, so leave it intact.
*/
- if (ops->internal_flags & NL80211_FLAG_CLEAR_SKB) {
+ if (internal_flags & NL80211_FLAG_CLEAR_SKB) {
struct nlmsghdr *nlh = nlmsg_hdr(skb);
memset(nlmsg_data(nlh), 0, nlmsg_len(nlh));
@@ -15499,6 +15565,11 @@ error:
return err;
}
+#define SELECTOR(__sel, name, value) \
+ ((__sel) == (value)) ? NL80211_IFL_SEL_##name :
+int __missing_selector(void);
+#define IFLAGS(__val) INTERNAL_FLAG_SELECTORS(__val) __missing_selector()
+
static const struct genl_ops nl80211_ops[] = {
{
.cmd = NL80211_CMD_GET_WIPHY,
@@ -15507,7 +15578,7 @@ static const struct genl_ops nl80211_ops[] = {
.dumpit = nl80211_dump_wiphy,
.done = nl80211_dump_wiphy_done,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
};
@@ -15524,112 +15595,113 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_interface,
.dumpit = nl80211_dump_interface,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV),
},
{
.cmd = NL80211_CMD_SET_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_NEW_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL |
- /* we take the wiphy mutex later ourselves */
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags =
+ IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL |
+ /* we take the wiphy mutex later ourselves */
+ NL80211_FLAG_NO_WIPHY_MTX),
},
{
.cmd = NL80211_CMD_DEL_INTERFACE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_interface,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_GET_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_NEW_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_KEY,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_key,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_BEACON,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_set_beacon,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_START_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_start_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_STOP_AP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.flags = GENL_UNS_ADMIN_PERM,
.doit = nl80211_stop_ap,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_station,
.dumpit = nl80211_dump_station,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_NEW_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_STATION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_station,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_MPATH,
@@ -15637,7 +15709,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpath,
.dumpit = nl80211_dump_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_MPP,
@@ -15645,42 +15717,41 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_get_mpp,
.dumpit = nl80211_dump_mpp,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_NEW_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_new_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_MPATH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_mpath,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_BSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_bss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_REG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_reg_do,
.dumpit = nl80211_get_reg_dump,
- .internal_flags = 0,
/* can be retrieved by unprivileged users */
},
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
@@ -15689,7 +15760,6 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_reg,
.flags = GENL_ADMIN_PERM,
- .internal_flags = 0,
},
#endif
{
@@ -15709,28 +15779,28 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_mesh_config,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MESH_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_mesh_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TRIGGER_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_trigger_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_ABORT_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_abort_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_GET_SCAN,
@@ -15742,60 +15812,58 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_STOP_SCHED_SCAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_sched_scan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_AUTHENTICATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_authenticate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_ASSOCIATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_associate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEAUTHENTICATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_deauthenticate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DISASSOCIATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disassociate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_JOIN_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_IBSS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ibss,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
#ifdef CONFIG_NL80211_TESTMODE
{
@@ -15804,7 +15872,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_testmode_do,
.dumpit = nl80211_testmode_dump,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
#endif
{
@@ -15812,34 +15880,32 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_connect,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_connect_params,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DISCONNECT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_disconnect,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_WIPHY_NETNS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_wiphy_netns,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL |
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL |
+ NL80211_FLAG_NO_WIPHY_MTX),
},
{
.cmd = NL80211_CMD_GET_SURVEY,
@@ -15851,121 +15917,120 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_setdel_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_FLUSH_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_flush_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_cancel_remain_on_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_tx_bitrate_mask,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_REGISTER_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV),
},
{
.cmd = NL80211_CMD_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_FRAME_WAIT_CANCEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_mgmt_cancel_wait,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_power_save,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_GET_POWER_SAVE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_power_save,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_CQM,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_cqm,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_CHANNEL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_channel,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_JOIN_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_MESH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_mesh,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_JOIN_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_join_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_LEAVE_OCB,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_leave_ocb,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
#ifdef CONFIG_PM
{
@@ -15973,14 +16038,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_wowlan,
/* can be retrieved by unprivileged users */
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_WOWLAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_wowlan,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
#endif
{
@@ -15988,126 +16053,125 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_rekey_data,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_TDLS_MGMT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_mgmt,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_OPER,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_oper,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_UNEXPECTED_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_unexpected_frame,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_PROBE_CLIENT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_probe_client,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_REGISTER_BEACONS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_register_beacons,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_NOACK_MAP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_noack_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_START_P2P_DEVICE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_p2p_device,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_STOP_P2P_DEVICE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_p2p_device,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_START_NAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_nan,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_STOP_NAN,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_stop_nan,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_add_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_del_func,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_nan_change_config,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MCAST_RATE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mcast_rate,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_MAC_ACL,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_mac_acl,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_RADAR_DETECT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_start_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NO_WIPHY_MTX,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NO_WIPHY_MTX),
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
@@ -16119,41 +16183,41 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_update_ft_ies,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_crit_protocol_stop,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_GET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_coalesce,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_SET_COALESCE,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_coalesce,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY),
},
{
.cmd = NL80211_CMD_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_VENDOR,
@@ -16161,140 +16225,137 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.doit = nl80211_vendor_cmd,
.dumpit = nl80211_vendor_cmd_dump,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_SET_QOS_MAP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_qos_map,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_ADD_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_add_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_DEL_TX_TS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_tx_ts,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tdls_cancel_channel_switch,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_multicast_to_unicast,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_pmk,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- 0 |
- NL80211_FLAG_CLEAR_SKB,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_CLEAR_SKB),
},
{
.cmd = NL80211_CMD_DEL_PMK,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_del_pmk,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_EXTERNAL_AUTH,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_external_auth,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_CONTROL_PORT_FRAME,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_tx_control_port,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_get_ftm_responder_stats,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_PEER_MEASUREMENT_START,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_pmsr_start,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP),
},
{
.cmd = NL80211_CMD_NOTIFY_RADAR,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_notify_radar_detection,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_UPDATE_OWE_INFO,
.doit = nl80211_update_owe_info,
.flags = GENL_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_PROBE_MESH_LINK,
.doit = nl80211_probe_mesh_link,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_TID_CONFIG,
.doit = nl80211_set_tid_config,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV),
},
{
.cmd = NL80211_CMD_SET_SAR_SPECS,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_sar_specs,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_WIPHY |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL),
},
{
.cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_color_change,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
- NL80211_FLAG_NEED_RTNL,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
{
.cmd = NL80211_CMD_SET_FILS_AAD,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = nl80211_set_fils_aad,
.flags = GENL_UNS_ADMIN_PERM,
- .internal_flags = NL80211_FLAG_NEED_NETDEV_UP,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
};
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c76cd973f06e..58e83ce642ad 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -807,6 +807,8 @@ static int __init load_builtin_regdb_keys(void)
return 0;
}
+MODULE_FIRMWARE("regulatory.db.p7s");
+
static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
{
const struct firmware *sig;
@@ -1078,6 +1080,8 @@ static void regdb_fw_cb(const struct firmware *fw, void *context)
release_firmware(fw);
}
+MODULE_FIRMWARE("regulatory.db");
+
static int query_regdb_file(const char *alpha2)
{
ASSERT_RTNL();
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3a171828638b..6bc2ac8d8146 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1315,8 +1315,7 @@ static int x25_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
} else {
/* Now we can treat all alike */
release_sock(sk);
- skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
- flags & MSG_DONTWAIT, &rc);
+ skb = skb_recv_datagram(sk, flags, &rc);
lock_sock(sk);
if (!skb)
goto out;
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 3bddcbdf2e40..0412814a2295 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -79,7 +79,6 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
{
struct sock *s;
struct x25_sock *x25;
- struct net_device *dev;
const char *devname;
if (v == SEQ_START_TOKEN) {
@@ -91,7 +90,7 @@ static int x25_seq_socket_show(struct seq_file *seq, void *v)
s = sk_entry(v);
x25 = x25_sk(s);
- if (!x25->neighbour || (dev = x25->neighbour->dev) == NULL)
+ if (!x25->neighbour || !x25->neighbour->dev)
devname = "???";
else
devname = x25->neighbour->dev->name;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3a9348030e20..e0a4526ab66b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -184,7 +184,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
xsk_xdp = xsk_buff_alloc(xs->pool);
if (!xsk_xdp) {
xs->rx_dropped++;
- return -ENOSPC;
+ return -ENOMEM;
}
xsk_copy_xdp(xsk_xdp, xdp, len);
@@ -217,7 +217,7 @@ static bool xsk_is_bound(struct xdp_sock *xs)
static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp)
{
if (!xsk_is_bound(xs))
- return -EINVAL;
+ return -ENXIO;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 801cda5d1938..a794410989cc 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -263,7 +263,7 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
- return xskq_cons_nb_entries(q, cnt) >= cnt ? true : false;
+ return xskq_cons_nb_entries(q, cnt) >= cnt;
}
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
@@ -382,7 +382,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
u32 idx;
if (xskq_prod_is_full(q))
- return -ENOSPC;
+ return -ENOBUFS;
/* A, matches D */
idx = q->cached_prod++ & q->ring_mask;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 65b53fb3de13..acc8e52a4f5f 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -9,6 +9,7 @@
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/btf_ids.h>
#include "xsk.h"
@@ -254,7 +255,7 @@ static bool xsk_map_meta_equal(const struct bpf_map *meta0,
bpf_map_meta_equal(meta0, meta1);
}
-static int xsk_map_btf_id;
+BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map)
const struct bpf_map_ops xsk_map_ops = {
.map_meta_equal = xsk_map_meta_equal,
.map_alloc = xsk_map_alloc,
@@ -266,7 +267,6 @@ const struct bpf_map_ops xsk_map_ops = {
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,
- .map_btf_name = "xsk_map",
- .map_btf_id = &xsk_map_btf_id,
+ .map_btf_id = &xsk_map_btf_ids[0],
.map_redirect = xsk_map_redirect,
};
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 1f08ebf7d80c..82d14eea1b5a 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -131,7 +131,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb)
}
static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
struct sk_buff *skb;
@@ -139,8 +139,6 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int copied;
int off = 0;
- flags |= nonblock ? MSG_DONTWAIT : 0;
-
skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err);
if (!skb) {
if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN)
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 36aa01d92b65..35c7e89b2e7d 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -117,7 +117,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
sp = skb_sec_path(skb);
x = sp->xvec[sp->len - 1];
- if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND)
+ if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN)
return skb;
/* This skb was already validated on the upper/virtual dev */
@@ -212,7 +212,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
int err;
struct dst_entry *dst;
struct net_device *dev;
- struct xfrm_state_offload *xso = &x->xso;
+ struct xfrm_dev_offload *xso = &x->xso;
xfrm_address_t *saddr;
xfrm_address_t *daddr;
@@ -264,15 +264,16 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xso->dev = dev;
netdev_tracker_alloc(dev, &xso->dev_tracker, GFP_ATOMIC);
xso->real_dev = dev;
- xso->num_exthdrs = 1;
- /* Don't forward bit that is not implemented */
- xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
+
+ if (xuo->flags & XFRM_OFFLOAD_INBOUND)
+ xso->dir = XFRM_DEV_OFFLOAD_IN;
+ else
+ xso->dir = XFRM_DEV_OFFLOAD_OUT;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
- xso->num_exthdrs = 0;
- xso->flags = 0;
xso->dev = NULL;
+ xso->dir = 0;
xso->real_dev = NULL;
dev_put_track(dev, &xso->dev_tracker);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b749935152ba..08564e0eef20 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -751,7 +751,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- struct xfrm_state_offload *xso;
+ struct xfrm_dev_offload *xso;
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
@@ -835,7 +835,7 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali
err = -ESRCH;
for (i = 0; i <= net->xfrm.state_hmask; i++) {
struct xfrm_state *x;
- struct xfrm_state_offload *xso;
+ struct xfrm_dev_offload *xso;
restart:
hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
xso = &x->xso;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 64fa8fdd6bbd..6a58fec6a1fb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -840,7 +840,7 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb)
return 0;
}
-static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb)
+static int copy_user_offload(struct xfrm_dev_offload *xso, struct sk_buff *skb)
{
struct xfrm_user_offload *xuo;
struct nlattr *attr;
@@ -852,7 +852,8 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
xuo = nla_data(attr);
memset(xuo, 0, sizeof(*xuo));
xuo->ifindex = xso->dev->ifindex;
- xuo->flags = xso->flags;
+ if (xso->dir == XFRM_DEV_OFFLOAD_IN)
+ xuo->flags = XFRM_OFFLOAD_INBOUND;
return 0;
}