Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/ndisc.c | 16
-rw-r--r--  net/8021q/vlan.c | 47
-rw-r--r--  net/8021q/vlan.h | 1
-rw-r--r--  net/Kconfig | 6
-rw-r--r--  net/appletalk/aarp.c | 24
-rw-r--r--  net/appletalk/atalk_proc.c | 2
-rw-r--r--  net/appletalk/ddp.c | 1
-rw-r--r--  net/atm/clip.c | 75
-rw-r--r--  net/atm/common.c | 1
-rw-r--r--  net/atm/lec.c | 12
-rw-r--r--  net/atm/raw.c | 2
-rw-r--r--  net/atm/resources.c | 3
-rw-r--r--  net/batman-adv/bat_algo.c | 1
-rw-r--r--  net/batman-adv/bat_algo.h | 2
-rw-r--r--  net/batman-adv/bat_iv_ogm.c | 25
-rw-r--r--  net/batman-adv/bat_v.c | 6
-rw-r--r--  net/batman-adv/bat_v_elp.c | 8
-rw-r--r--  net/batman-adv/bat_v_ogm.c | 14
-rw-r--r--  net/batman-adv/hard-interface.c | 39
-rw-r--r--  net/batman-adv/main.c | 7
-rw-r--r--  net/batman-adv/main.h | 2
-rw-r--r--  net/batman-adv/mesh-interface.c | 6
-rw-r--r--  net/batman-adv/multicast.c | 6
-rw-r--r--  net/batman-adv/netlink.c | 7
-rw-r--r--  net/batman-adv/originator.c | 7
-rw-r--r--  net/batman-adv/send.c | 7
-rw-r--r--  net/bluetooth/af_bluetooth.c | 9
-rw-r--r--  net/bluetooth/aosp.c | 2
-rw-r--r--  net/bluetooth/coredump.c | 6
-rw-r--r--  net/bluetooth/eir.c | 17
-rw-r--r--  net/bluetooth/eir.h | 2
-rw-r--r--  net/bluetooth/hci_conn.c | 50
-rw-r--r--  net/bluetooth/hci_core.c | 101
-rw-r--r--  net/bluetooth/hci_debugfs.c | 8
-rw-r--r--  net/bluetooth/hci_event.c | 134
-rw-r--r--  net/bluetooth/hci_sock.c | 2
-rw-r--r--  net/bluetooth/hci_sync.c | 345
-rw-r--r--  net/bluetooth/iso.c | 69
-rw-r--r--  net/bluetooth/l2cap_core.c | 25
-rw-r--r--  net/bluetooth/l2cap_sock.c | 7
-rw-r--r--  net/bluetooth/lib.c | 2
-rw-r--r--  net/bluetooth/mgmt.c | 204
-rw-r--r--  net/bluetooth/mgmt_util.c | 32
-rw-r--r--  net/bluetooth/mgmt_util.h | 4
-rw-r--r--  net/bluetooth/msft.c | 2
-rw-r--r--  net/bluetooth/rfcomm/core.c | 3
-rw-r--r--  net/bluetooth/rfcomm/tty.c | 9
-rw-r--r--  net/bluetooth/sco.c | 4
-rw-r--r--  net/bluetooth/smp.c | 23
-rw-r--r--  net/bluetooth/smp.h | 1
-rw-r--r--  net/bpf/bpf_dummy_struct_ops.c | 3
-rw-r--r--  net/bpf/test_run.c | 2
-rw-r--r--  net/bridge/br.c | 7
-rw-r--r--  net/bridge/br_if.c | 3
-rw-r--r--  net/bridge/br_multicast.c | 9
-rw-r--r--  net/bridge/br_netlink.c | 2
-rw-r--r--  net/bridge/br_switchdev.c | 5
-rw-r--r--  net/bridge/br_sysfs_br.c | 2
-rw-r--r--  net/bridge/netfilter/Kconfig | 10
-rw-r--r--  net/caif/cfctrl.c | 294
-rw-r--r--  net/can/af_can.c | 6
-rw-r--r--  net/can/bcm.c | 5
-rw-r--r--  net/can/isotp.c | 5
-rw-r--r--  net/can/j1939/socket.c | 5
-rw-r--r--  net/can/raw.c | 5
-rw-r--r--  net/ceph/messenger_v2.c | 12
-rw-r--r--  net/core/dev.c | 279
-rw-r--r--  net/core/dev.h | 14
-rw-r--r--  net/core/dev_addr_lists.c | 2
-rw-r--r--  net/core/dev_api.c | 13
-rw-r--r--  net/core/dev_ioctl.c | 5
-rw-r--r--  net/core/dst.c | 10
-rw-r--r--  net/core/dst_cache.c | 2
-rw-r--r--  net/core/filter.c | 54
-rw-r--r--  net/core/hotdata.c | 5
-rw-r--r--  net/core/ieee8021q_helpers.c | 44
-rw-r--r--  net/core/neighbour.c | 558
-rw-r--r--  net/core/net-sysfs.c | 80
-rw-r--r--  net/core/net-sysfs.h | 2
-rw-r--r--  net/core/net_namespace.c | 70
-rw-r--r--  net/core/netclassid_cgroup.c | 4
-rw-r--r--  net/core/netdev-genl-gen.c | 5
-rw-r--r--  net/core/netdev-genl.c | 14
-rw-r--r--  net/core/netdev_rx_queue.c | 6
-rw-r--r--  net/core/netpoll.c | 480
-rw-r--r--  net/core/page_pool.c | 36
-rw-r--r--  net/core/rtnetlink.c | 10
-rw-r--r--  net/core/scm.c | 32
-rw-r--r--  net/core/selftests.c | 72
-rw-r--r--  net/core/skbuff.c | 41
-rw-r--r--  net/core/skmsg.c | 7
-rw-r--r--  net/core/sock.c | 73
-rw-r--r--  net/core/sock_map.c | 13
-rw-r--r--  net/core/stream.c | 8
-rw-r--r--  net/core/sysctl_net_core.c | 37
-rw-r--r--  net/devlink/netlink_gen.c | 15
-rw-r--r--  net/devlink/netlink_gen.h | 1
-rw-r--r--  net/devlink/param.c | 20
-rw-r--r--  net/devlink/rate.c | 127
-rw-r--r--  net/dsa/Kconfig | 16
-rw-r--r--  net/dsa/dsa.c | 3
-rw-r--r--  net/dsa/tag_brcm.c | 119
-rw-r--r--  net/dsa/user.c | 2
-rw-r--r--  net/ethtool/common.c | 58
-rw-r--r--  net/ethtool/common.h | 13
-rw-r--r--  net/ethtool/ioctl.c | 330
-rw-r--r--  net/ethtool/netlink.c | 95
-rw-r--r--  net/ethtool/netlink.h | 12
-rw-r--r--  net/ethtool/pause.c | 1
-rw-r--r--  net/ethtool/pse-pd.c | 65
-rw-r--r--  net/ethtool/rss.c | 942
-rw-r--r--  net/handshake/tlshd.c | 6
-rw-r--r--  net/ipv4/arp.c | 16
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/fib_frontend.c | 2
-rw-r--r--  net/ipv4/fib_semantics.c | 10
-rw-r--r--  net/ipv4/icmp.c | 24
-rw-r--r--  net/ipv4/igmp.c | 2
-rw-r--r--  net/ipv4/inet_connection_sock.c | 42
-rw-r--r--  net/ipv4/inet_diag.c | 2
-rw-r--r--  net/ipv4/inet_hashtables.c | 4
-rw-r--r--  net/ipv4/ip_fragment.c | 2
-rw-r--r--  net/ipv4/ip_input.c | 13
-rw-r--r--  net/ipv4/ip_output.c | 9
-rw-r--r--  net/ipv4/ip_tunnel.c | 4
-rw-r--r--  net/ipv4/ip_tunnel_core.c | 4
-rw-r--r--  net/ipv4/ip_vti.c | 4
-rw-r--r--  net/ipv4/ipcomp.c | 2
-rw-r--r--  net/ipv4/ipconfig.c | 6
-rw-r--r--  net/ipv4/ipmr.c | 171
-rw-r--r--  net/ipv4/netfilter.c | 4
-rw-r--r--  net/ipv4/netfilter/Kconfig | 24
-rw-r--r--  net/ipv4/nexthop.c | 5
-rw-r--r--  net/ipv4/ping.c | 4
-rw-r--r--  net/ipv4/proc.c | 1
-rw-r--r--  net/ipv4/raw.c | 4
-rw-r--r--  net/ipv4/route.c | 43
-rw-r--r--  net/ipv4/syncookies.c | 3
-rw-r--r--  net/ipv4/tcp.c | 35
-rw-r--r--  net/ipv4/tcp_fastopen.c | 7
-rw-r--r--  net/ipv4/tcp_input.c | 266
-rw-r--r--  net/ipv4/tcp_ipv4.c | 309
-rw-r--r--  net/ipv4/tcp_metrics.c | 8
-rw-r--r--  net/ipv4/tcp_minisocks.c | 2
-rw-r--r--  net/ipv4/tcp_offload.c | 1
-rw-r--r--  net/ipv4/tcp_output.c | 89
-rw-r--r--  net/ipv4/tcp_recovery.c | 2
-rw-r--r--  net/ipv4/tcp_timer.c | 2
-rw-r--r--  net/ipv4/udp.c | 29
-rw-r--r--  net/ipv4/udp_impl.h | 1
-rw-r--r--  net/ipv4/udp_offload.c | 11
-rw-r--r--  net/ipv4/udp_tunnel_core.c | 21
-rw-r--r--  net/ipv4/udp_tunnel_nic.c | 78
-rw-r--r--  net/ipv4/udplite.c | 2
-rw-r--r--  net/ipv4/xfrm4_input.c | 3
-rw-r--r--  net/ipv4/xfrm4_output.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 117
-rw-r--r--  net/ipv6/addrlabel.c | 32
-rw-r--r--  net/ipv6/af_inet6.c | 2
-rw-r--r--  net/ipv6/anycast.c | 101
-rw-r--r--  net/ipv6/calipso.c | 14
-rw-r--r--  net/ipv6/datagram.c | 6
-rw-r--r--  net/ipv6/exthdrs.c | 10
-rw-r--r--  net/ipv6/icmp.c | 4
-rw-r--r--  net/ipv6/ila/ila_lwt.c | 2
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 4
-rw-r--r--  net/ipv6/ioam6.c | 17
-rw-r--r--  net/ipv6/ioam6_iptunnel.c | 4
-rw-r--r--  net/ipv6/ip6_fib.c | 50
-rw-r--r--  net/ipv6/ip6_gre.c | 108
-rw-r--r--  net/ipv6/ip6_input.c | 40
-rw-r--r--  net/ipv6/ip6_output.c | 32
-rw-r--r--  net/ipv6/ip6_tunnel.c | 49
-rw-r--r--  net/ipv6/ip6_udp_tunnel.c | 20
-rw-r--r--  net/ipv6/ip6_vti.c | 4
-rw-r--r--  net/ipv6/ip6mr.c | 157
-rw-r--r--  net/ipv6/ipcomp6.c | 2
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 28
-rw-r--r--  net/ipv6/mcast.c | 395
-rw-r--r--  net/ipv6/ndisc.c | 184
-rw-r--r--  net/ipv6/netfilter.c | 4
-rw-r--r--  net/ipv6/netfilter/Kconfig | 19
-rw-r--r--  net/ipv6/netfilter/nf_dup_ipv6.c | 2
-rw-r--r--  net/ipv6/netfilter/nf_reject_ipv6.c | 2
-rw-r--r--  net/ipv6/output_core.c | 4
-rw-r--r--  net/ipv6/ping.c | 2
-rw-r--r--  net/ipv6/raw.c | 2
-rw-r--r--  net/ipv6/reassembly.c | 10
-rw-r--r--  net/ipv6/route.c | 241
-rw-r--r--  net/ipv6/rpl_iptunnel.c | 12
-rw-r--r--  net/ipv6/seg6_iptunnel.c | 26
-rw-r--r--  net/ipv6/seg6_local.c | 26
-rw-r--r--  net/ipv6/sit.c | 2
-rw-r--r--  net/ipv6/syncookies.c | 2
-rw-r--r--  net/ipv6/tcp_ipv6.c | 23
-rw-r--r--  net/ipv6/udp.c | 11
-rw-r--r--  net/ipv6/udp_impl.h | 1
-rw-r--r--  net/ipv6/udplite.c | 2
-rw-r--r--  net/ipv6/xfrm6_input.c | 3
-rw-r--r--  net/ipv6/xfrm6_output.c | 2
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 2
-rw-r--r--  net/iucv/iucv.c | 1
-rw-r--r--  net/kcm/kcmsock.c | 3
-rw-r--r--  net/key/af_key.c | 4
-rw-r--r--  net/l2tp/l2tp_ip6.c | 2
-rw-r--r--  net/llc/af_llc.c | 6
-rw-r--r--  net/llc/llc_proc.c | 2
-rw-r--r--  net/mac80211/agg-rx.c | 6
-rw-r--r--  net/mac80211/agg-tx.c | 3
-rw-r--r--  net/mac80211/cfg.c | 221
-rw-r--r--  net/mac80211/chan.c | 51
-rw-r--r--  net/mac80211/debug.h | 5
-rw-r--r--  net/mac80211/debugfs.c | 3
-rw-r--r--  net/mac80211/debugfs_netdev.c | 2
-rw-r--r--  net/mac80211/driver-ops.c | 5
-rw-r--r--  net/mac80211/driver-ops.h | 59
-rw-r--r--  net/mac80211/ht.c | 40
-rw-r--r--  net/mac80211/ibss.c | 4
-rw-r--r--  net/mac80211/ieee80211_i.h | 73
-rw-r--r--  net/mac80211/iface.c | 39
-rw-r--r--  net/mac80211/key.c | 66
-rw-r--r--  net/mac80211/link.c | 15
-rw-r--r--  net/mac80211/main.c | 92
-rw-r--r--  net/mac80211/mesh.c | 2
-rw-r--r--  net/mac80211/mlme.c | 330
-rw-r--r--  net/mac80211/offchannel.c | 7
-rw-r--r--  net/mac80211/parse.c | 6
-rw-r--r--  net/mac80211/pm.c | 2
-rw-r--r--  net/mac80211/rx.c | 117
-rw-r--r--  net/mac80211/s1g.c | 26
-rw-r--r--  net/mac80211/scan.c | 23
-rw-r--r--  net/mac80211/sta_info.c | 418
-rw-r--r--  net/mac80211/sta_info.h | 59
-rw-r--r--  net/mac80211/tdls.c | 2
-rw-r--r--  net/mac80211/trace.h | 105
-rw-r--r--  net/mac80211/tx.c | 145
-rw-r--r--  net/mac80211/util.c | 124
-rw-r--r--  net/mac80211/vht.c | 5
-rw-r--r--  net/mctp/af_mctp.c | 214
-rw-r--r--  net/mctp/route.c | 653
-rw-r--r--  net/mctp/test/route-test.c | 798
-rw-r--r--  net/mctp/test/sock-test.c | 396
-rw-r--r--  net/mctp/test/utils.c | 232
-rw-r--r--  net/mctp/test/utils.h | 61
-rw-r--r--  net/mpls/af_mpls.c | 10
-rw-r--r--  net/mptcp/ctrl.c | 4
-rw-r--r--  net/mptcp/mib.c | 5
-rw-r--r--  net/mptcp/mib.h | 7
-rw-r--r--  net/mptcp/options.c | 6
-rw-r--r--  net/mptcp/pm.c | 8
-rw-r--r--  net/mptcp/protocol.c | 104
-rw-r--r--  net/mptcp/protocol.h | 36
-rw-r--r--  net/mptcp/sockopt.c | 33
-rw-r--r--  net/mptcp/subflow.c | 40
-rw-r--r--  net/ncsi/internal.h | 2
-rw-r--r--  net/ncsi/ncsi-rsp.c | 1
-rw-r--r--  net/netfilter/Kconfig | 30
-rw-r--r--  net/netfilter/Makefile | 1
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 2
-rw-r--r--  net/netfilter/nf_bpf_link.c | 5
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 50
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 1
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 6
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c | 826
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 118
-rw-r--r--  net/netfilter/nf_log.c | 26
-rw-r--r--  net/netfilter/nf_nat_core.c | 6
-rw-r--r--  net/netfilter/nf_nat_proto.c | 43
-rw-r--r--  net/netfilter/nf_tables_api.c | 139
-rw-r--r--  net/netfilter/nf_tables_trace.c | 3
-rw-r--r--  net/netfilter/nfnetlink.c | 1
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 5
-rw-r--r--  net/netfilter/nfnetlink_hook.c | 76
-rw-r--r--  net/netfilter/nft_chain_filter.c | 2
-rw-r--r--  net/netfilter/nft_dynset.c | 10
-rw-r--r--  net/netfilter/nft_exthdr.c | 8
-rw-r--r--  net/netfilter/nft_lookup.c | 27
-rw-r--r--  net/netfilter/nft_objref.c | 5
-rw-r--r--  net/netfilter/nft_set_bitmap.c | 11
-rw-r--r--  net/netfilter/nft_set_hash.c | 54
-rw-r--r--  net/netfilter/nft_set_pipapo.c | 204
-rw-r--r--  net/netfilter/nft_set_pipapo_avx2.c | 26
-rw-r--r--  net/netfilter/nft_set_rbtree.c | 40
-rw-r--r--  net/netfilter/x_tables.c | 16
-rw-r--r--  net/netfilter/xt_nfacct.c | 4
-rw-r--r--  net/netlink/af_netlink.c | 87
-rw-r--r--  net/nfc/nci/uart.c | 8
-rw-r--r--  net/nfc/netlink.c | 6
-rw-r--r--  net/openvswitch/actions.c | 29
-rw-r--r--  net/openvswitch/datapath.c | 50
-rw-r--r--  net/openvswitch/datapath.h | 6
-rw-r--r--  net/openvswitch/vport.c | 1
-rw-r--r--  net/packet/af_packet.c | 29
-rw-r--r--  net/packet/diag.c | 2
-rw-r--r--  net/phonet/pep.c | 2
-rw-r--r--  net/phonet/socket.c | 4
-rw-r--r--  net/rds/af_rds.c | 2
-rw-r--r--  net/rds/send.c | 2
-rw-r--r--  net/rds/tcp_listen.c | 30
-rw-r--r--  net/rose/rose_in.c | 3
-rw-r--r--  net/rose/rose_route.c | 15
-rw-r--r--  net/rxrpc/ar-internal.h | 19
-rw-r--r--  net/rxrpc/call_accept.c | 18
-rw-r--r--  net/rxrpc/call_object.c | 28
-rw-r--r--  net/rxrpc/io_thread.c | 14
-rw-r--r--  net/rxrpc/output.c | 27
-rw-r--r--  net/rxrpc/peer_object.c | 6
-rw-r--r--  net/rxrpc/recvmsg.c | 23
-rw-r--r--  net/rxrpc/security.c | 8
-rw-r--r--  net/sched/Kconfig | 12
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_api.c | 9
-rw-r--r--  net/sched/act_connmark.c | 18
-rw-r--r--  net/sched/act_csum.c | 18
-rw-r--r--  net/sched/act_ct.c | 30
-rw-r--r--  net/sched/act_ctinfo.c | 42
-rw-r--r--  net/sched/act_mpls.c | 21
-rw-r--r--  net/sched/act_nat.c | 25
-rw-r--r--  net/sched/act_pedit.c | 20
-rw-r--r--  net/sched/act_police.c | 18
-rw-r--r--  net/sched/act_skbedit.c | 20
-rw-r--r--  net/sched/bpf_qdisc.c | 9
-rw-r--r--  net/sched/em_text.c | 2
-rw-r--r--  net/sched/sch_api.c | 52
-rw-r--r--  net/sched/sch_cake.c | 5
-rw-r--r--  net/sched/sch_dualpi2.c | 1175
-rw-r--r--  net/sched/sch_ets.c | 2
-rw-r--r--  net/sched/sch_generic.c | 2
-rw-r--r--  net/sched/sch_hfsc.c | 16
-rw-r--r--  net/sched/sch_htb.c | 4
-rw-r--r--  net/sched/sch_netem.c | 40
-rw-r--r--  net/sched/sch_prio.c | 2
-rw-r--r--  net/sched/sch_qfq.c | 35
-rw-r--r--  net/sched/sch_red.c | 2
-rw-r--r--  net/sched/sch_sfq.c | 15
-rw-r--r--  net/sched/sch_taprio.c | 18
-rw-r--r--  net/sched/sch_tbf.c | 2
-rw-r--r--  net/sctp/input.c | 2
-rw-r--r--  net/sctp/ipv6.c | 7
-rw-r--r--  net/sctp/proc.c | 4
-rw-r--r--  net/sctp/protocol.c | 3
-rw-r--r--  net/sctp/socket.c | 8
-rw-r--r--  net/sctp/transport.c | 2
-rw-r--r--  net/smc/af_smc.c | 23
-rw-r--r--  net/smc/smc.h | 8
-rw-r--r--  net/smc/smc_clc.c | 6
-rw-r--r--  net/smc/smc_core.c | 5
-rw-r--r--  net/smc/smc_diag.c | 2
-rw-r--r--  net/smc/smc_loopback.c | 6
-rw-r--r--  net/smc/smc_pnet.c | 2
-rw-r--r--  net/socket.c | 54
-rw-r--r--  net/strparser/strparser.c | 2
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 15
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 3
-rw-r--r--  net/sunrpc/clnt.c | 36
-rw-r--r--  net/sunrpc/rpc_pipe.c | 532
-rw-r--r--  net/sunrpc/socklib.c | 164
-rw-r--r--  net/sunrpc/svc.c | 37
-rw-r--r--  net/sunrpc/svcsock.c | 5
-rw-r--r--  net/sunrpc/xdr.c | 11
-rw-r--r--  net/tipc/socket.c | 2
-rw-r--r--  net/tipc/topsrv.c | 2
-rw-r--r--  net/tipc/udp_media.c | 16
-rw-r--r--  net/tls/tls_strp.c | 3
-rw-r--r--  net/tls/tls_sw.c | 13
-rw-r--r--  net/unix/af_unix.c | 297
-rw-r--r--  net/unix/diag.c | 2
-rw-r--r--  net/vmw_vsock/af_vsock.c | 84
-rw-r--r--  net/vmw_vsock/hyperv_transport.c | 17
-rw-r--r--  net/vmw_vsock/vmci_transport.c | 4
-rw-r--r--  net/wireless/core.c | 23
-rw-r--r--  net/wireless/core.h | 11
-rw-r--r--  net/wireless/mlme.c | 34
-rw-r--r--  net/wireless/nl80211.c | 835
-rw-r--r--  net/wireless/rdev-ops.h | 45
-rw-r--r--  net/wireless/reg.c | 30
-rw-r--r--  net/wireless/scan.c | 204
-rw-r--r--  net/wireless/sme.c | 39
-rw-r--r--  net/wireless/trace.h | 129
-rw-r--r--  net/wireless/util.c | 88
-rw-r--r--  net/wireless/wext-compat.c | 10
-rw-r--r--  net/wireless/wext-core.c | 2
-rw-r--r--  net/x25/af_x25.c | 2
-rw-r--r--  net/x25/x25_dev.c | 22
-rw-r--r--  net/xdp/xsk.c | 38
-rw-r--r--  net/xdp/xsk_diag.c | 2
-rw-r--r--  net/xfrm/xfrm_device.c | 1
-rw-r--r--  net/xfrm/xfrm_input.c | 17
-rw-r--r--  net/xfrm/xfrm_interface_core.c | 7
-rw-r--r--  net/xfrm/xfrm_ipcomp.c | 3
-rw-r--r--  net/xfrm/xfrm_policy.c | 4
-rw-r--r--  net/xfrm/xfrm_state.c | 148
-rw-r--r--  net/xfrm/xfrm_user.c | 3
394 files changed, 13120 insertions, 7037 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index c40b98f7743c..868d28583c0a 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -20,9 +20,8 @@ static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
switch (nd_opt->nd_opt_len) {
case NDISC_802154_SHORT_ADDR_LENGTH:
if (ndopts->nd_802154_opt_array[nd_opt->nd_opt_type])
- ND_PRINTK(2, warn,
- "%s: duplicated short addr ND6 option found: type=%d\n",
- __func__, nd_opt->nd_opt_type);
+ net_dbg_ratelimited("%s: duplicated short addr ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
else
ndopts->nd_802154_opt_array[nd_opt->nd_opt_type] = nd_opt;
return 1;
@@ -63,8 +62,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_src_lladdr,
IEEE802154_SHORT_ADDR_LEN, 0);
if (!lladdr_short) {
- ND_PRINTK(2, warn,
- "NA: invalid short link-layer address length\n");
+ net_dbg_ratelimited("NA: invalid short link-layer address length\n");
return;
}
}
@@ -75,8 +73,7 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags,
lladdr_short = __ndisc_opt_addr_data(ndopts->nd_802154_opts_tgt_lladdr,
IEEE802154_SHORT_ADDR_LEN, 0);
if (!lladdr_short) {
- ND_PRINTK(2, warn,
- "NA: invalid short link-layer address length\n");
+ net_dbg_ratelimited("NA: invalid short link-layer address length\n");
return;
}
}
@@ -209,9 +206,8 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
sllao, tokenized, valid_lft,
prefered_lft);
if (err)
- ND_PRINTK(2, warn,
- "RA: could not add a short address based address for prefix: %pI6c\n",
- &pinfo->prefix);
+ net_dbg_ratelimited("RA: could not add a short address based address for prefix: %pI6c\n",
+ &pinfo->prefix);
}
}
#endif
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 06908e37c3d9..fda3a80e9340 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -357,6 +357,35 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
return err;
}
+static void vlan_vid0_add(struct net_device *dev)
+{
+ struct vlan_info *vlan_info;
+ int err;
+
+ if (!(dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ return;
+
+ pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name);
+
+ err = vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
+ if (err)
+ return;
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ vlan_info->auto_vid0 = true;
+}
+
+static void vlan_vid0_del(struct net_device *dev)
+{
+ struct vlan_info *vlan_info = rtnl_dereference(dev->vlan_info);
+
+ if (!vlan_info || !vlan_info->auto_vid0)
+ return;
+
+ vlan_info->auto_vid0 = false;
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+}
+
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
@@ -378,15 +407,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
return notifier_from_errno(err);
}
- if ((event == NETDEV_UP) &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- pr_info("adding VLAN 0 to HW filter on device %s\n",
- dev->name);
- vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
- }
- if (event == NETDEV_DOWN &&
- (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
+ if (event == NETDEV_UP)
+ vlan_vid0_add(dev);
+ else if (event == NETDEV_DOWN)
+ vlan_vid0_del(dev);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -446,7 +470,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
list_add(&vlandev->close_list, &close_list);
}
- dev_close_many(&close_list, false);
+ netif_close_many(&close_list, false);
list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) {
vlan_stacked_transfer_operstate(dev, vlandev,
@@ -459,7 +483,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
- flgs = dev_get_flags(vlandev);
+ flgs = netif_get_flags(vlandev);
if (flgs & IFF_UP)
continue;
@@ -741,3 +765,4 @@ module_exit(vlan_cleanup_module);
MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5eaf38875554..c7ffe591d593 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,6 +33,7 @@ struct vlan_info {
struct vlan_group grp;
struct list_head vid_list;
unsigned int nr_vids;
+ bool auto_vid0;
struct rcu_head rcu;
};
diff --git a/net/Kconfig b/net/Kconfig
index ebc80a98fc91..d5865cf19799 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -247,7 +247,7 @@ source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig"
source "net/bridge/netfilter/Kconfig"
-endif
+endif # if NETFILTER
source "net/sctp/Kconfig"
source "net/rds/Kconfig"
@@ -408,9 +408,9 @@ config NET_DROP_MONITOR
just checking the various proc files and other utilities for
drop statistics, say N here.
-endmenu
+endmenu # Network testing
-endmenu
+endmenu # Networking options
source "net/ax25/Kconfig"
source "net/can/Kconfig"
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 9c787e2e4b17..4744e3fd4544 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -35,6 +35,7 @@
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/etherdevice.h>
+#include <linux/refcount.h>
int sysctl_aarp_expiry_time = AARP_EXPIRY_TIME;
int sysctl_aarp_tick_time = AARP_TICK_TIME;
@@ -44,6 +45,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
/* Lists of aarp entries */
/**
* struct aarp_entry - AARP entry
+ * @refcnt: Reference count
* @last_sent: Last time we xmitted the aarp request
* @packet_queue: Queue of frames wait for resolution
* @status: Used for proxy AARP
@@ -55,6 +57,7 @@ int sysctl_aarp_resolve_time = AARP_RESOLVE_TIME;
* @next: Next entry in chain
*/
struct aarp_entry {
+ refcount_t refcnt;
/* These first two are only used for unresolved entries */
unsigned long last_sent;
struct sk_buff_head packet_queue;
@@ -79,6 +82,17 @@ static DEFINE_RWLOCK(aarp_lock);
/* Used to walk the list and purge/kick entries. */
static struct timer_list aarp_timer;
+static inline void aarp_entry_get(struct aarp_entry *a)
+{
+ refcount_inc(&a->refcnt);
+}
+
+static inline void aarp_entry_put(struct aarp_entry *a)
+{
+ if (refcount_dec_and_test(&a->refcnt))
+ kfree(a);
+}
+
/*
* Delete an aarp queue
*
@@ -87,7 +101,7 @@ static struct timer_list aarp_timer;
static void __aarp_expire(struct aarp_entry *a)
{
skb_queue_purge(&a->packet_queue);
- kfree(a);
+ aarp_entry_put(a);
}
/*
@@ -380,9 +394,11 @@ static void aarp_purge(void)
static struct aarp_entry *aarp_alloc(void)
{
struct aarp_entry *a = kmalloc(sizeof(*a), GFP_ATOMIC);
+ if (!a)
+ return NULL;
- if (a)
- skb_queue_head_init(&a->packet_queue);
+ refcount_set(&a->refcnt, 1);
+ skb_queue_head_init(&a->packet_queue);
return a;
}
@@ -477,6 +493,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
entry->dev = atif->dev;
write_lock_bh(&aarp_lock);
+ aarp_entry_get(entry);
hash = sa->s_node % (AARP_HASH_SIZE - 1);
entry->next = proxies[hash];
@@ -502,6 +519,7 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa)
retval = 1;
}
+ aarp_entry_put(entry);
write_unlock_bh(&aarp_lock);
out:
return retval;
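
[Annotation, not part of the patch] The aarp.c hunks above move struct aarp_entry to reference-counted lifetime: aarp_alloc() now returns the entry with its refcount initialised to 1, aarp_proxy_probe_network() takes an extra reference while the entry is published in the proxies table, and whoever drops the last reference via aarp_entry_put() frees it. Purely as an illustration of that get/put pattern, here is a minimal userspace analogue using C11 atomics with hypothetical names (the kernel's refcount_t additionally saturates on overflow/underflow, which plain atomics do not):

#include <stdatomic.h>
#include <stdlib.h>

struct entry {
	atomic_int refcnt;
	/* payload ... */
};

static struct entry *entry_alloc(void)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (e)
		atomic_init(&e->refcnt, 1);	/* caller owns the first reference */
	return e;
}

static void entry_get(struct entry *e)
{
	atomic_fetch_add(&e->refcnt, 1);	/* extra user, e.g. a lookup table */
}

static void entry_put(struct entry *e)
{
	if (atomic_fetch_sub(&e->refcnt, 1) == 1)
		free(e);			/* last reference dropped */
}
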
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 9c1241292d1d..01787fb6a7bc 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -181,7 +181,7 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(s),
sk_rmem_alloc_get(s),
s->sk_state,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
+ from_kuid_munged(seq_user_ns(seq), sk_uid(s)));
out:
return 0;
}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 73ea7e67f05a..30242fe10341 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -576,6 +576,7 @@ static int atrtr_create(struct rtentry *r, struct net_device *devhint)
/* Fill in the routing entry */
rt->target = ta->sat_addr;
+ dev_put(rt->dev); /* Release old device */
dev_hold(devhint);
rt->dev = devhint;
rt->flags = r->rt_flags;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 61b5b700817d..f7a5565e794e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -45,7 +45,8 @@
#include <net/atmclip.h>
static struct net_device *clip_devs;
-static struct atm_vcc *atmarpd;
+static struct atm_vcc __rcu *atmarpd;
+static DEFINE_MUTEX(atmarpd_lock);
static struct timer_list idle_timer;
static const struct neigh_ops clip_neigh_ops;
@@ -53,24 +54,35 @@ static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
{
struct sock *sk;
struct atmarp_ctrl *ctrl;
+ struct atm_vcc *vcc;
struct sk_buff *skb;
+ int err = 0;
pr_debug("(%d)\n", type);
- if (!atmarpd)
- return -EUNATCH;
+
+ rcu_read_lock();
+ vcc = rcu_dereference(atmarpd);
+ if (!vcc) {
+ err = -EUNATCH;
+ goto unlock;
+ }
skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
+ if (!skb) {
+ err = -ENOMEM;
+ goto unlock;
+ }
ctrl = skb_put(skb, sizeof(struct atmarp_ctrl));
ctrl->type = type;
ctrl->itf_num = itf;
ctrl->ip = ip;
- atm_force_charge(atmarpd, skb->truesize);
+ atm_force_charge(vcc, skb->truesize);
- sk = sk_atm(atmarpd);
+ sk = sk_atm(vcc);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk);
- return 0;
+unlock:
+ rcu_read_unlock();
+ return err;
}
static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry)
@@ -193,12 +205,6 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("\n");
- if (!clip_devs) {
- atm_return(vcc, skb->truesize);
- kfree_skb(skb);
- return;
- }
-
if (!skb) {
pr_debug("removing VCC %p\n", clip_vcc);
if (clip_vcc->entry)
@@ -208,6 +214,11 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
return;
}
atm_return(vcc, skb->truesize);
+ if (!clip_devs) {
+ kfree_skb(skb);
+ return;
+ }
+
skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs;
/* clip_vcc->entry == NULL if we don't have an IP address yet */
if (!skb->dev) {
@@ -418,6 +429,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
if (!vcc->push)
return -EBADFD;
+ if (vcc->user_back)
+ return -EINVAL;
clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL);
if (!clip_vcc)
return -ENOMEM;
@@ -608,17 +621,27 @@ static void atmarpd_close(struct atm_vcc *vcc)
{
pr_debug("\n");
- rtnl_lock();
- atmarpd = NULL;
+ mutex_lock(&atmarpd_lock);
+ RCU_INIT_POINTER(atmarpd, NULL);
+ mutex_unlock(&atmarpd_lock);
+
+ synchronize_rcu();
skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
- rtnl_unlock();
pr_debug("(done)\n");
module_put(THIS_MODULE);
}
+static int atmarpd_send(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ atm_return_tx(vcc, skb);
+ dev_kfree_skb_any(skb);
+ return 0;
+}
+
static const struct atmdev_ops atmarpd_dev_ops = {
- .close = atmarpd_close
+ .close = atmarpd_close,
+ .send = atmarpd_send
};
@@ -632,15 +655,18 @@ static struct atm_dev atmarpd_dev = {
static int atm_init_atmarp(struct atm_vcc *vcc)
{
- rtnl_lock();
+ if (vcc->push == clip_push)
+ return -EINVAL;
+
+ mutex_lock(&atmarpd_lock);
if (atmarpd) {
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return -EADDRINUSE;
}
mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
- atmarpd = vcc;
+ rcu_assign_pointer(atmarpd, vcc);
set_bit(ATM_VF_META, &vcc->flags);
set_bit(ATM_VF_READY, &vcc->flags);
/* allow replies and avoid getting closed if signaling dies */
@@ -649,13 +675,14 @@ static int atm_init_atmarp(struct atm_vcc *vcc)
vcc->push = NULL;
vcc->pop = NULL; /* crash */
vcc->push_oam = NULL; /* crash */
- rtnl_unlock();
+ mutex_unlock(&atmarpd_lock);
return 0;
}
static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct atm_vcc *vcc = ATM_SD(sock);
+ struct sock *sk = sock->sk;
int err = 0;
switch (cmd) {
@@ -676,14 +703,18 @@ static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = clip_create(arg);
break;
case ATMARPD_CTRL:
+ lock_sock(sk);
err = atm_init_atmarp(vcc);
if (!err) {
sock->state = SS_CONNECTED;
__module_get(THIS_MODULE);
}
+ release_sock(sk);
break;
case ATMARP_MKIP:
+ lock_sock(sk);
err = clip_mkip(vcc, arg);
+ release_sock(sk);
break;
case ATMARP_SETENTRY:
err = clip_setentry(vcc, (__force __be32)arg);
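
[Annotation, not part of the patch] The clip.c hunks above replace the plain global atmarpd pointer with an RCU-protected one: to_atmarpd() dereferences it under rcu_read_lock(), writers publish or clear it under the new atmarpd_lock mutex, and atmarpd_close() calls synchronize_rcu() before tearing down the daemon socket so no reader can still be using the old pointer. A rough userspace sketch of the same publish/read/retire pattern, assuming the liburcu library and using illustrative names:

#include <urcu.h>		/* userspace RCU; link with -lurcu */
#include <pthread.h>
#include <stdlib.h>

struct daemon { int fd; };

static struct daemon *global_daemon;	/* RCU-protected pointer */
static pthread_mutex_t daemon_lock = PTHREAD_MUTEX_INITIALIZER;

static int send_to_daemon(void)
{
	struct daemon *d;
	int err = 0;

	/* each reader thread must have called rcu_register_thread() */
	rcu_read_lock();
	d = rcu_dereference(global_daemon);
	if (!d)
		err = -1;	/* no daemon attached */
	/* ... use d only while inside the read-side section ... */
	rcu_read_unlock();
	return err;
}

static void attach_daemon(struct daemon *d)
{
	pthread_mutex_lock(&daemon_lock);	/* writers serialized by a mutex */
	rcu_assign_pointer(global_daemon, d);
	pthread_mutex_unlock(&daemon_lock);
}

static void detach_daemon(void)
{
	struct daemon *old;

	pthread_mutex_lock(&daemon_lock);
	old = global_daemon;
	rcu_assign_pointer(global_daemon, NULL);
	pthread_mutex_unlock(&daemon_lock);

	synchronize_rcu();	/* wait for in-flight readers before freeing */
	free(old);
}
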
diff --git a/net/atm/common.c b/net/atm/common.c
index 9b75699992ff..d7f7976ea13a 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
skb->dev = NULL; /* for paths shared with net_device interfaces */
if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
+ atm_return_tx(vcc, skb);
kfree_skb(skb);
error = -EFAULT;
goto out;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index acef984f3367..afb8d3eb2185 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* Device structures */
static struct net_device *dev_lec[MAX_LEC_ITF];
+static DEFINE_MUTEX(lec_mutex);
#if IS_ENABLED(CONFIG_BRIDGE)
static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
@@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
int bytes_left;
struct atmlec_ioc ioc_data;
+ lockdep_assert_held(&lec_mutex);
/* Lecd must be up in this case */
bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc));
if (bytes_left != 0)
@@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg)
static int lec_mcast_attach(struct atm_vcc *vcc, int arg)
{
+ lockdep_assert_held(&lec_mutex);
if (arg < 0 || arg >= MAX_LEC_ITF)
return -EINVAL;
arg = array_index_nospec(arg, MAX_LEC_ITF);
@@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
int i;
struct lec_priv *priv;
+ lockdep_assert_held(&lec_mutex);
if (arg < 0)
arg = 0;
if (arg >= MAX_LEC_ITF)
@@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg)
snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i);
if (register_netdev(dev_lec[i])) {
free_netdev(dev_lec[i]);
+ dev_lec[i] = NULL;
return -EINVAL;
}
@@ -904,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l)
v = (dev && netdev_priv(dev)) ?
lec_priv_walk(state, l, netdev_priv(dev)) : NULL;
if (!v && dev) {
- dev_put(dev);
/* Partial state reset for the next time we get called */
dev = NULL;
}
@@ -928,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos)
{
struct lec_state *state = seq->private;
+ mutex_lock(&lec_mutex);
state->itf = 0;
state->dev = NULL;
state->locked = NULL;
@@ -945,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v)
if (state->dev) {
spin_unlock_irqrestore(&state->locked->lec_arp_lock,
state->flags);
- dev_put(state->dev);
+ state->dev = NULL;
}
+ mutex_unlock(&lec_mutex);
}
static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -1003,6 +1009,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
+ mutex_lock(&lec_mutex);
switch (cmd) {
case ATMLEC_CTRL:
err = lecd_attach(vcc, (int)arg);
@@ -1017,6 +1024,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
}
+ mutex_unlock(&lec_mutex);
return err;
}
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 2b5f78a7ec3e..1e6511ec842c 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
pr_debug("(%d) %d -= %d\n",
vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize);
- WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc));
+ atm_return_tx(vcc, skb);
dev_kfree_skb_any(skb);
sk->sk_write_space(sk);
}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 995d29e7fb13..b19d851e1f44 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -146,11 +146,10 @@ void atm_dev_deregister(struct atm_dev *dev)
*/
mutex_lock(&atm_dev_mutex);
list_del(&dev->dev_list);
- mutex_unlock(&atm_dev_mutex);
-
atm_dev_release_vccs(dev);
atm_unregister_sysfs(dev);
atm_proc_dev_deregister(dev);
+ mutex_unlock(&atm_dev_mutex);
atm_dev_put(dev);
}
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index c0c982b6f029..49e5861b58ec 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -14,6 +14,7 @@
#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/types.h>
#include <net/genetlink.h>
#include <net/netlink.h>
#include <uapi/linux/batman_adv.h>
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 2c486374af58..7ce9abbdb4b4 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -11,10 +11,8 @@
#include <linux/netlink.h>
#include <linux/skbuff.h>
-#include <linux/types.h>
extern char batadv_routing_algo[];
-extern struct list_head batadv_hardif_list;
void batadv_algo_init(void);
struct batadv_algo_ops *batadv_algo_get(const char *name);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 458879d21d66..54fe38b3b2fd 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -791,6 +791,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *primary_if, *tmp_hard_iface;
int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len;
+ struct list_head *iter;
u32 seqno;
u16 tvlv_len = 0;
unsigned long send_time;
@@ -847,10 +848,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
* interfaces.
*/
rcu_read_lock();
- list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) {
- if (tmp_hard_iface->mesh_iface != hard_iface->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(hard_iface->mesh_iface, tmp_hard_iface, iter) {
if (!kref_get_unless_zero(&tmp_hard_iface->refcount))
continue;
@@ -1567,6 +1565,7 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
bool is_my_oldorig = false;
bool is_my_addr = false;
bool is_my_orig = false;
+ struct list_head *iter;
ogm_packet = (struct batadv_ogm_packet *)(skb->data + ogm_offset);
ethhdr = eth_hdr(skb);
@@ -1603,11 +1602,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
ogm_packet->version, has_directlink_flag);
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->if_status != BATADV_IF_ACTIVE)
- continue;
- if (hard_iface->mesh_iface != if_incoming->mesh_iface)
+ netdev_for_each_lower_private_rcu(if_incoming->mesh_iface, hard_iface, iter) {
+ if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
if (batadv_compare_eth(ethhdr->h_source,
@@ -1668,13 +1665,10 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset,
if_incoming, BATADV_IF_DEFAULT);
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
@@ -2142,6 +2136,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
struct batadv_hard_iface *single_hardif)
{
struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
int i_hardif = 0;
int i_hardif_s = cb->args[0];
int idx = cb->args[1];
@@ -2158,11 +2153,7 @@ batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
i_hardif++;
}
} else {
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
- list) {
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (i_hardif++ < i_hardif_s)
continue;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index c16c2e60889d..de9444714264 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -212,6 +212,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
struct batadv_hard_iface *single_hardif)
{
struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
int i_hardif = 0;
int i_hardif_s = cb->args[0];
int idx = cb->args[1];
@@ -227,10 +228,7 @@ batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
i_hardif++;
}
} else {
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (i_hardif++ < i_hardif_s)
continue;
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 70d6778da0d7..cb16c1ed2a58 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -35,7 +35,6 @@
#include <net/cfg80211.h>
#include <uapi/linux/batadv_packet.h>
-#include "bat_algo.h"
#include "bat_v_ogm.h"
#include "hard-interface.h"
#include "log.h"
@@ -472,15 +471,12 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface,
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface)
{
struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
/* update orig field of every elp iface belonging to this mesh */
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (primary_iface->mesh_iface != hard_iface->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(primary_iface->mesh_iface, hard_iface, iter)
batadv_v_elp_iface_activate(primary_iface, hard_iface);
- }
rcu_read_unlock();
}
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index b86bb647da5b..e3870492dab7 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -22,7 +22,6 @@
#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/random.h>
-#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -33,7 +32,6 @@
#include <linux/workqueue.h>
#include <uapi/linux/batadv_packet.h>
-#include "bat_algo.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
@@ -265,6 +263,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
unsigned char *ogm_buff;
+ struct list_head *iter;
int ogm_buff_len;
u16 tvlv_len = 0;
int ret;
@@ -301,10 +300,7 @@ static void batadv_v_ogm_send_meshif(struct batadv_priv *bat_priv)
/* broadcast on every interface */
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
@@ -859,6 +855,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
struct batadv_hard_iface *hard_iface;
struct batadv_ogm2_packet *ogm_packet;
u32 ogm_throughput, link_throughput, path_throughput;
+ struct list_head *iter;
int ret;
ethhdr = eth_hdr(skb);
@@ -921,13 +918,10 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
BATADV_IF_DEFAULT);
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 558d39dffc23..bace57e4f9a5 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -438,15 +438,13 @@ out:
}
static struct batadv_hard_iface *
-batadv_hardif_get_active(const struct net_device *mesh_iface)
+batadv_hardif_get_active(struct net_device *mesh_iface)
{
struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) {
if (hard_iface->if_status == BATADV_IF_ACTIVE &&
kref_get_unless_zero(&hard_iface->refcount))
goto out;
@@ -508,19 +506,17 @@ batadv_hardif_is_iface_up(const struct batadv_hard_iface *hard_iface)
static void batadv_check_known_mac_addr(const struct batadv_hard_iface *hard_iface)
{
- const struct net_device *mesh_iface = hard_iface->mesh_iface;
+ struct net_device *mesh_iface = hard_iface->mesh_iface;
const struct batadv_hard_iface *tmp_hard_iface;
+ struct list_head *iter;
if (!mesh_iface)
return;
- list_for_each_entry(tmp_hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private(mesh_iface, tmp_hard_iface, iter) {
if (tmp_hard_iface == hard_iface)
continue;
- if (tmp_hard_iface->mesh_iface != mesh_iface)
- continue;
-
if (tmp_hard_iface->if_status == BATADV_IF_NOT_IN_USE)
continue;
@@ -545,15 +541,13 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *mesh_iface)
unsigned short lower_headroom = 0;
unsigned short lower_tailroom = 0;
unsigned short needed_headroom;
+ struct list_head *iter;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) {
if (hard_iface->if_status == BATADV_IF_NOT_IN_USE)
continue;
- if (hard_iface->mesh_iface != mesh_iface)
- continue;
-
lower_header_len = max_t(unsigned short, lower_header_len,
hard_iface->net_dev->hard_header_len);
@@ -586,17 +580,15 @@ int batadv_hardif_min_mtu(struct net_device *mesh_iface)
{
struct batadv_priv *bat_priv = netdev_priv(mesh_iface);
const struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
int min_mtu = INT_MAX;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE &&
hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
- if (hard_iface->mesh_iface != mesh_iface)
- continue;
-
min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu);
}
rcu_read_unlock();
@@ -734,7 +726,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv = netdev_priv(hard_iface->mesh_iface);
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
- mesh_iface, NULL, NULL, NULL);
+ mesh_iface, hard_iface, NULL, NULL);
if (ret)
goto err_dev;
@@ -803,18 +795,15 @@ err_dev:
*
* Return: number of connected/enslaved hard interfaces
*/
-static size_t batadv_hardif_cnt(const struct net_device *mesh_iface)
+static size_t batadv_hardif_cnt(struct net_device *mesh_iface)
{
struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
size_t count = 0;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(mesh_iface, hard_iface, iter)
count++;
- }
rcu_read_unlock();
return count;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index c0bc75513355..20346d7b6b69 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -27,7 +27,6 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
-#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -303,16 +302,14 @@ void batadv_mesh_free(struct net_device *mesh_iface)
bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr)
{
const struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
bool is_my_mac = false;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) {
is_my_mac = true;
break;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 692109be2210..1481eb2bacee 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2025.2"
+#define BATADV_SOURCE_VERSION "2025.3"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/mesh-interface.c b/net/batman-adv/mesh-interface.c
index 5bbc366f974d..de2c2d9c6e4d 100644
--- a/net/batman-adv/mesh-interface.c
+++ b/net/batman-adv/mesh-interface.c
@@ -1101,9 +1101,9 @@ static void batadv_meshif_destroy_netlink(struct net_device *mesh_iface,
struct batadv_hard_iface *hard_iface;
struct batadv_meshif_vlan *vlan;
- list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface == mesh_iface)
- batadv_hardif_disable_interface(hard_iface);
+ while (!list_empty(&mesh_iface->adj_list.lower)) {
+ hard_iface = netdev_adjacent_get_private(mesh_iface->adj_list.lower.next);
+ batadv_hardif_disable_interface(hard_iface);
}
/* destroy the "untagged" VLAN */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 5786680aff30..e8c6b0bf670f 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -246,15 +246,13 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv)
{
const struct batadv_hard_iface *hard_iface;
+ struct list_head *iter;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) {
rcu_read_unlock();
return BATADV_NO_FLAGS;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index e7c8f9f2bb1f..beb181b3a7d8 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -20,7 +20,6 @@
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/limits.h>
-#include <linux/list.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
@@ -968,6 +967,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_priv *bat_priv;
int portid = NETLINK_CB(cb->skb).portid;
int skip = cb->args[0];
+ struct list_head *iter;
int i = 0;
mesh_iface = batadv_netlink_get_meshif(cb);
@@ -979,10 +979,7 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
rtnl_lock();
cb->seq = batadv_hardif_generation << 1 | 1;
- list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != mesh_iface)
- continue;
-
+ netdev_for_each_lower_private(mesh_iface, hard_iface, iter) {
if (i++ < skip)
continue;
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index d9cfc5c6b208..a464ff96b929 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -29,7 +29,6 @@
#include <linux/workqueue.h>
#include <uapi/linux/batadv_packet.h>
-#include "bat_algo.h"
#include "distributed-arp-table.h"
#include "fragmentation.h"
#include "gateway_client.h"
@@ -1208,6 +1207,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
struct batadv_neigh_node *best_neigh_node;
struct batadv_hard_iface *hard_iface;
bool changed_ifinfo, changed_neigh;
+ struct list_head *iter;
if (batadv_has_timed_out(orig_node->last_seen,
2 * BATADV_PURGE_TIMEOUT)) {
@@ -1232,13 +1232,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
/* ... then for all other interfaces. */
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (hard_iface->if_status != BATADV_IF_ACTIVE)
continue;
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 9d72f4f15b3d..95849ba004e7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -21,7 +21,6 @@
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
-#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
@@ -924,6 +923,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
{
struct batadv_hard_iface *hard_iface;
struct batadv_hard_iface *primary_if;
+ struct list_head *iter;
int ret = NETDEV_TX_OK;
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -931,10 +931,7 @@ static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
return NETDEV_TX_BUSY;
rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->mesh_iface != bat_priv->mesh_iface)
- continue;
-
+ netdev_for_each_lower_private_rcu(bat_priv->mesh_iface, hard_iface, iter) {
if (!kref_get_unless_zero(&hard_iface->refcount))
continue;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 6ad2f72f53f4..2b94e2077203 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -364,6 +364,13 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS,
sizeof(pkt_status), &pkt_status);
}
+
+ if (test_bit(BT_SK_PKT_SEQNUM, &bt_sk(sk)->flags)) {
+ u16 pkt_seqnum = hci_skb_pkt_seqnum(skb);
+
+ put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_SEQNUM,
+ sizeof(pkt_seqnum), &pkt_seqnum);
+ }
}
skb_free_datagram(sk, skb);
@@ -815,7 +822,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
refcount_read(&sk->sk_refcnt),
sk_rmem_alloc_get(sk),
sk_wmem_alloc_get(sk),
- from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk),
bt->parent ? sock_i_ino(bt->parent) : 0LU);
diff --git a/net/bluetooth/aosp.c b/net/bluetooth/aosp.c
index 1d67836e95e1..59025771af53 100644
--- a/net/bluetooth/aosp.c
+++ b/net/bluetooth/aosp.c
@@ -70,7 +70,7 @@ void aosp_do_open(struct hci_dev *hdev)
rp = (struct aosp_rp_le_get_vendor_capa *)skb->data;
version_supported = le16_to_cpu(rp->version_supported);
- /* AOSP displays the verion number like v0.98, v1.00, etc. */
+ /* AOSP displays the version number like v0.98, v1.00, etc. */
bt_dev_info(hdev, "AOSP extensions version v%u.%02u",
version_supported >> 8, version_supported & 0xff);
diff --git a/net/bluetooth/coredump.c b/net/bluetooth/coredump.c
index 819eacb38762..720cb79adf96 100644
--- a/net/bluetooth/coredump.c
+++ b/net/bluetooth/coredump.c
@@ -249,15 +249,15 @@ static void hci_devcd_dump(struct hci_dev *hdev)
size = hdev->dump.tail - hdev->dump.head;
- /* Emit a devcoredump with the available data */
- dev_coredumpv(&hdev->dev, hdev->dump.head, size, GFP_KERNEL);
-
/* Send a copy to monitor as a diagnostic packet */
skb = bt_skb_alloc(size, GFP_ATOMIC);
if (skb) {
skb_put_data(skb, hdev->dump.head, size);
hci_recv_diag(hdev, skb);
}
+
+ /* Emit a devcoredump with the available data */
+ dev_coredumpv(&hdev->dev, hdev->dump.head, size, GFP_KERNEL);
}
static void hci_devcd_handle_pkt_complete(struct hci_dev *hdev,
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 1bc51e2b05a3..3f72111ba651 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
return ad_len;
}
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size)
{
struct adv_info *adv = NULL;
u8 ad_len = 0, flags = 0;
@@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
/* If flags would still be empty, then there is no need to
* include the "Flags" AD field".
*/
- if (flags) {
+ if (flags && (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_FLAGS;
ptr[2] = flags;
@@ -316,7 +316,8 @@ skip_flags:
}
/* Provide Tx Power only if we can provide a valid value for it */
- if (adv_tx_power != HCI_TX_POWER_INVALID) {
+ if (adv_tx_power != HCI_TX_POWER_INVALID &&
+ (ad_len + eir_precalc_len(1) <= size)) {
ptr[0] = 0x02;
ptr[1] = EIR_TX_POWER;
ptr[2] = (u8)adv_tx_power;
@@ -366,17 +367,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
{
- while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ size_t dlen;
+
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) {
u16 value = get_unaligned_le16(eir);
if (uuid == value) {
if (len)
- *len -= 2;
+ *len = dlen - 2;
return &eir[2];
}
- eir += *len;
- eir_len -= *len;
+ eir += dlen;
+ eir_len -= dlen;
}
return NULL;
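
[Annotation, not part of the patch] The eir.c hunk above fixes eir_get_service_data() so the iteration cursor is a local dlen and the caller-visible *len is written only with the length of the matched service data (dlen - 2, i.e. the bytes after the 16-bit UUID) rather than being reused while walking the records. As an illustration only of the length/type/value record format being parsed, a standalone sketch with hypothetical names (each record's length byte counts the type byte plus the value bytes; AD type 0x16 is Service Data with a little-endian 16-bit UUID):

#include <stdint.h>
#include <stddef.h>

static const uint8_t *find_service_data(const uint8_t *ad, size_t ad_len,
					 uint16_t uuid, size_t *out_len)
{
	size_t i = 0;

	while (i + 1 < ad_len) {
		uint8_t field_len = ad[i];		/* type + value bytes */
		uint8_t type = ad[i + 1];
		const uint8_t *value = &ad[i + 2];
		size_t value_len = field_len - 1;

		if (field_len == 0 || i + 1 + field_len > ad_len)
			break;				/* malformed record */

		/* 0x16 = Service Data, 16-bit UUID (little endian) */
		if (type == 0x16 && value_len >= 2 &&
		    (uint16_t)(value[0] | (value[1] << 8)) == uuid) {
			if (out_len)
				*out_len = value_len - 2; /* data after UUID */
			return value + 2;
		}

		i += 1 + field_len;			/* next record */
	}

	return NULL;
}
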
diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h
index 5c89a05e8b29..9372db83f912 100644
--- a/net/bluetooth/eir.h
+++ b/net/bluetooth/eir.h
@@ -9,7 +9,7 @@
void eir_create(struct hci_dev *hdev, u8 *data);
-u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
+u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size);
u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr);
u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 99efeed6a766..7d1e79f69cd1 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -785,7 +785,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
d->sync_handle = conn->sync_handle;
if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) {
- hci_conn_hash_list_flag(hdev, find_bis, BIS_LINK,
+ hci_conn_hash_list_flag(hdev, find_bis, PA_LINK,
HCI_CONN_PA_SYNC, d);
if (!d->count)
@@ -814,7 +814,7 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
*
* Detects if there any BIS left connected in a BIG
* broadcaster: Remove advertising instance and terminate BIG.
- * broadcaster receiver: Teminate BIG sync and terminate PA sync.
+ * broadcaster receiver: Terminate BIG sync and terminate PA sync.
*/
static void bis_cleanup(struct hci_conn *conn)
{
@@ -914,6 +914,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t
break;
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
if (hdev->iso_mtu)
/* Dedicated ISO Buffer exists */
break;
@@ -979,6 +980,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t
break;
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
/* conn->src should reflect the local identity address */
hci_copy_identity_address(hdev, &conn->src, &conn->src_type);
@@ -1033,7 +1035,6 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t
}
hci_conn_init_sysfs(conn);
-
return conn;
}
@@ -1077,6 +1078,7 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
break;
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
if ((conn->state != BT_CONNECTED &&
!test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) ||
test_bit(HCI_CONN_BIG_CREATED, &conn->flags))
@@ -1152,7 +1154,8 @@ void hci_conn_del(struct hci_conn *conn)
} else {
/* Unacked ISO frames */
if (conn->type == CIS_LINK ||
- conn->type == BIS_LINK) {
+ conn->type == BIS_LINK ||
+ conn->type == PA_LINK) {
if (hdev->iso_pkts)
hdev->iso_cnt += conn->sent;
else if (hdev->le_pkts)
@@ -1501,8 +1504,8 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
/* This function requires the caller holds hdev->lock */
static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
- struct bt_iso_qos *qos, __u8 base_len,
- __u8 *base)
+ __u8 sid, struct bt_iso_qos *qos,
+ __u8 base_len, __u8 *base)
{
struct hci_conn *conn;
int err;
@@ -1543,6 +1546,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst,
return conn;
conn->state = BT_CONNECT;
+ conn->sid = sid;
hci_conn_hold(conn);
return conn;
@@ -2062,7 +2066,8 @@ static int create_big_sync(struct hci_dev *hdev, void *data)
if (qos->bcast.bis)
sync_interval = interval * 4;
- err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len,
+ err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->sid,
+ conn->le_per_adv_data_len,
conn->le_per_adv_data, flags, interval,
interval, sync_interval);
if (err)
@@ -2079,7 +2084,7 @@ struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst,
bt_dev_dbg(hdev, "dst %pMR type %d sid %d", dst, dst_type, sid);
- conn = hci_conn_add_unset(hdev, BIS_LINK, dst, HCI_ROLE_SLAVE);
+ conn = hci_conn_add_unset(hdev, PA_LINK, dst, HCI_ROLE_SLAVE);
if (IS_ERR(conn))
return conn;
@@ -2134,7 +2139,7 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err)
}
}
-struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
+struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid,
struct bt_iso_qos *qos,
__u8 base_len, __u8 *base)
{
@@ -2144,7 +2149,8 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
struct hci_link *link;
/* Look for any BIS that is open for rebinding */
- conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN);
+ conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN,
+ HCI_ROLE_MASTER);
if (conn) {
memcpy(qos, &conn->iso_qos, sizeof(*qos));
conn->state = BT_CONNECTED;
@@ -2156,7 +2162,7 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
base, base_len);
/* We need hci_conn object using the BDADDR_ANY as dst */
- conn = hci_add_bis(hdev, dst, qos, base_len, eir);
+ conn = hci_add_bis(hdev, dst, sid, qos, base_len, eir);
if (IS_ERR(conn))
return conn;
@@ -2207,20 +2213,35 @@ static void bis_mark_per_adv(struct hci_conn *conn, void *data)
}
struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
- __u8 dst_type, struct bt_iso_qos *qos,
+ __u8 dst_type, __u8 sid,
+ struct bt_iso_qos *qos,
__u8 base_len, __u8 *base)
{
struct hci_conn *conn;
int err;
struct iso_list_data data;
- conn = hci_bind_bis(hdev, dst, qos, base_len, base);
+ conn = hci_bind_bis(hdev, dst, sid, qos, base_len, base);
if (IS_ERR(conn))
return conn;
if (conn->state == BT_CONNECTED)
return conn;
+ /* Check if SID needs to be allocated then search for the first
+ * available.
+ */
+ if (conn->sid == HCI_SID_INVALID) {
+ u8 sid;
+
+ for (sid = 0; sid <= 0x0f; sid++) {
+ if (!hci_find_adv_sid(hdev, sid)) {
+ conn->sid = sid;
+ break;
+ }
+ }
+ }
+
data.big = qos->bcast.big;
data.bis = qos->bcast.bis;
@@ -2228,7 +2249,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
* the start periodic advertising and create BIG commands have
* been queued
*/
- hci_conn_hash_list_state(hdev, bis_mark_per_adv, BIS_LINK,
+ hci_conn_hash_list_state(hdev, bis_mark_per_adv, PA_LINK,
BT_BOUND, &data);
/* Queue start periodic advertising and create BIG */
@@ -2962,6 +2983,7 @@ void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb)
switch (conn->type) {
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
case ACL_LINK:
case LE_LINK:
break;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3b49828160b7..55e0722fd066 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -64,7 +64,7 @@ static DEFINE_IDA(hci_index_ida);
/* Get HCI device by index.
* Device is held on return. */
-struct hci_dev *hci_dev_get(int index)
+static struct hci_dev *__hci_dev_get(int index, int *srcu_index)
{
struct hci_dev *hdev = NULL, *d;
@@ -77,6 +77,8 @@ struct hci_dev *hci_dev_get(int index)
list_for_each_entry(d, &hci_dev_list, list) {
if (d->id == index) {
hdev = hci_dev_hold(d);
+ if (srcu_index)
+ *srcu_index = srcu_read_lock(&d->srcu);
break;
}
}
@@ -84,6 +86,22 @@ struct hci_dev *hci_dev_get(int index)
return hdev;
}
+struct hci_dev *hci_dev_get(int index)
+{
+ return __hci_dev_get(index, NULL);
+}
+
+static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index)
+{
+ return __hci_dev_get(index, srcu_index);
+}
+
+static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index)
+{
+ srcu_read_unlock(&hdev->srcu, srcu_index);
+ hci_dev_put(hdev);
+}
+
/* ---- Inquiry support ---- */
bool hci_discovery_active(struct hci_dev *hdev)
@@ -568,9 +586,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev)
int hci_dev_reset(__u16 dev)
{
struct hci_dev *hdev;
- int err;
+ int err, srcu_index;
- hdev = hci_dev_get(dev);
+ hdev = hci_dev_get_srcu(dev, &srcu_index);
if (!hdev)
return -ENODEV;
@@ -592,7 +610,7 @@ int hci_dev_reset(__u16 dev)
err = hci_dev_do_reset(hdev);
done:
- hci_dev_put(hdev);
+ hci_dev_put_srcu(hdev, srcu_index);
return err;
}
@@ -1238,12 +1256,10 @@ struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr,
if (addr_type == irk->addr_type &&
bacmp(bdaddr, &irk->bdaddr) == 0) {
irk_to_return = irk;
- goto done;
+ break;
}
}
-done:
-
if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK,
irk_to_return->val)) {
bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR",
@@ -1585,6 +1601,19 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance)
}
/* This function requires the caller holds hdev->lock */
+struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid)
+{
+ struct adv_info *adv;
+
+ list_for_each_entry(adv, &hdev->adv_instances, list) {
+ if (adv->sid == sid)
+ return adv;
+ }
+
+ return NULL;
+}
+
+/* This function requires the caller holds hdev->lock */
struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance)
{
struct adv_info *cur_instance;
@@ -1736,7 +1765,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance,
}
/* This function requires the caller holds hdev->lock */
-struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
+struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid,
u32 flags, u8 data_len, u8 *data,
u32 min_interval, u32 max_interval)
{
@@ -1748,6 +1777,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance,
if (IS_ERR(adv))
return adv;
+ adv->sid = sid;
adv->periodic = true;
adv->per_adv_data_len = data_len;
@@ -1877,10 +1907,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
if (monitor->handle)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) {
+ if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED)
hdev->adv_monitors_cnt--;
- mgmt_adv_monitor_removed(hdev, monitor->handle);
- }
kfree(monitor);
}
@@ -2421,6 +2449,11 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
if (!hdev)
return NULL;
+ if (init_srcu_struct(&hdev->srcu)) {
+ kfree(hdev);
+ return NULL;
+ }
+
hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1);
hdev->esco_type = (ESCO_HV1);
hdev->link_mode = (HCI_LM_ACCEPT);
@@ -2487,6 +2520,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ mutex_init(&hdev->mgmt_pending_lock);
ida_init(&hdev->unset_handle_ida);
@@ -2618,7 +2652,7 @@ int hci_register_dev(struct hci_dev *hdev)
/* Devices that are marked for raw-only usage are unconfigured
* and should not be included in normal operation.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
/* Mark Remote Wakeup connection flag as supported if driver has wakeup
@@ -2665,6 +2699,9 @@ void hci_unregister_dev(struct hci_dev *hdev)
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ synchronize_srcu(&hdev->srcu);
+ cleanup_srcu_struct(&hdev->srcu);
+
disable_work_sync(&hdev->rx_work);
disable_work_sync(&hdev->cmd_work);
disable_work_sync(&hdev->tx_work);
@@ -2745,7 +2782,7 @@ int hci_register_suspend_notifier(struct hci_dev *hdev)
int ret = 0;
if (!hdev->suspend_notifier.notifier_call &&
- !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) {
+ !hci_test_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER)) {
hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
ret = register_pm_notifier(&hdev->suspend_notifier);
}
@@ -2899,12 +2936,14 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
case HCI_ACLDATA_PKT:
/* Detect if ISO packet has been sent as ACL */
if (hci_conn_num(hdev, CIS_LINK) ||
- hci_conn_num(hdev, BIS_LINK)) {
+ hci_conn_num(hdev, BIS_LINK) ||
+ hci_conn_num(hdev, PA_LINK)) {
__u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle);
__u8 type;
type = hci_conn_lookup_type(hdev, hci_handle(handle));
- if (type == CIS_LINK || type == BIS_LINK)
+ if (type == CIS_LINK || type == BIS_LINK ||
+ type == PA_LINK)
hci_skb_pkt_type(skb) = HCI_ISODATA_PKT;
}
break;
@@ -3359,6 +3398,7 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
break;
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
cnt = hdev->iso_mtu ? hdev->iso_cnt :
hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt;
break;
@@ -3372,7 +3412,7 @@ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote)
}
static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
- __u8 type2, int *quote)
+ int *quote)
{
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *conn = NULL, *c;
@@ -3384,7 +3424,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
rcu_read_lock();
list_for_each_entry_rcu(c, &h->list, list) {
- if ((c->type != type && c->type != type2) ||
+ if (c->type != type ||
skb_queue_empty(&c->data_q))
continue;
@@ -3417,23 +3457,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
bt_dev_err(hdev, "link tx timeout");
- rcu_read_lock();
+ hci_dev_lock(hdev);
/* Kill stalled connections */
- list_for_each_entry_rcu(c, &h->list, list) {
+ list_for_each_entry(c, &h->list, list) {
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
- /* hci_disconnect might sleep, so, we have to release
- * the RCU read lock before calling it.
- */
- rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
- rcu_read_lock();
}
}
- rcu_read_unlock();
+ hci_dev_unlock(hdev);
}
static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
@@ -3593,7 +3628,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type)
else
cnt = &hdev->sco_cnt;
- while (*cnt && (conn = hci_low_sent(hdev, type, type, &quote))) {
+ while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) {
while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
BT_DBG("skb %p len %d", skb, skb->len);
hci_send_conn_frame(hdev, conn, skb);
@@ -3712,8 +3747,8 @@ static void hci_sched_le(struct hci_dev *hdev)
hci_prio_recalculate(hdev, LE_LINK);
}
-/* Schedule CIS */
-static void hci_sched_iso(struct hci_dev *hdev)
+/* Schedule iso */
+static void hci_sched_iso(struct hci_dev *hdev, __u8 type)
{
struct hci_conn *conn;
struct sk_buff *skb;
@@ -3721,14 +3756,12 @@ static void hci_sched_iso(struct hci_dev *hdev)
BT_DBG("%s", hdev->name);
- if (!hci_conn_num(hdev, CIS_LINK) &&
- !hci_conn_num(hdev, BIS_LINK))
+ if (!hci_conn_num(hdev, type))
return;
cnt = hdev->iso_pkts ? &hdev->iso_cnt :
hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt;
- while (*cnt && (conn = hci_low_sent(hdev, CIS_LINK, BIS_LINK,
- &quote))) {
+ while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) {
while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
BT_DBG("skb %p len %d", skb, skb->len);
hci_send_conn_frame(hdev, conn, skb);
@@ -3753,7 +3786,9 @@ static void hci_tx_work(struct work_struct *work)
/* Schedule queues and send stuff to HCI driver */
hci_sched_sco(hdev, SCO_LINK);
hci_sched_sco(hdev, ESCO_LINK);
- hci_sched_iso(hdev);
+ hci_sched_iso(hdev, CIS_LINK);
+ hci_sched_iso(hdev, BIS_LINK);
+ hci_sched_iso(hdev, PA_LINK);
hci_sched_acl(hdev);
hci_sched_le(hdev);
}
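The hci_dev_get_srcu()/hci_dev_put_srcu() helpers above, together with the synchronize_srcu()/cleanup_srcu_struct() pair in hci_unregister_dev(), follow the usual SRCU read-side pattern: a reader pins the device across a sleepable section, and teardown waits for every such reader to drop out before the srcu_struct is destroyed. A stripped-down sketch of that pattern (dev_obj, dev_do_something and dev_teardown are made-up names, not the hdev code):

#include <linux/srcu.h>

struct dev_obj {
	struct srcu_struct srcu;
	/* ... the rest of the device state ... */
};

/* Reader side: the returned index pins the object for the duration of a
 * section that is allowed to sleep. */
static void dev_do_something(struct dev_obj *d)
{
	int idx = srcu_read_lock(&d->srcu);

	/* ... use the object; it will not be torn down underneath us ... */

	srcu_read_unlock(&d->srcu, idx);
}

/* Teardown side: wait for all in-flight readers, then release the SRCU
 * state before freeing the rest of the object. */
static void dev_teardown(struct dev_obj *d)
{
	synchronize_srcu(&d->srcu);
	cleanup_srcu_struct(&d->srcu);
}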
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index f625074d1f00..99e2e9fc70e8 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -38,7 +38,7 @@ static ssize_t __name ## _read(struct file *file, \
struct hci_dev *hdev = file->private_data; \
char buf[3]; \
\
- buf[0] = test_bit(__quirk, &hdev->quirks) ? 'Y' : 'N'; \
+ buf[0] = test_bit(__quirk, hdev->quirk_flags) ? 'Y' : 'N'; \
buf[1] = '\n'; \
buf[2] = '\0'; \
return simple_read_from_buffer(user_buf, count, ppos, buf, 2); \
@@ -59,10 +59,10 @@ static ssize_t __name ## _write(struct file *file, \
if (err) \
return err; \
\
- if (enable == test_bit(__quirk, &hdev->quirks)) \
+ if (enable == test_bit(__quirk, hdev->quirk_flags)) \
return -EALREADY; \
\
- change_bit(__quirk, &hdev->quirks); \
+ change_bit(__quirk, hdev->quirk_flags); \
\
return count; \
} \
@@ -1356,7 +1356,7 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
* for the vendor callback. Instead just store the desired value and
* the setting will be programmed when the controller gets powered on.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) &&
(!test_bit(HCI_RUNNING, &hdev->flags) ||
hci_dev_test_flag(hdev, HCI_USER_CHANNEL)))
goto done;
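The quirk accesses above switch from test_bit() on the single-word hdev->quirks field to test_bit()/change_bit() on the hdev->quirk_flags bitmap (wrapped as hci_test_quirk() elsewhere in the series), which removes the 32/64 quirk limit of an unsigned long. A small userspace model of that kind of bitmap accessor, with dev_quirks, quirk_test and quirk_set as illustrative names:

#include <limits.h>
#include <stdbool.h>

#define QUIRK_MAX		128	/* no longer bounded by one long */
#define BITS_PER_LONG		(sizeof(unsigned long) * CHAR_BIT)
#define BITMAP_LONGS(nbits)	(((nbits) + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct dev_quirks {
	unsigned long flags[BITMAP_LONGS(QUIRK_MAX)];
};

/* Equivalent of test_bit(nr, q->flags). */
static bool quirk_test(const struct dev_quirks *q, unsigned int nr)
{
	return (q->flags[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1UL;
}

/* Equivalent of set_bit(nr, q->flags) for a single-threaded caller. */
static void quirk_set(struct dev_quirks *q, unsigned int nr)
{
	q->flags[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}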
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 66052d6aaa1d..8aa5039b975a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -908,8 +908,8 @@ static u8 hci_cc_read_local_ext_features(struct hci_dev *hdev, void *data,
return rp->status;
if (hdev->max_page < rp->max_page) {
- if (test_bit(HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2,
- &hdev->quirks))
+ if (hci_test_quirk(hdev,
+ HCI_QUIRK_BROKEN_LOCAL_EXT_FEATURES_PAGE_2))
bt_dev_warn(hdev, "broken local ext features page 2");
else
hdev->max_page = rp->max_page;
@@ -936,7 +936,7 @@ static u8 hci_cc_read_buffer_size(struct hci_dev *hdev, void *data,
hdev->acl_pkts = __le16_to_cpu(rp->acl_max_pkt);
hdev->sco_pkts = __le16_to_cpu(rp->sco_max_pkt);
- if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_FIXUP_BUFFER_SIZE)) {
hdev->sco_mtu = 64;
hdev->sco_pkts = 8;
}
@@ -2150,40 +2150,6 @@ static u8 hci_cc_set_adv_param(struct hci_dev *hdev, void *data,
return rp->status;
}
-static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
-{
- struct hci_rp_le_set_ext_adv_params *rp = data;
- struct hci_cp_le_set_ext_adv_params *cp;
- struct adv_info *adv_instance;
-
- bt_dev_dbg(hdev, "status 0x%2.2x", rp->status);
-
- if (rp->status)
- return rp->status;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS);
- if (!cp)
- return rp->status;
-
- hci_dev_lock(hdev);
- hdev->adv_addr_type = cp->own_addr_type;
- if (!cp->handle) {
- /* Store in hdev for instance 0 */
- hdev->adv_tx_power = rp->tx_power;
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- adv_instance->tx_power = rp->tx_power;
- }
- /* Update adv data as tx power is known now */
- hci_update_adv_data(hdev, cp->handle);
-
- hci_dev_unlock(hdev);
-
- return rp->status;
-}
-
static u8 hci_cc_read_rssi(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -3005,7 +2971,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data,
* state to indicate completion.
*/
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) ||
- !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
goto unlock;
}
@@ -3024,7 +2990,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data,
* state to indicate completion.
*/
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN) ||
- !test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY))
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
}
@@ -3648,8 +3614,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
/* We skip the WRITE_AUTH_PAYLOAD_TIMEOUT for ATS2851 based controllers
* to avoid unexpected SMP command errors when pairing.
*/
- if (test_bit(HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT,
- &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_WRITE_AUTH_PAYLOAD_TIMEOUT))
goto notify;
/* Set the default Authenticated Payload Timeout after
@@ -4164,8 +4129,6 @@ static const struct hci_cc {
HCI_CC(HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS,
hci_cc_le_read_num_adv_sets,
sizeof(struct hci_rp_le_read_num_supported_adv_sets)),
- HCI_CC(HCI_OP_LE_SET_EXT_ADV_PARAMS, hci_cc_set_ext_adv_param,
- sizeof(struct hci_rp_le_set_ext_adv_params)),
HCI_CC_STATUS(HCI_OP_LE_SET_EXT_ADV_ENABLE,
hci_cc_le_set_ext_adv_enable),
HCI_CC_STATUS(HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
@@ -4469,6 +4432,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data,
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
if (hdev->iso_pkts) {
hdev->iso_cnt += count;
if (hdev->iso_cnt > hdev->iso_pkts)
@@ -5754,7 +5718,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->state = BT_CONFIG;
/* Store current advertising instance as connection advertising instance
- * when sotfware rotation is in use so it can be re-enabled when
+ * when software rotation is in use so it can be re-enabled when
* disconnected.
*/
if (!ext_adv_capable(hdev))
@@ -5950,7 +5914,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
* while we have an existing one in peripheral role.
*/
if (hdev->conn_hash.le_num_peripheral > 0 &&
- (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) ||
+ (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LE_STATES) ||
!(hdev->le_states[3] & 0x10)))
return NULL;
@@ -6276,6 +6240,11 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type)
{
+ u16 pdu_type = evt_type & ~LE_EXT_ADV_DATA_STATUS_MASK;
+
+ if (!pdu_type)
+ return LE_ADV_NONCONN_IND;
+
if (evt_type & LE_EXT_ADV_LEGACY_PDU) {
switch (evt_type) {
case LE_LEGACY_ADV_IND:
@@ -6307,8 +6276,7 @@ static u8 ext_evt_type_to_legacy(struct hci_dev *hdev, u16 evt_type)
if (evt_type & LE_EXT_ADV_SCAN_IND)
return LE_ADV_SCAN_IND;
- if (evt_type == LE_EXT_ADV_NON_CONN_IND ||
- evt_type & LE_EXT_ADV_DIRECT_IND)
+ if (evt_type & LE_EXT_ADV_DIRECT_IND)
return LE_ADV_NONCONN_IND;
invalid:
@@ -6346,8 +6314,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
evt_type = __le16_to_cpu(info->type) & LE_EXT_ADV_EVT_TYPE_MASK;
legacy_evt_type = ext_evt_type_to_legacy(hdev, evt_type);
- if (test_bit(HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY,
- &hdev->quirks)) {
+ if (hci_test_quirk(hdev,
+ HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY)) {
info->primary_phy &= 0x1f;
info->secondary_phy &= 0x1f;
}
@@ -6387,8 +6355,8 @@ static int hci_le_pa_term_sync(struct hci_dev *hdev, __le16 handle)
return hci_send_cmd(hdev, HCI_OP_LE_PA_TERM_SYNC, sizeof(cp), &cp);
}
-static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
+static void hci_le_pa_sync_established_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
{
struct hci_ev_le_pa_sync_established *ev = data;
int mask = hdev->link_mode;
@@ -6414,7 +6382,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
conn->sync_handle = le16_to_cpu(ev->handle);
conn->sid = HCI_SID_INVALID;
- mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, BIS_LINK,
+ mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, PA_LINK,
&flags);
if (!(mask & HCI_LM_ACCEPT)) {
hci_le_pa_term_sync(hdev, ev->handle);
@@ -6425,7 +6393,7 @@ static void hci_le_pa_sync_estabilished_evt(struct hci_dev *hdev, void *data,
goto unlock;
/* Add connection to indicate PA sync event */
- pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY,
+ pa_sync = hci_conn_add_unset(hdev, PA_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE);
if (IS_ERR(pa_sync))
@@ -6456,7 +6424,7 @@ static void hci_le_per_adv_report_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, BIS_LINK, &flags);
+ mask |= hci_proto_connect_ind(hdev, BDADDR_ANY, PA_LINK, &flags);
if (!(mask & HCI_LM_ACCEPT))
goto unlock;
@@ -6718,8 +6686,8 @@ unlock:
hci_dev_unlock(hdev);
}
-static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data,
- struct sk_buff *skb)
+static void hci_le_cis_established_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
{
struct hci_evt_le_cis_established *ev = data;
struct hci_conn *conn;
@@ -6913,7 +6881,8 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
/* Connect all BISes that are bound to the BIG */
while ((conn = hci_conn_hash_lookup_big_state(hdev, ev->handle,
- BT_BOUND))) {
+ BT_BOUND,
+ HCI_ROLE_MASTER))) {
if (ev->status) {
hci_connect_cfm(conn, ev->status);
hci_conn_del(conn);
@@ -6946,7 +6915,7 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data,
static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
- struct hci_evt_le_big_sync_estabilished *ev = data;
+ struct hci_evt_le_big_sync_established *ev = data;
struct hci_conn *bis, *conn;
int i;
@@ -7002,7 +6971,10 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
bis->iso_qos.bcast.in.sdu = le16_to_cpu(ev->max_pdu);
if (!ev->status) {
+ bis->state = BT_CONNECTED;
set_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_debugfs_create_conn(bis);
+ hci_conn_add_sysfs(bis);
hci_iso_setup_path(bis);
}
}
@@ -7026,6 +6998,37 @@ unlock:
hci_dev_unlock(hdev);
}
+static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data,
+ struct sk_buff *skb)
+{
+ struct hci_evt_le_big_sync_lost *ev = data;
+ struct hci_conn *bis, *conn;
+
+ bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle);
+
+ hci_dev_lock(hdev);
+
+ /* Delete the pa sync connection */
+ bis = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle);
+ if (bis) {
+ conn = hci_conn_hash_lookup_pa_sync_handle(hdev,
+ bis->sync_handle);
+ if (conn)
+ hci_conn_del(conn);
+ }
+
+ /* Delete each bis connection */
+ while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle,
+ BT_CONNECTED,
+ HCI_ROLE_SLAVE))) {
+ clear_bit(HCI_CONN_BIG_SYNC, &bis->flags);
+ hci_disconn_cfm(bis, ev->reason);
+ hci_conn_del(bis);
+ }
+
+ hci_dev_unlock(hdev);
+}
+
static void hci_le_big_info_adv_report_evt(struct hci_dev *hdev, void *data,
struct sk_buff *skb)
{
@@ -7077,7 +7080,7 @@ unlock:
/* Entries in this table shall have their position according to the subevent
* opcode they handle so the use of the macros above is recommend since it does
* attempt to initialize at its proper index using Designated Initializers that
- * way events without a callback function can be ommited.
+ * way events without a callback function can be omitted.
*/
static const struct hci_le_ev {
void (*func)(struct hci_dev *hdev, void *data, struct sk_buff *skb);
@@ -7123,7 +7126,7 @@ static const struct hci_le_ev {
HCI_MAX_EVENT_SIZE),
/* [0x0e = HCI_EV_LE_PA_SYNC_ESTABLISHED] */
HCI_LE_EV(HCI_EV_LE_PA_SYNC_ESTABLISHED,
- hci_le_pa_sync_estabilished_evt,
+ hci_le_pa_sync_established_evt,
sizeof(struct hci_ev_le_pa_sync_established)),
/* [0x0f = HCI_EV_LE_PER_ADV_REPORT] */
HCI_LE_EV_VL(HCI_EV_LE_PER_ADV_REPORT,
@@ -7134,7 +7137,7 @@ static const struct hci_le_ev {
HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt,
sizeof(struct hci_evt_le_ext_adv_set_term)),
/* [0x19 = HCI_EVT_LE_CIS_ESTABLISHED] */
- HCI_LE_EV(HCI_EVT_LE_CIS_ESTABLISHED, hci_le_cis_estabilished_evt,
+ HCI_LE_EV(HCI_EVT_LE_CIS_ESTABLISHED, hci_le_cis_established_evt,
sizeof(struct hci_evt_le_cis_established)),
/* [0x1a = HCI_EVT_LE_CIS_REQ] */
HCI_LE_EV(HCI_EVT_LE_CIS_REQ, hci_le_cis_req_evt,
@@ -7147,7 +7150,12 @@ static const struct hci_le_ev {
/* [0x1d = HCI_EV_LE_BIG_SYNC_ESTABLISHED] */
HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_ESTABLISHED,
hci_le_big_sync_established_evt,
- sizeof(struct hci_evt_le_big_sync_estabilished),
+ sizeof(struct hci_evt_le_big_sync_established),
+ HCI_MAX_EVENT_SIZE),
+ /* [0x1e = HCI_EVT_LE_BIG_SYNC_LOST] */
+ HCI_LE_EV_VL(HCI_EVT_LE_BIG_SYNC_LOST,
+ hci_le_big_sync_lost_evt,
+ sizeof(struct hci_evt_le_big_sync_lost),
HCI_MAX_EVENT_SIZE),
/* [0x22 = HCI_EVT_LE_BIG_INFO_ADV_REPORT] */
HCI_LE_EV_VL(HCI_EVT_LE_BIG_INFO_ADV_REPORT,
@@ -7433,7 +7441,7 @@ static const struct hci_ev {
/* [0x2c = HCI_EV_SYNC_CONN_COMPLETE] */
HCI_EV(HCI_EV_SYNC_CONN_COMPLETE, hci_sync_conn_complete_evt,
sizeof(struct hci_ev_sync_conn_complete)),
- /* [0x2d = HCI_EV_EXTENDED_INQUIRY_RESULT] */
+ /* [0x2f = HCI_EV_EXTENDED_INQUIRY_RESULT] */
HCI_EV_VL(HCI_EV_EXTENDED_INQUIRY_RESULT,
hci_extended_inquiry_result_evt,
sizeof(struct hci_ev_ext_inquiry_result), HCI_MAX_EVENT_SIZE),
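On the ext_evt_type_to_legacy() change above: the extended advertising report event type is a bit field where bits 0-4 describe the PDU properties and bits 5-6 carry the data status, so classification has to look at the property bits only, and a report with no property bits set is plain non-connectable, non-scannable advertising. A small sketch of that check, with the bit values taken from the Core spec event definition (the macro and function names here are illustrative):

#include <stdbool.h>
#include <stdint.h>

#define EVT_CONNECTABLE		0x0001
#define EVT_SCANNABLE		0x0002
#define EVT_DIRECTED		0x0004
#define EVT_SCAN_RSP		0x0008
#define EVT_LEGACY_PDU		0x0010
#define EVT_DATA_STATUS_MASK	0x0060	/* complete / incomplete / truncated */

/* Assumes evt_type has already been masked down to the defined event-type
 * bits. True when, once the data-status bits are ignored, the report has no
 * connectable/scannable/directed/scan-response/legacy properties, i.e. it
 * maps onto a non-connectable, non-scannable advertisement. */
static bool ext_report_is_nonconn(uint16_t evt_type)
{
	return (evt_type & ~EVT_DATA_STATUS_MASK) == 0;
}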
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 428ee5c7de7e..fc866759910d 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -118,7 +118,7 @@ static void hci_sock_free_cookie(struct sock *sk)
int id = hci_pi(sk)->cookie;
if (id) {
- hci_pi(sk)->cookie = 0xffffffff;
+ hci_pi(sk)->cookie = 0;
ida_free(&sock_cookie_ida, id);
}
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 62d1ff951ebe..2b4f21fbf9c1 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -393,7 +393,7 @@ static void le_scan_disable(struct work_struct *work)
if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED)
goto _return;
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) {
if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
hdev->discovery.state != DISCOVERY_RESOLVING)
goto discov_stopped;
@@ -1205,9 +1205,126 @@ static int hci_set_adv_set_random_addr_sync(struct hci_dev *hdev, u8 instance,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
+static int
+hci_set_ext_adv_params_sync(struct hci_dev *hdev, struct adv_info *adv,
+ const struct hci_cp_le_set_ext_adv_params *cp,
+ struct hci_rp_le_set_ext_adv_params *rp)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(*cp),
+ cp, HCI_CMD_TIMEOUT);
+
+ /* If the command returns only a status event, skb will be set to -ENODATA */
+ if (skb == ERR_PTR(-ENODATA))
+ return 0;
+
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Opcode 0x%4.4x failed: %ld",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*rp)) {
+ bt_dev_err(hdev, "Invalid response length for 0x%4.4x: %u",
+ HCI_OP_LE_SET_EXT_ADV_PARAMS, skb->len);
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ memcpy(rp, skb->data, sizeof(*rp));
+ kfree_skb(skb);
+
+ if (!rp->status) {
+ hdev->adv_addr_type = cp->own_addr_type;
+ if (!cp->handle) {
+ /* Store in hdev for instance 0 */
+ hdev->adv_tx_power = rp->tx_power;
+ } else if (adv) {
+ adv->tx_power = rp->tx_power;
+ }
+ }
+
+ return rp->status;
+}
+
+static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
+ HCI_MAX_EXT_AD_LENGTH);
+ u8 len;
+ struct adv_info *adv = NULL;
+ int err;
+
+ if (instance) {
+ adv = hci_find_adv_instance(hdev, instance);
+ if (!adv || !adv->adv_data_changed)
+ return 0;
+ }
+
+ len = eir_create_adv_data(hdev, instance, pdu->data,
+ HCI_MAX_EXT_AD_LENGTH);
+
+ pdu->length = len;
+ pdu->handle = adv ? adv->handle : instance;
+ pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+
+ err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
+ struct_size(pdu, data, len), pdu,
+ HCI_CMD_TIMEOUT);
+ if (err)
+ return err;
+
+ /* Update data if the command succeeds */
+ if (adv) {
+ adv->adv_data_changed = false;
+ } else {
+ memcpy(hdev->adv_data, pdu->data, len);
+ hdev->adv_data_len = len;
+ }
+
+ return 0;
+}
+
+static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ struct hci_cp_le_set_adv_data cp;
+ u8 len;
+
+ memset(&cp, 0, sizeof(cp));
+
+ len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data));
+
+ /* There's nothing to do if the data hasn't changed */
+ if (hdev->adv_data_len == len &&
+ memcmp(cp.data, hdev->adv_data, len) == 0)
+ return 0;
+
+ memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ hdev->adv_data_len = len;
+
+ cp.length = len;
+
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+}
+
+int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
+{
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+ return 0;
+
+ if (ext_adv_capable(hdev))
+ return hci_set_ext_adv_data_sync(hdev, instance);
+
+ return hci_set_adv_data_sync(hdev, instance);
+}
+
int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
bool connectable;
u32 flags;
bdaddr_t random_addr;
@@ -1228,7 +1345,7 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
* Command Disallowed error, so we must first disable the
* instance if it is active.
*/
- if (adv && !adv->pending) {
+ if (adv) {
err = hci_disable_ext_adv_instance_sync(hdev, instance);
if (err)
return err;
@@ -1261,10 +1378,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
hci_cpu_to_le24(adv->min_interval, cp.min_interval);
hci_cpu_to_le24(adv->max_interval, cp.max_interval);
cp.tx_power = adv->tx_power;
+ cp.sid = adv->sid;
} else {
hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval);
hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval);
cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE;
+ cp.sid = 0x00;
}
secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK);
@@ -1314,8 +1433,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance)
cp.secondary_phy = HCI_ADV_PHY_1M;
}
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, adv, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -1559,7 +1682,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
{
u8 bid[3];
- u8 ad[4 + 3];
+ u8 ad[HCI_MAX_EXT_AD_LENGTH];
+ u8 len;
/* Skip if NULL adv as instance 0x00 is used for general purpose
* advertising so it cannot used for the likes of Broadcast Announcement
@@ -1585,14 +1709,16 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
- eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
- hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL);
+ len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ memcpy(ad + len, adv->adv_data, adv->adv_data_len);
+ hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
+ ad, 0, NULL);
return hci_update_adv_data_sync(hdev, adv->instance);
}
-int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
- u8 *data, u32 flags, u16 min_interval,
+int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid,
+ u8 data_len, u8 *data, u32 flags, u16 min_interval,
u16 max_interval, u16 sync_interval)
{
struct adv_info *adv = NULL;
@@ -1603,9 +1729,28 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
- /* Create an instance if that could not be found */
- if (!adv) {
- adv = hci_add_per_instance(hdev, instance, flags,
+ if (adv) {
+ if (sid != HCI_SID_INVALID && adv->sid != sid) {
+ /* If the SID doesn't match, attempt to find the
+ * instance by SID.
+ */
+ adv = hci_find_adv_sid(hdev, sid);
+ if (!adv) {
+ bt_dev_err(hdev,
+ "Unable to find adv_info");
+ return -EINVAL;
+ }
+ }
+
+ /* Turn it into periodic advertising */
+ adv->periodic = true;
+ adv->per_adv_data_len = data_len;
+ if (data)
+ memcpy(adv->per_adv_data, data, data_len);
+ adv->flags = flags;
+ } else if (!adv) {
+ /* Create an instance if that could not be found */
+ adv = hci_add_per_instance(hdev, instance, sid, flags,
data_len, data,
sync_interval,
sync_interval);
@@ -1798,78 +1943,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason)
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
-static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- DEFINE_FLEX(struct hci_cp_le_set_ext_adv_data, pdu, data, length,
- HCI_MAX_EXT_AD_LENGTH);
- u8 len;
- struct adv_info *adv = NULL;
- int err;
-
- if (instance) {
- adv = hci_find_adv_instance(hdev, instance);
- if (!adv || !adv->adv_data_changed)
- return 0;
- }
-
- len = eir_create_adv_data(hdev, instance, pdu->data);
-
- pdu->length = len;
- pdu->handle = adv ? adv->handle : instance;
- pdu->operation = LE_SET_ADV_DATA_OP_COMPLETE;
- pdu->frag_pref = LE_SET_ADV_DATA_NO_FRAG;
-
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_DATA,
- struct_size(pdu, data, len), pdu,
- HCI_CMD_TIMEOUT);
- if (err)
- return err;
-
- /* Update data if the command succeed */
- if (adv) {
- adv->adv_data_changed = false;
- } else {
- memcpy(hdev->adv_data, pdu->data, len);
- hdev->adv_data_len = len;
- }
-
- return 0;
-}
-
-static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- struct hci_cp_le_set_adv_data cp;
- u8 len;
-
- memset(&cp, 0, sizeof(cp));
-
- len = eir_create_adv_data(hdev, instance, cp.data);
-
- /* There's nothing to do if the data hasn't changed */
- if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
- return 0;
-
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
- hdev->adv_data_len = len;
-
- cp.length = len;
-
- return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_DATA,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
-}
-
-int hci_update_adv_data_sync(struct hci_dev *hdev, u8 instance)
-{
- if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
- return 0;
-
- if (ext_adv_capable(hdev))
- return hci_set_ext_adv_data_sync(hdev, instance);
-
- return hci_set_adv_data_sync(hdev, instance);
-}
-
int hci_schedule_adv_instance_sync(struct hci_dev *hdev, u8 instance,
bool force)
{
@@ -1945,13 +2018,10 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk)
static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
{
struct adv_info *adv, *n;
- int err = 0;
if (ext_adv_capable(hdev))
/* Remove all existing sets */
- err = hci_clear_adv_sets_sync(hdev, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_clear_adv_sets_sync(hdev, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -1979,13 +2049,11 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force)
static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance,
struct sock *sk)
{
- int err = 0;
+ int err;
/* If we use extended advertising, instance has to be removed first. */
if (ext_adv_capable(hdev))
- err = hci_remove_ext_adv_instance_sync(hdev, instance, sk);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_remove_ext_adv_instance_sync(hdev, instance, sk);
/* This is safe as long as there is no command send while the lock is
* held.
@@ -2084,16 +2152,13 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type)
int hci_disable_advertising_sync(struct hci_dev *hdev)
{
u8 enable = 0x00;
- int err = 0;
/* If controller is not advertising we are done. */
if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
return 0;
if (ext_adv_capable(hdev))
- err = hci_disable_ext_adv_instance_sync(hdev, 0x00);
- if (ext_adv_capable(hdev))
- return err;
+ return hci_disable_ext_adv_instance_sync(hdev, 0x00);
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE,
sizeof(enable), &enable, HCI_CMD_TIMEOUT);
@@ -2456,6 +2521,10 @@ static int hci_pause_advertising_sync(struct hci_dev *hdev)
int err;
int old_state;
+ /* If controller is not advertising we are done. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ADV))
+ return 0;
+
/* If already been paused there is nothing to do. */
if (hdev->advertising_paused)
return 0;
@@ -2860,7 +2929,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (sent) {
struct hci_conn *conn;
- conn = hci_conn_hash_lookup_ba(hdev, BIS_LINK,
+ conn = hci_conn_hash_lookup_ba(hdev, PA_LINK,
&sent->bdaddr);
if (conn) {
struct bt_iso_qos *qos = &conn->iso_qos;
@@ -3518,7 +3587,7 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;
- if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BDADDR_PROPERTY_BROKEN))
baswap(&hdev->public_addr, &ba);
else
bacpy(&hdev->public_addr, &ba);
@@ -3593,7 +3662,7 @@ static int hci_init0_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
/* Reset */
- if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) {
err = hci_reset_sync(hdev);
if (err)
return err;
@@ -3606,7 +3675,7 @@ static int hci_unconf_init_sync(struct hci_dev *hdev)
{
int err;
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return 0;
err = hci_init0_sync(hdev);
@@ -3649,7 +3718,7 @@ static int hci_read_local_cmds_sync(struct hci_dev *hdev)
* supported commands.
*/
if (hdev->hci_ver > BLUETOOTH_VER_1_1 &&
- !test_bit(HCI_QUIRK_BROKEN_LOCAL_COMMANDS, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_BROKEN_LOCAL_COMMANDS))
return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_COMMANDS,
0, NULL, HCI_CMD_TIMEOUT);
@@ -3663,7 +3732,7 @@ static int hci_init1_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "");
/* Reset */
- if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE)) {
err = hci_reset_sync(hdev);
if (err)
return err;
@@ -3726,7 +3795,7 @@ static int hci_set_event_filter_sync(struct hci_dev *hdev, u8 flt_type,
if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
return 0;
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -3753,7 +3822,7 @@ static int hci_clear_event_filter_sync(struct hci_dev *hdev)
* a hci_set_event_filter_sync() call succeeds, but we do
* the check both for parity and as a future reminder.
*/
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
return hci_set_event_filter_sync(hdev, HCI_FLT_CLEAR_ALL, 0x00,
@@ -3777,7 +3846,7 @@ static int hci_write_sync_flowctl_sync(struct hci_dev *hdev)
/* Check if the controller supports SCO and HCI_OP_WRITE_SYNC_FLOWCTL */
if (!lmp_sco_capable(hdev) || !(hdev->commands[10] & BIT(4)) ||
- !test_bit(HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_SYNC_FLOWCTL_SUPPORTED))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -3852,7 +3921,7 @@ static int hci_write_inquiry_mode_sync(struct hci_dev *hdev)
u8 mode;
if (!lmp_inq_rssi_capable(hdev) &&
- !test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE))
return 0;
/* If Extended Inquiry Result events are supported, then
@@ -4042,7 +4111,7 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
}
if (lmp_inq_rssi_capable(hdev) ||
- test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_FIXUP_INQUIRY_MODE))
events[4] |= 0x02; /* Inquiry Result with RSSI */
if (lmp_ext_feat_capable(hdev))
@@ -4094,7 +4163,7 @@ static int hci_read_stored_link_key_sync(struct hci_dev *hdev)
struct hci_cp_read_stored_link_key cp;
if (!(hdev->commands[6] & 0x20) ||
- test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -4143,7 +4212,7 @@ static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[18] & 0x04) ||
!(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING,
@@ -4157,7 +4226,7 @@ static int hci_read_page_scan_type_sync(struct hci_dev *hdev)
* this command in the bit mask of supported commands.
*/
if (!(hdev->commands[13] & 0x01) ||
- test_bit(HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_PAGE_SCAN_TYPE))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_READ_PAGE_SCAN_TYPE,
@@ -4352,7 +4421,7 @@ static int hci_le_read_adv_tx_power_sync(struct hci_dev *hdev)
static int hci_le_read_tx_power_sync(struct hci_dev *hdev)
{
if (!(hdev->commands[38] & 0x80) ||
- test_bit(HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_READ_TRANSMIT_POWER))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_TRANSMIT_POWER,
@@ -4395,7 +4464,7 @@ static int hci_le_set_rpa_timeout_sync(struct hci_dev *hdev)
__le16 timeout = cpu_to_le16(hdev->rpa_timeout);
if (!(hdev->commands[35] & 0x04) ||
- test_bit(HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_SET_RPA_TIMEOUT))
return 0;
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_RPA_TIMEOUT,
@@ -4540,7 +4609,7 @@ static int hci_delete_stored_link_key_sync(struct hci_dev *hdev)
* just disable this command.
*/
if (!(hdev->commands[6] & 0x80) ||
- test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_STORED_LINK_KEY))
return 0;
memset(&cp, 0, sizeof(cp));
@@ -4666,7 +4735,7 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev)
if (!(hdev->commands[18] & 0x08) ||
!(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) ||
- test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks))
+ hci_test_quirk(hdev, HCI_QUIRK_BROKEN_ERR_DATA_REPORTING))
return 0;
if (enabled == hdev->err_data_reporting)
@@ -4879,7 +4948,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
size_t i;
if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
- !test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks))
+ !hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_SETUP))
return 0;
bt_dev_dbg(hdev, "");
@@ -4890,7 +4959,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
ret = hdev->setup(hdev);
for (i = 0; i < ARRAY_SIZE(hci_broken_table); i++) {
- if (test_bit(hci_broken_table[i].quirk, &hdev->quirks))
+ if (hci_test_quirk(hdev, hci_broken_table[i].quirk))
bt_dev_warn(hdev, "%s", hci_broken_table[i].desc);
}
@@ -4898,10 +4967,10 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
* BD_ADDR invalid before creating the HCI device or in
* its setup callback.
*/
- invalid_bdaddr = test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+ invalid_bdaddr = hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY);
if (!ret) {
- if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
hci_dev_get_bd_addr_from_property(hdev);
@@ -4923,7 +4992,7 @@ static int hci_dev_setup_sync(struct hci_dev *hdev)
* In case any of them is set, the controller has to
* start up as unconfigured.
*/
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) ||
invalid_bdaddr)
hci_dev_set_flag(hdev, HCI_UNCONFIGURED);
@@ -4983,7 +5052,7 @@ static int hci_dev_init_sync(struct hci_dev *hdev)
* then they need to be reprogrammed after the init procedure
* completed.
*/
- if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_NON_PERSISTENT_DIAG) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
ret = hdev->set_diag(hdev, true);
@@ -5240,7 +5309,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
/* Reset device */
skb_queue_purge(&hdev->cmd_q);
atomic_set(&hdev->cmd_cnt, 1);
- if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) &&
!auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
set_bit(HCI_INIT, &hdev->flags);
hci_reset_sync(hdev);
@@ -5424,7 +5493,7 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
{
struct hci_cp_disconnect cp;
- if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
+ if (conn->type == BIS_LINK || conn->type == PA_LINK) {
/* This is a BIS connection, hci_conn_del will
* do the necessary cleanup.
*/
@@ -5493,7 +5562,7 @@ static int hci_connect_cancel_sync(struct hci_dev *hdev, struct hci_conn *conn,
return HCI_ERROR_LOCAL_HOST_TERM;
}
- if (conn->type == BIS_LINK) {
+ if (conn->type == BIS_LINK || conn->type == PA_LINK) {
/* There is no way to cancel a BIS without terminating the BIG
* which is done later on connection cleanup.
*/
@@ -5558,7 +5627,7 @@ static int hci_reject_conn_sync(struct hci_dev *hdev, struct hci_conn *conn,
if (conn->type == CIS_LINK)
return hci_le_reject_cis_sync(hdev, conn, reason);
- if (conn->type == BIS_LINK)
+ if (conn->type == BIS_LINK || conn->type == PA_LINK)
return -EINVAL;
if (conn->type == SCO_LINK || conn->type == ESCO_LINK)
@@ -5608,7 +5677,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
}
/* Cleanup hci_conn object if it cannot be cancelled as it
- * likelly means the controller and host stack are out of sync
+ * likely means the controller and host stack are out of sync
* or in case of LE it was still scanning so it can be cleanup
* safely.
*/
@@ -5890,7 +5959,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval)
own_addr_type = ADDR_LE_DEV_PUBLIC;
if (hci_is_adv_monitoring(hdev) ||
- (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
+ (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER) &&
hdev->discovery.result_filtering)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
@@ -5953,8 +6022,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev)
* and LE scanning are done sequentially with separate
* timeouts.
*/
- if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY,
- &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_SIMULTANEOUS_DISCOVERY)) {
timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
/* During simultaneous discovery, we double LE scan
* interval. We must leave some time for the controller
@@ -6031,7 +6099,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
/* Some fake CSR controllers lock up after setting this type of
* filter, so avoid sending the request altogether.
*/
- if (test_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL))
return 0;
/* Always clear event filter when starting */
@@ -6048,7 +6116,7 @@ static int hci_update_event_filter_sync(struct hci_dev *hdev)
&b->bdaddr,
HCI_CONN_SETUP_AUTO_ON);
if (err)
- bt_dev_dbg(hdev, "Failed to set event filter for %pMR",
+ bt_dev_err(hdev, "Failed to set event filter for %pMR",
&b->bdaddr);
else
scan = SCAN_PAGE;
@@ -6252,6 +6320,7 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
struct hci_conn *conn)
{
struct hci_cp_le_set_ext_adv_params cp;
+ struct hci_rp_le_set_ext_adv_params rp;
int err;
bdaddr_t random_addr;
u8 own_addr_type;
@@ -6293,8 +6362,12 @@ static int hci_le_ext_directed_advertising_sync(struct hci_dev *hdev,
if (err)
return err;
- err = __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS,
- sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ err = hci_set_ext_adv_params_sync(hdev, NULL, &cp, &rp);
+ if (err)
+ return err;
+
+ /* Update adv data as tx power is known now */
+ err = hci_set_ext_adv_data_sync(hdev, cp.handle);
if (err)
return err;
@@ -6741,8 +6814,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
return 0;
}
- /* No privacy so use a public address. */
- *own_addr_type = ADDR_LE_DEV_PUBLIC;
+ /* No privacy, use the current address */
+ hci_copy_identity_address(hdev, rand_addr, own_addr_type);
return 0;
}
@@ -6921,7 +6994,7 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err)
goto unlock;
/* Add connection to indicate PA sync error */
- pa_sync = hci_conn_add_unset(hdev, BIS_LINK, BDADDR_ANY,
+ pa_sync = hci_conn_add_unset(hdev, PA_LINK, BDADDR_ANY,
HCI_ROLE_SLAVE);
if (IS_ERR(pa_sync))
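The new hci_set_ext_adv_params_sync() above issues the command with __hci_cmd_sync() and parses the Command Complete payload inline, treating an ERR_PTR(-ENODATA) skb (status event only) as success with no parameters and a short payload as -EIO. The shape of that validation, modelled in plain C (rsp_buf and read_response are illustrative names, not a Bluetooth API):

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct rsp_buf {
	const uint8_t *data;
	size_t len;
	int err;	/* 0: payload present, -ENODATA: status-only, <0: failure */
};

/* Copy a fixed-size command response out of rsp, if there is one. */
static int read_response(const struct rsp_buf *rsp, void *out, size_t out_len)
{
	if (rsp->err == -ENODATA)
		return 0;		/* only a status event came back */
	if (rsp->err)
		return rsp->err;	/* command or transport failure */
	if (rsp->len != out_len)
		return -EIO;		/* malformed response length */

	memcpy(out, rsp->data, out_len);
	return 0;
}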
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 6e2c752aaa8f..7bd3aa0a6db9 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -336,7 +336,7 @@ static int iso_connect_bis(struct sock *sk)
struct hci_dev *hdev;
int err;
- BT_DBG("%pMR", &iso_pi(sk)->src);
+ BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid);
hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src,
iso_pi(sk)->src_type);
@@ -365,7 +365,7 @@ static int iso_connect_bis(struct sock *sk)
/* Just bind if DEFER_SETUP has been set */
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
- hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst,
+ hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid,
&iso_pi(sk)->qos, iso_pi(sk)->base_len,
iso_pi(sk)->base);
if (IS_ERR(hcon)) {
@@ -375,12 +375,16 @@ static int iso_connect_bis(struct sock *sk)
} else {
hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst,
le_addr_type(iso_pi(sk)->dst_type),
- &iso_pi(sk)->qos, iso_pi(sk)->base_len,
- iso_pi(sk)->base);
+ iso_pi(sk)->bc_sid, &iso_pi(sk)->qos,
+ iso_pi(sk)->base_len, iso_pi(sk)->base);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto unlock;
}
+
+ /* Update SID if it was not set */
+ if (iso_pi(sk)->bc_sid == HCI_SID_INVALID)
+ iso_pi(sk)->bc_sid = hcon->sid;
}
conn = iso_conn_add(hcon);
@@ -409,7 +413,7 @@ static int iso_connect_bis(struct sock *sk)
sk->sk_state = BT_CONNECT;
} else {
sk->sk_state = BT_CONNECT;
- iso_sock_set_timer(sk, sk->sk_sndtimeo);
+ iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo));
}
release_sock(sk);
@@ -499,7 +503,7 @@ static int iso_connect_cis(struct sock *sk)
sk->sk_state = BT_CONNECT;
} else {
sk->sk_state = BT_CONNECT;
- iso_sock_set_timer(sk, sk->sk_sndtimeo);
+ iso_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo));
}
release_sock(sk);
@@ -1337,10 +1341,13 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr,
addr->sa_family = AF_BLUETOOTH;
if (peer) {
+ struct hci_conn *hcon = iso_pi(sk)->conn ?
+ iso_pi(sk)->conn->hcon : NULL;
+
bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst);
sa->iso_bdaddr_type = iso_pi(sk)->dst_type;
- if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ if (hcon && hcon->type == BIS_LINK) {
sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid;
sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis;
memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis,
@@ -1680,6 +1687,17 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
clear_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
break;
+ case BT_PKT_SEQNUM:
+ err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
+ break;
+
+ if (opt)
+ set_bit(BT_SK_PKT_SEQNUM, &bt_sk(sk)->flags);
+ else
+ clear_bit(BT_SK_PKT_SEQNUM, &bt_sk(sk)->flags);
+ break;
+
case BT_ISO_QOS:
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
sk->sk_state != BT_CONNECT2 &&
@@ -1884,7 +1902,7 @@ static void iso_sock_ready(struct sock *sk)
static bool iso_match_big(struct sock *sk, void *data)
{
- struct hci_evt_le_big_sync_estabilished *ev = data;
+ struct hci_evt_le_big_sync_established *ev = data;
return ev->handle == iso_pi(sk)->qos.bcast.big;
}
@@ -1905,7 +1923,7 @@ static void iso_conn_ready(struct iso_conn *conn)
{
struct sock *parent = NULL;
struct sock *sk = conn->sk;
- struct hci_ev_le_big_sync_estabilished *ev = NULL;
+ struct hci_ev_le_big_sync_established *ev = NULL;
struct hci_ev_le_pa_sync_established *ev2 = NULL;
struct hci_ev_le_per_adv_report *ev3 = NULL;
struct hci_conn *hcon;
@@ -2016,7 +2034,7 @@ static void iso_conn_ready(struct iso_conn *conn)
hci_conn_hold(hcon);
iso_chan_add(conn, sk, parent);
- if ((ev && ((struct hci_evt_le_big_sync_estabilished *)ev)->status) ||
+ if ((ev && ((struct hci_evt_le_big_sync_established *)ev)->status) ||
(ev2 && ev2->status)) {
/* Trigger error signal on child socket */
sk->sk_err = ECONNREFUSED;
@@ -2075,7 +2093,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
* proceed to establishing a BIG sync:
*
* 1. HCI_EV_LE_PA_SYNC_ESTABLISHED: The socket may specify a specific
- * SID to listen to and once sync is estabilished its handle needs to
+ * SID to listen to and once sync is established its handle needs to
* be stored in iso_pi(sk)->sync_handle so it can be matched once
* receiving the BIG Info.
* 2. HCI_EVT_LE_BIG_INFO_ADV_REPORT: When connect_ind is triggered by a
@@ -2219,7 +2237,8 @@ done:
static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
{
- if (hcon->type != CIS_LINK && hcon->type != BIS_LINK) {
+ if (hcon->type != CIS_LINK && hcon->type != BIS_LINK &&
+ hcon->type != PA_LINK) {
if (hcon->type != LE_LINK)
return;
@@ -2260,7 +2279,8 @@ static void iso_connect_cfm(struct hci_conn *hcon, __u8 status)
static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
- if (hcon->type != CIS_LINK && hcon->type != BIS_LINK)
+ if (hcon->type != CIS_LINK && hcon->type != BIS_LINK &&
+ hcon->type != PA_LINK)
return;
BT_DBG("hcon %p reason %d", hcon, reason);
@@ -2271,7 +2291,8 @@ static void iso_disconn_cfm(struct hci_conn *hcon, __u8 reason)
void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
{
struct iso_conn *conn = hcon->iso_data;
- __u16 pb, ts, len;
+ struct skb_shared_hwtstamps *hwts;
+ __u16 pb, ts, len, sn;
if (!conn)
goto drop;
@@ -2294,13 +2315,17 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
if (ts) {
struct hci_iso_ts_data_hdr *hdr;
- /* TODO: add timestamp to the packet? */
hdr = skb_pull_data(skb, HCI_ISO_TS_DATA_HDR_SIZE);
if (!hdr) {
BT_ERR("Frame is too short (len %d)", skb->len);
goto drop;
}
+ /* Record the timestamp to skb */
+ hwts = skb_hwtstamps(skb);
+ hwts->hwtstamp = us_to_ktime(le32_to_cpu(hdr->ts));
+
+ sn = __le16_to_cpu(hdr->sn);
len = __le16_to_cpu(hdr->slen);
} else {
struct hci_iso_data_hdr *hdr;
@@ -2311,18 +2336,20 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
goto drop;
}
+ sn = __le16_to_cpu(hdr->sn);
len = __le16_to_cpu(hdr->slen);
}
flags = hci_iso_data_flags(len);
len = hci_iso_data_len(len);
- BT_DBG("Start: total len %d, frag len %d flags 0x%4.4x", len,
- skb->len, flags);
+ BT_DBG("Start: total len %d, frag len %d flags 0x%4.4x sn %d",
+ len, skb->len, flags, sn);
if (len == skb->len) {
/* Complete frame received */
hci_skb_pkt_status(skb) = flags & 0x03;
+ hci_skb_pkt_seqnum(skb) = sn;
iso_recv_frame(conn, skb);
return;
}
@@ -2345,9 +2372,17 @@ void iso_recv(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
goto drop;
hci_skb_pkt_status(conn->rx_skb) = flags & 0x03;
+ hci_skb_pkt_seqnum(conn->rx_skb) = sn;
skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
skb->len);
conn->rx_len = len - skb->len;
+
+ /* Copy hw timestamp from skb to rx_skb if present */
+ if (ts) {
+ hwts = skb_hwtstamps(conn->rx_skb);
+ hwts->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+ }
+
break;
case ISO_CONT:
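For the iso_recv() changes above: when the timestamp flag is set, the SDU header on the wire is ts(4) + sn(2) + slen(2), otherwise just sn(2) + slen(2), all little-endian; the timestamp ends up in the skb hardware timestamp and the sequence number in the packet cb. A self-contained parser for that header layout (struct iso_sdu_info and parse_iso_sdu_hdr are illustrative names):

#include <stddef.h>
#include <stdint.h>

struct iso_sdu_info {
	uint32_t ts_us;	/* only valid when has_ts was set */
	uint16_t sn;	/* packet sequence number */
	uint16_t slen;	/* SDU length, status flags in the top bits */
};

static int parse_iso_sdu_hdr(const uint8_t *buf, size_t len, int has_ts,
			     struct iso_sdu_info *out)
{
	size_t need = has_ts ? 8 : 4;
	size_t off = 0;

	if (len < need)
		return -1;	/* frame too short for its advertised header */

	if (has_ts) {
		out->ts_us = (uint32_t)buf[0] | (uint32_t)buf[1] << 8 |
			     (uint32_t)buf[2] << 16 | (uint32_t)buf[3] << 24;
		off = 4;
	}

	out->sn   = (uint16_t)(buf[off] | buf[off + 1] << 8);
	out->slen = (uint16_t)(buf[off + 2] | buf[off + 3] << 8);
	return 0;
}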
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a5bde5db58ef..805c752ac0a9 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3415,7 +3415,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
struct l2cap_conf_efs efs;
u8 remote_efs = 0;
- u16 mtu = L2CAP_DEFAULT_MTU;
+ u16 mtu = 0;
u16 result = L2CAP_CONF_SUCCESS;
u16 size;
@@ -3520,6 +3520,29 @@ done:
/* Configure output options and let the other side know
* which ones we don't like. */
+ /* If the MTU is not provided in the configuration request, try
+ * adjusting it to the current output MTU, if one has been set.
+ *
+ * Bluetooth Core 6.1, Vol 3, Part A, Section 4.5
+ *
+ * Each configuration parameter value (if any is present) in an
+ * L2CAP_CONFIGURATION_RSP packet reflects an ‘adjustment’ to a
+ * configuration parameter value that has been sent (or, in case
+ * of default values, implied) in the corresponding
+ * L2CAP_CONFIGURATION_REQ packet.
+ */
+ if (!mtu) {
+ /* Only adjust for ERTM channels, as for older modes the
+ * remote stack may not be able to detect the adjustment,
+ * causing it to silently drop packets.
+ */
+ if (chan->mode == L2CAP_MODE_ERTM &&
+ chan->omtu && chan->omtu != L2CAP_DEFAULT_MTU)
+ mtu = chan->omtu;
+ else
+ mtu = L2CAP_DEFAULT_MTU;
+ }
+
if (mtu < L2CAP_DEFAULT_MIN_MTU)
result = L2CAP_CONF_UNACCEPT;
else {
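The MTU handling added to l2cap_parse_conf_req() above no longer assumes L2CAP_DEFAULT_MTU when the peer omits the option; for ERTM channels it echoes the already-configured output MTU instead, so the response stays an adjustment of what was actually sent or implied. The decision reduced to a helper (pick_rsp_mtu is an illustrative name; 672 is the spec default MTU the kernel uses):

#include <stdbool.h>
#include <stdint.h>

#define L2CAP_DEFAULT_MTU	672

/* MTU to acknowledge when the peer's configuration request carried no MTU
 * option: prefer the negotiated output MTU on ERTM channels, otherwise the
 * implied spec default. */
static uint16_t pick_rsp_mtu(uint16_t req_mtu, bool ertm, uint16_t omtu)
{
	if (req_mtu)
		return req_mtu;

	if (ertm && omtu && omtu != L2CAP_DEFAULT_MTU)
		return omtu;

	return L2CAP_DEFAULT_MTU;
}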
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5aa55fa69594..f4257c4d3052 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -255,7 +255,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid),
&la.l2_bdaddr, la.l2_bdaddr_type,
- sk->sk_sndtimeo);
+ READ_ONCE(sk->sk_sndtimeo));
if (err)
return err;
@@ -1703,6 +1703,9 @@ static void l2cap_sock_resume_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
if (test_and_clear_bit(FLAG_PENDING_SECURITY, &chan->flags)) {
sk->sk_state = BT_CONNECTED;
chan->state = BT_CONNECTED;
@@ -1725,7 +1728,7 @@ static long l2cap_sock_get_sndtimeo_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
- return sk->sk_sndtimeo;
+ return READ_ONCE(sk->sk_sndtimeo);
}
static struct pid *l2cap_sock_get_peer_pid_cb(struct l2cap_chan *chan)
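The READ_ONCE(sk->sk_sndtimeo) conversions in this file and in iso.c annotate a lockless read of a field that the setsockopt() path may update concurrently, so the compiler has to emit one untorn load. The closest portable analogue in plain C11, with conn_opts as an illustrative stand-in for the socket:

#include <stdatomic.h>

struct conn_opts {
	_Atomic long snd_timeo;
};

/* One tear-free load; relaxed ordering is enough, only atomicity matters. */
static long opts_get_sndtimeo(struct conn_opts *o)
{
	return atomic_load_explicit(&o->snd_timeo, memory_order_relaxed);
}

/* The writer (setsockopt path) uses the matching relaxed store. */
static void opts_set_sndtimeo(struct conn_opts *o, long val)
{
	atomic_store_explicit(&o->snd_timeo, val, memory_order_relaxed);
}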
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 43aa01fd07b9..305044a84478 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(baswap);
* bt_to_errno() - Bluetooth error codes to standard errno
* @code: Bluetooth error code to be converted
*
- * This function takes a Bluetooth error code as input and convets
+ * This function takes a Bluetooth error code as input and converts
* it to an equivalent Unix/standard errno value.
*
* Return:
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 14a9462fced5..1ce682038b51 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -464,7 +464,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (!hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
@@ -522,7 +522,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
@@ -576,7 +576,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
/* Devices marked as raw-only are neither configured
* nor unconfigured controllers.
*/
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &d->quirks))
+ if (hci_test_quirk(d, HCI_QUIRK_RAW_DEVICE))
continue;
if (hci_dev_test_flag(d, HCI_UNCONFIGURED))
@@ -612,12 +612,12 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
static bool is_configured(struct hci_dev *hdev)
{
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
return false;
- if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
+ if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
return false;
@@ -628,12 +628,12 @@ static __le32 get_missing_options(struct hci_dev *hdev)
{
u32 options = 0;
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) &&
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) &&
!hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
options |= MGMT_OPTION_EXTERNAL_CONFIG;
- if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
- test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
+ if ((hci_test_quirk(hdev, HCI_QUIRK_INVALID_BDADDR) ||
+ hci_test_quirk(hdev, HCI_QUIRK_USE_BDADDR_PROPERTY)) &&
!bacmp(&hdev->public_addr, BDADDR_ANY))
options |= MGMT_OPTION_PUBLIC_ADDRESS;
@@ -669,7 +669,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev,
memset(&rp, 0, sizeof(rp));
rp.manufacturer = cpu_to_le16(hdev->manufacturer);
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG))
options |= MGMT_OPTION_EXTERNAL_CONFIG;
if (hdev->set_bdaddr)
@@ -828,8 +828,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_sc_capable(hdev))
settings |= MGMT_SETTING_SECURE_CONN;
- if (test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
- &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED))
settings |= MGMT_SETTING_WIDEBAND_SPEECH;
}
@@ -841,8 +840,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_ADVERTISING;
}
- if (test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks) ||
- hdev->set_bdaddr)
+ if (hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG) || hdev->set_bdaddr)
settings |= MGMT_SETTING_CONFIGURATION;
if (cis_central_capable(hdev))
@@ -1080,7 +1078,8 @@ static int mesh_send_done_sync(struct hci_dev *hdev, void *data)
struct mgmt_mesh_tx *mesh_tx;
hci_dev_clear_flag(hdev, HCI_MESH_SENDING);
- hci_disable_advertising_sync(hdev);
+ if (list_empty(&hdev->adv_instances))
+ hci_disable_advertising_sync(hdev);
mesh_tx = mgmt_mesh_next(hdev, NULL);
if (mesh_tx)
@@ -1447,22 +1446,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data)
send_settings_rsp(cmd->sk, cmd->opcode, match->hdev);
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
u8 *status = data;
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status);
- mgmt_pending_remove(cmd);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status);
}
static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
@@ -1476,8 +1470,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
if (cmd->cmd_complete) {
cmd->cmd_complete(cmd, match->mgmt_status);
- mgmt_pending_remove(cmd);
-
return;
}
@@ -1486,13 +1478,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, cmd->param_len);
}
static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, sizeof(struct mgmt_addr_info));
}
@@ -1532,7 +1524,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
goto done;
}
@@ -1707,7 +1699,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -1943,8 +1935,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
new_settings(hdev, NULL);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp,
- &mgmt_err);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true,
+ cmd_status_rsp, &mgmt_err);
return;
}
@@ -1954,7 +1946,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -2074,12 +2066,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
- &status);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp,
+ &status);
return;
}
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match);
new_settings(hdev, match.sk);
@@ -2138,7 +2130,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err)
struct sock *sk = cmd->sk;
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -2160,6 +2152,9 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data)
else
hci_dev_clear_flag(hdev, HCI_MESH);
+ hdev->le_scan_interval = __le16_to_cpu(cp->period);
+ hdev->le_scan_window = __le16_to_cpu(cp->window);
+
len -= sizeof(*cp);
/* If filters don't fit, forward all adv pkts */
@@ -2174,6 +2169,7 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
{
struct mgmt_cp_set_mesh *cp = data;
struct mgmt_pending_cmd *cmd;
+ __u16 period, window;
int err = 0;
bt_dev_dbg(hdev, "sock %p", sk);
@@ -2187,6 +2183,23 @@ static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
MGMT_STATUS_INVALID_PARAMS);
+ /* Keep allowed ranges in sync with set_scan_params() */
+ period = __le16_to_cpu(cp->period);
+
+ if (period < 0x0004 || period > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ window = __le16_to_cpu(cp->window);
+
+ if (window < 0x0004 || window > 0x4000)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
+ if (window > period)
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER,
+ MGMT_STATUS_INVALID_PARAMS);
+
hci_dev_lock(hdev);
cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len);
@@ -2638,7 +2651,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), hdev->dev_class, 3);
mgmt_pending_free(cmd);
@@ -3224,6 +3237,7 @@ static u8 link_to_bdaddr(u8 link_type, u8 addr_type)
switch (link_type) {
case CIS_LINK:
case BIS_LINK:
+ case PA_LINK:
case LE_LINK:
switch (addr_type) {
case ADDR_LE_DEV_PUBLIC:
@@ -3427,7 +3441,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status)
bacpy(&rp.addr.bdaddr, &conn->dst);
rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
- err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE,
+ err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE,
status, &rp, sizeof(rp));
/* So we don't get further callbacks for this connection */
@@ -4292,7 +4306,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
bt_dev_dbg(hdev, "sock %p", sk);
- if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks))
+ if (!hci_test_quirk(hdev, HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED))
return mgmt_cmd_status(sk, hdev->id,
MGMT_OP_SET_WIDEBAND_SPEECH,
MGMT_STATUS_NOT_SUPPORTED);
@@ -5108,24 +5122,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
}
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle)
+static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
+ __le16 handle)
{
struct mgmt_ev_adv_monitor_removed ev;
- struct mgmt_pending_cmd *cmd;
- struct sock *sk_skip = NULL;
- struct mgmt_cp_remove_adv_monitor *cp;
-
- cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
- if (cmd) {
- cp = cmd->param;
- if (cp->monitor_handle)
- sk_skip = cmd->sk;
- }
-
- ev.monitor_handle = cpu_to_le16(handle);
+ ev.monitor_handle = handle;
- mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip);
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
}
static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
@@ -5196,7 +5200,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
hci_update_passive_scan(hdev);
}
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
@@ -5227,8 +5231,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
if (pending_find(MGMT_OP_SET_LE, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
- pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) {
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
@@ -5398,8 +5401,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
struct mgmt_pending_cmd *cmd = data;
struct mgmt_cp_remove_adv_monitor *cp;
- if (status == -ECANCELED ||
- cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
+ if (status == -ECANCELED)
return;
hci_dev_lock(hdev);
@@ -5408,12 +5410,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
rp.monitor_handle = cp->monitor_handle;
- if (!status)
+ if (!status) {
+ mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle);
hci_update_passive_scan(hdev);
+ }
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
@@ -5423,10 +5427,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data)
{
struct mgmt_pending_cmd *cmd = data;
-
- if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
- return -ECANCELED;
-
struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
u16 handle = __le16_to_cpu(cp->monitor_handle);
@@ -5445,14 +5445,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
if (!cmd) {
status = MGMT_STATUS_NO_RESOURCES;
goto unlock;
@@ -5462,7 +5461,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
mgmt_remove_adv_monitor_complete);
if (err) {
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
if (err == -ENOMEM)
status = MGMT_STATUS_NO_RESOURCES;
@@ -5792,7 +5791,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6013,7 +6012,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6238,7 +6237,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
u8 status = mgmt_status(err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -6248,7 +6247,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
else
hci_dev_clear_flag(hdev, HCI_ADVERTISING);
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp,
&match);
new_settings(hdev, match.sk);
@@ -6454,6 +6453,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS,
MGMT_STATUS_NOT_SUPPORTED);
+ /* Keep allowed ranges in sync with set_mesh() */
interval = __le16_to_cpu(cp->interval);
if (interval < 0x0004 || interval > 0x4000)
@@ -6592,7 +6592,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err)
*/
hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
} else {
send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev);
new_settings(hdev, cmd->sk);
@@ -6729,7 +6729,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err)
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -7176,7 +7176,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
rp.max_tx_power = HCI_TX_POWER_INVALID;
}
- mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status,
&rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -7336,7 +7336,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err)
}
complete:
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp,
sizeof(rp));
mgmt_pending_free(cmd);
@@ -7934,7 +7934,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
MGMT_STATUS_INVALID_PARAMS);
- if (!test_bit(HCI_QUIRK_EXTERNAL_CONFIG, &hdev->quirks))
+ if (!hci_test_quirk(hdev, HCI_QUIRK_EXTERNAL_CONFIG))
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
MGMT_STATUS_NOT_SUPPORTED);
@@ -8586,10 +8586,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
add_adv_complete(hdev, cmd->sk, cp->instance, err);
@@ -8777,10 +8777,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
hci_remove_adv_instance(hdev, cp->instance);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
} else {
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
}
@@ -8927,10 +8927,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9089,10 +9089,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data,
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9337,7 +9337,7 @@ void mgmt_index_added(struct hci_dev *hdev)
{
struct mgmt_ev_ext_index ev;
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return;
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
@@ -9361,10 +9361,10 @@ void mgmt_index_removed(struct hci_dev *hdev)
struct mgmt_ev_ext_index ev;
struct cmd_lookup match = { NULL, hdev, MGMT_STATUS_INVALID_INDEX };
- if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
+ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE))
return;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
@@ -9402,7 +9402,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
hci_update_passive_scan(hdev);
}
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
new_settings(hdev, match.sk);
@@ -9417,7 +9418,8 @@ void __mgmt_power_off(struct hci_dev *hdev)
struct cmd_lookup match = { NULL, hdev };
u8 zero_cod[] = { 0, 0, 0 };
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
/* If the power off is because of hdev unregistration let
* use the appropriate INVALID_INDEX status. Otherwise use
@@ -9431,7 +9433,7 @@ void __mgmt_power_off(struct hci_dev *hdev)
else
match.mgmt_status = MGMT_STATUS_NOT_POWERED;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -9672,7 +9674,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk);
cmd->cmd_complete(cmd, 0);
- mgmt_pending_remove(cmd);
}
bool mgmt_powering_down(struct hci_dev *hdev)
@@ -9728,8 +9729,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct mgmt_cp_disconnect *cp;
struct mgmt_pending_cmd *cmd;
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
+ mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true,
+ unpair_device_rsp, hdev);
cmd = pending_find(MGMT_OP_DISCONNECT, hdev);
if (!cmd)
@@ -9922,7 +9923,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
if (status) {
u8 mgmt_err = mgmt_status(status);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
cmd_status_rsp, &mgmt_err);
return;
}
@@ -9932,8 +9933,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
else
changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp,
- &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
+ settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -9957,9 +9958,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
{
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup,
+ &match);
if (!status) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -10084,7 +10088,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
(rssi == HCI_RSSI_INVALID ||
(rssi < hdev->discovery.rssi &&
- !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks))))
+ !hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER))))
return false;
if (hdev->discovery.uuid_count != 0) {
@@ -10102,7 +10106,7 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
/* If duplicate filtering does not report RSSI changes, then restart
* scanning to ensure updated result with updated RSSI values.
*/
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) {
+ if (hci_test_quirk(hdev, HCI_QUIRK_STRICT_DUPLICATE_FILTER)) {
/* Validate RSSI value against the RSSI threshold once more. */
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
rssi < hdev->discovery.rssi)
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 3713ff490c65..a88a07da3947 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev)
{
- struct mgmt_pending_cmd *cmd;
+ struct mgmt_pending_cmd *cmd, *tmp;
+
+ mutex_lock(&hdev->mgmt_pending_lock);
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (hci_sock_get_channel(cmd->sk) != channel)
continue;
- if (cmd->opcode == opcode)
+
+ if (cmd->opcode == opcode) {
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
+ }
}
+ mutex_unlock(&hdev->mgmt_pending_lock);
+
return NULL;
}
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data)
{
struct mgmt_pending_cmd *cmd, *tmp;
+ mutex_lock(&hdev->mgmt_pending_lock);
+
list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (opcode > 0 && cmd->opcode != opcode)
continue;
+ if (remove)
+ list_del(&cmd->list);
+
cb(cmd, data);
+
+ if (remove)
+ mgmt_pending_free(cmd);
}
+
+ mutex_unlock(&hdev->mgmt_pending_lock);
}
struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
@@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
return NULL;
cmd->opcode = opcode;
- cmd->index = hdev->id;
+ cmd->hdev = hdev;
cmd->param = kmemdup(data, len, GFP_KERNEL);
if (!cmd->param) {
@@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
+ mutex_lock(&hdev->mgmt_pending_lock);
list_add_tail(&cmd->list, &hdev->mgmt_pending);
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
}
@@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd)
void mgmt_pending_remove(struct mgmt_pending_cmd *cmd)
{
+ mutex_lock(&cmd->hdev->mgmt_pending_lock);
list_del(&cmd->list);
+ mutex_unlock(&cmd->hdev->mgmt_pending_lock);
+
mgmt_pending_free(cmd);
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index f2ba994ab1d8..024e51dd6937 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -33,7 +33,7 @@ struct mgmt_mesh_tx {
struct mgmt_pending_cmd {
struct list_head list;
u16 opcode;
- int index;
+ struct hci_dev *hdev;
void *param;
size_t param_len;
struct sock *sk;
@@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev);
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data);
struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 5a8ccc491b14..c560d8467669 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -989,7 +989,7 @@ static void msft_monitor_device_evt(struct hci_dev *hdev, struct sk_buff *skb)
handle_data = msft_find_handle_data(hdev, ev->monitor_handle, false);
- if (!test_bit(HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER, &hdev->quirks)) {
+ if (!hci_test_quirk(hdev, HCI_QUIRK_USE_MSFT_EXT_ADDRESS_FILTER)) {
if (!handle_data)
return;
mgmt_handle = handle_data->mgmt_handle;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 3b8f39618d65..96250807b32b 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1962,7 +1962,8 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
int err;
/* Fast check for a new connection.
- * Avoids unnesesary socket allocations. */
+ * Avoids unnecessary socket allocations.
+ */
if (list_empty(&bt_sk(sock->sk)->accept_q))
return;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 21a5b5535ebc..376ce6de84be 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -438,7 +438,6 @@ static int __rfcomm_release_dev(void __user *arg)
{
struct rfcomm_dev_req req;
struct rfcomm_dev *dev;
- struct tty_struct *tty;
if (copy_from_user(&req, arg, sizeof(req)))
return -EFAULT;
@@ -464,11 +463,7 @@ static int __rfcomm_release_dev(void __user *arg)
rfcomm_dlc_close(dev->dlc, 0);
/* Shut down TTY synchronously before freeing rfcomm_dev */
- tty = tty_port_tty_get(&dev->port);
- if (tty) {
- tty_vhangup(tty);
- tty_kref_put(tty);
- }
+ tty_port_tty_vhangup(&dev->port);
if (!test_bit(RFCOMM_TTY_OWNED, &dev->status))
tty_port_put(&dev->port);
@@ -980,7 +975,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty,
baud = RFCOMM_RPN_BR_230400;
break;
default:
- /* 9600 is standard accordinag to the RFCOMM specification */
+ /* 9600 is standard according to the RFCOMM specification */
baud = RFCOMM_RPN_BR_9600;
break;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 2945d27e75dc..d382d980fd9a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -338,7 +338,7 @@ static int sco_connect(struct sock *sk)
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
sco_pi(sk)->setting, &sco_pi(sk)->codec,
- sk->sk_sndtimeo);
+ READ_ONCE(sk->sk_sndtimeo));
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto unlock;
@@ -367,7 +367,7 @@ static int sco_connect(struct sock *sk)
sk->sk_state = BT_CONNECTED;
} else {
sk->sk_state = BT_CONNECT;
- sco_sock_set_timer(sk, sk->sk_sndtimeo);
+ sco_sock_set_timer(sk, READ_ONCE(sk->sk_sndtimeo));
}
release_sock(sk);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 47f359f24d1f..45512b2ba951 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1379,7 +1379,7 @@ static void smp_timeout(struct work_struct *work)
bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
- hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM);
+ hci_disconnect(conn->hcon, HCI_ERROR_AUTH_FAILURE);
}
static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
@@ -2977,8 +2977,25 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
if (code > SMP_CMD_MAX)
goto drop;
- if (smp && !test_and_clear_bit(code, &smp->allow_cmd))
+ if (smp && !test_and_clear_bit(code, &smp->allow_cmd)) {
+ /* If there is a context and the command is not allowed, consider
+ * it a failure so the session is cleaned up properly.
+ */
+ switch (code) {
+ case SMP_CMD_IDENT_INFO:
+ case SMP_CMD_IDENT_ADDR_INFO:
+ case SMP_CMD_SIGN_INFO:
+ /* 3.6.1. Key distribution and generation
+ *
+ * A device may reject a distributed key by sending the
+ * Pairing Failed command with the reason set to
+ * "Key Rejected".
+ */
+ smp_failure(conn, SMP_KEY_REJECTED);
+ break;
+ }
goto drop;
+ }
/* If we don't have a context the only allowed commands are
* pairing request and security request.
@@ -3172,7 +3189,7 @@ static void smp_ready_cb(struct l2cap_chan *chan)
/* No need to call l2cap_chan_hold() here since we already own
* the reference taken in smp_new_conn_cb(). This is just the
* first time that we tie it to a specific pointer. The code in
- * l2cap_core.c ensures that there's no risk this function wont
+ * l2cap_core.c ensures that there's no risk this function won't
* get called if smp_new_conn_cb was previously called.
*/
conn->smp = chan;
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index 87a59ec2c9f0..c5da53dfab04 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -138,6 +138,7 @@ struct smp_cmd_keypress_notify {
#define SMP_NUMERIC_COMP_FAILED 0x0c
#define SMP_BREDR_PAIRING_IN_PROGRESS 0x0d
#define SMP_CROSS_TRANSP_NOT_ALLOWED 0x0e
+#define SMP_KEY_REJECTED 0x0f
#define SMP_MIN_ENC_KEY_SIZE 7
#define SMP_MAX_ENC_KEY_SIZE 16
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index f71f67c6896b..812457819b5a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -171,7 +171,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
/* prog doesn't take the ownership of the reference from caller */
bpf_prog_inc(prog);
- bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog,
+ prog->expected_attach_type);
op_idx = prog->expected_attach_type;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index aaf13a7d58ed..9728dbd4c66c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1255,7 +1255,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
headroom -= ctx->data;
}
- max_data_sz = 4096 - headroom - tailroom;
+ max_data_sz = PAGE_SIZE - headroom - tailroom;
if (size > max_data_sz) {
/* disallow live data mode for jumbo frames */
if (do_live)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 0adeafe11a36..1885d0c315f0 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -74,9 +74,9 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
if (br->dev->addr_assign_type == NET_ADDR_SET)
break;
prechaddr_info = ptr;
- err = dev_pre_changeaddr_notify(br->dev,
- prechaddr_info->dev_addr,
- extack);
+ err = netif_pre_changeaddr_notify(br->dev,
+ prechaddr_info->dev_addr,
+ extack);
if (err)
return notifier_from_errno(err);
break;
@@ -484,3 +484,4 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(BR_VERSION);
MODULE_ALIAS_RTNL_LINK("bridge");
MODULE_DESCRIPTION("Ethernet bridge driver");
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 2450690f98cf..98c5b9c3145f 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -668,7 +668,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
/* Ask for permission to use this MAC address now, even if we
* don't end up choosing it below.
*/
- err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
+ err = netif_pre_changeaddr_notify(br->dev, dev->dev_addr,
+ extack);
if (err)
goto err6;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 0224ef3dfec0..1377f31b719c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2015,10 +2015,19 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port,
void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
{
+ struct net_bridge *br = pmctx->port->br;
+ bool del = false;
+
#if IS_ENABLED(CONFIG_IPV6)
timer_delete_sync(&pmctx->ip6_mc_router_timer);
#endif
timer_delete_sync(&pmctx->ip4_mc_router_timer);
+
+ spin_lock_bh(&br->multicast_lock);
+ del |= br_ip6_multicast_rport_del(pmctx);
+ del |= br_ip4_multicast_rport_del(pmctx);
+ br_multicast_rport_del_notify(pmctx, del);
+ spin_unlock_bh(&br->multicast_lock);
}
int br_multicast_add_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6e337937d0d7..4e2d53b27221 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -479,7 +479,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
hdr->ifi_index = dev->ifindex;
- hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_flags = netif_get_flags(dev);
hdr->ifi_change = 0;
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 95d7355a0407..fe3f7bbe86ee 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -17,6 +17,9 @@ static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
return false;
+ if (br_multicast_igmp_type(skb))
+ return false;
+
return (p->flags & BR_TX_FWD_OFFLOAD) &&
(p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
@@ -834,7 +837,7 @@ int br_switchdev_port_offload(struct net_bridge_port *p,
struct netdev_phys_item_id ppid;
int err;
- err = dev_get_port_parent_id(dev, &ppid, false);
+ err = netif_get_port_parent_id(dev, &ppid, false);
if (err)
return err;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c1176a5e02c4..cb4855ed9500 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -1026,7 +1026,7 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
static const struct bin_attribute bridge_forward = {
.attr = { .name = SYSFS_BRIDGE_FDB,
.mode = 0444, },
- .read_new = brforward_read,
+ .read = brforward_read,
};
/*
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index f16bbbbb9481..60f28e4fb5c0 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -42,8 +42,8 @@ config NF_CONNTRACK_BRIDGE
# old sockopt interface and eval loop
config BRIDGE_NF_EBTABLES_LEGACY
tristate "Legacy EBTABLES support"
- depends on BRIDGE && NETFILTER_XTABLES
- default n
+ depends on BRIDGE && NETFILTER_XTABLES_LEGACY
+ default n
help
Legacy ebtables packet/frame classifier.
This is not needed if you are using ebtables over nftables
@@ -65,7 +65,7 @@ if BRIDGE_NF_EBTABLES
#
config BRIDGE_EBT_BROUTE
tristate "ebt: broute table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables broute table is used to define rules that decide between
bridging and routing frames, giving Linux the functionality of a
@@ -76,7 +76,7 @@ config BRIDGE_EBT_BROUTE
config BRIDGE_EBT_T_FILTER
tristate "ebt: filter table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables filter table is used to define frame filtering rules at
local input, forwarding and local output. See the man page for
@@ -86,7 +86,7 @@ config BRIDGE_EBT_T_FILTER
config BRIDGE_EBT_T_NAT
tristate "ebt: nat table support"
- select BRIDGE_NF_EBTABLES_LEGACY
+ depends on BRIDGE_NF_EBTABLES_LEGACY
help
The ebtables nat table is used to define rules that alter the MAC
source address (MAC SNAT) or the MAC destination address (MAC DNAT).
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 20139fa1be1f..06b604cf9d58 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -351,17 +351,154 @@ int cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
return found;
}
+static int cfctrl_link_setup(struct cfctrl *cfctrl, struct cfpkt *pkt, u8 cmdrsp)
+{
+ u8 len;
+ u8 linkid = 0;
+ enum cfctrl_srv serv;
+ enum cfctrl_srv servtype;
+ u8 endpoint;
+ u8 physlinkid;
+ u8 prio;
+ u8 tmp;
+ u8 *cp;
+ int i;
+ struct cfctrl_link_param linkparam;
+ struct cfctrl_request_info rsp, *req;
+
+ memset(&linkparam, 0, sizeof(linkparam));
+
+ tmp = cfpkt_extr_head_u8(pkt);
+
+ serv = tmp & CFCTRL_SRV_MASK;
+ linkparam.linktype = serv;
+
+ servtype = tmp >> 4;
+ linkparam.chtype = servtype;
+
+ tmp = cfpkt_extr_head_u8(pkt);
+ physlinkid = tmp & 0x07;
+ prio = tmp >> 3;
+
+ linkparam.priority = prio;
+ linkparam.phyid = physlinkid;
+ endpoint = cfpkt_extr_head_u8(pkt);
+ linkparam.endpoint = endpoint & 0x03;
+
+ switch (serv) {
+ case CFCTRL_SRV_VEI:
+ case CFCTRL_SRV_DBG:
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+ case CFCTRL_SRV_VIDEO:
+ tmp = cfpkt_extr_head_u8(pkt);
+ linkparam.u.video.connid = tmp;
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+
+ case CFCTRL_SRV_DATAGRAM:
+ linkparam.u.datagram.connid = cfpkt_extr_head_u32(pkt);
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ break;
+ case CFCTRL_SRV_RFM:
+ /* Construct a frame, convert
+ * DatagramConnectionID
+ * to network format long and copy it out...
+ */
+ linkparam.u.rfm.connid = cfpkt_extr_head_u32(pkt);
+ cp = (u8 *) linkparam.u.rfm.volume;
+ for (tmp = cfpkt_extr_head_u8(pkt);
+ cfpkt_more(pkt) && tmp != '\0';
+ tmp = cfpkt_extr_head_u8(pkt))
+ *cp++ = tmp;
+ *cp = '\0';
+
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+
+ break;
+ case CFCTRL_SRV_UTIL:
+ /* Construct a frame, convert
+ * DatagramConnectionID
+ * to network format long and copy it out...
+ */
+ /* Fifosize KB */
+ linkparam.u.utility.fifosize_kb = cfpkt_extr_head_u16(pkt);
+ /* Fifosize bufs */
+ linkparam.u.utility.fifosize_bufs = cfpkt_extr_head_u16(pkt);
+ /* name */
+ cp = (u8 *) linkparam.u.utility.name;
+ caif_assert(sizeof(linkparam.u.utility.name)
+ >= UTILITY_NAME_LENGTH);
+ for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) {
+ tmp = cfpkt_extr_head_u8(pkt);
+ *cp++ = tmp;
+ }
+ /* Length */
+ len = cfpkt_extr_head_u8(pkt);
+ linkparam.u.utility.paramlen = len;
+ /* Param Data */
+ cp = linkparam.u.utility.params;
+ while (cfpkt_more(pkt) && len--) {
+ tmp = cfpkt_extr_head_u8(pkt);
+ *cp++ = tmp;
+ }
+ if (CFCTRL_ERR_BIT & cmdrsp)
+ break;
+ /* Link ID */
+ linkid = cfpkt_extr_head_u8(pkt);
+ /* Length */
+ len = cfpkt_extr_head_u8(pkt);
+ /* Param Data */
+ cfpkt_extr_head(pkt, NULL, len);
+ break;
+ default:
+ pr_warn("Request setup, invalid type (%d)\n", serv);
+ return -1;
+ }
+
+ rsp.cmd = CFCTRL_CMD_LINK_SETUP;
+ rsp.param = linkparam;
+ spin_lock_bh(&cfctrl->info_list_lock);
+ req = cfctrl_remove_req(cfctrl, &rsp);
+
+ if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
+ cfpkt_erroneous(pkt)) {
+ pr_err("Invalid O/E bit or parse error "
+ "on CAIF control channel\n");
+ cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 0,
+ req ? req->client_layer : NULL);
+ } else {
+ cfctrl->res.linksetup_rsp(cfctrl->serv.layer.up, linkid,
+ serv, physlinkid,
+ req ? req->client_layer : NULL);
+ }
+
+ kfree(req);
+
+ spin_unlock_bh(&cfctrl->info_list_lock);
+
+ return 0;
+}
+
static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
{
u8 cmdrsp;
u8 cmd;
- int ret = -1;
- u8 len;
- u8 param[255];
+ int ret = 0;
u8 linkid = 0;
struct cfctrl *cfctrl = container_obj(layer);
- struct cfctrl_request_info rsp, *req;
-
cmdrsp = cfpkt_extr_head_u8(pkt);
cmd = cmdrsp & CFCTRL_CMD_MASK;
@@ -374,150 +511,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
switch (cmd) {
case CFCTRL_CMD_LINK_SETUP:
- {
- enum cfctrl_srv serv;
- enum cfctrl_srv servtype;
- u8 endpoint;
- u8 physlinkid;
- u8 prio;
- u8 tmp;
- u8 *cp;
- int i;
- struct cfctrl_link_param linkparam;
- memset(&linkparam, 0, sizeof(linkparam));
-
- tmp = cfpkt_extr_head_u8(pkt);
-
- serv = tmp & CFCTRL_SRV_MASK;
- linkparam.linktype = serv;
-
- servtype = tmp >> 4;
- linkparam.chtype = servtype;
-
- tmp = cfpkt_extr_head_u8(pkt);
- physlinkid = tmp & 0x07;
- prio = tmp >> 3;
-
- linkparam.priority = prio;
- linkparam.phyid = physlinkid;
- endpoint = cfpkt_extr_head_u8(pkt);
- linkparam.endpoint = endpoint & 0x03;
-
- switch (serv) {
- case CFCTRL_SRV_VEI:
- case CFCTRL_SRV_DBG:
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
- case CFCTRL_SRV_VIDEO:
- tmp = cfpkt_extr_head_u8(pkt);
- linkparam.u.video.connid = tmp;
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
-
- case CFCTRL_SRV_DATAGRAM:
- linkparam.u.datagram.connid =
- cfpkt_extr_head_u32(pkt);
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- break;
- case CFCTRL_SRV_RFM:
- /* Construct a frame, convert
- * DatagramConnectionID
- * to network format long and copy it out...
- */
- linkparam.u.rfm.connid =
- cfpkt_extr_head_u32(pkt);
- cp = (u8 *) linkparam.u.rfm.volume;
- for (tmp = cfpkt_extr_head_u8(pkt);
- cfpkt_more(pkt) && tmp != '\0';
- tmp = cfpkt_extr_head_u8(pkt))
- *cp++ = tmp;
- *cp = '\0';
-
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
-
- break;
- case CFCTRL_SRV_UTIL:
- /* Construct a frame, convert
- * DatagramConnectionID
- * to network format long and copy it out...
- */
- /* Fifosize KB */
- linkparam.u.utility.fifosize_kb =
- cfpkt_extr_head_u16(pkt);
- /* Fifosize bufs */
- linkparam.u.utility.fifosize_bufs =
- cfpkt_extr_head_u16(pkt);
- /* name */
- cp = (u8 *) linkparam.u.utility.name;
- caif_assert(sizeof(linkparam.u.utility.name)
- >= UTILITY_NAME_LENGTH);
- for (i = 0;
- i < UTILITY_NAME_LENGTH
- && cfpkt_more(pkt); i++) {
- tmp = cfpkt_extr_head_u8(pkt);
- *cp++ = tmp;
- }
- /* Length */
- len = cfpkt_extr_head_u8(pkt);
- linkparam.u.utility.paramlen = len;
- /* Param Data */
- cp = linkparam.u.utility.params;
- while (cfpkt_more(pkt) && len--) {
- tmp = cfpkt_extr_head_u8(pkt);
- *cp++ = tmp;
- }
- if (CFCTRL_ERR_BIT & cmdrsp)
- break;
- /* Link ID */
- linkid = cfpkt_extr_head_u8(pkt);
- /* Length */
- len = cfpkt_extr_head_u8(pkt);
- /* Param Data */
- cfpkt_extr_head(pkt, &param, len);
- break;
- default:
- pr_warn("Request setup, invalid type (%d)\n",
- serv);
- goto error;
- }
-
- rsp.cmd = cmd;
- rsp.param = linkparam;
- spin_lock_bh(&cfctrl->info_list_lock);
- req = cfctrl_remove_req(cfctrl, &rsp);
-
- if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
- cfpkt_erroneous(pkt)) {
- pr_err("Invalid O/E bit or parse error "
- "on CAIF control channel\n");
- cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
- 0,
- req ? req->client_layer
- : NULL);
- } else {
- cfctrl->res.linksetup_rsp(cfctrl->serv.
- layer.up, linkid,
- serv, physlinkid,
- req ? req->
- client_layer : NULL);
- }
-
- kfree(req);
-
- spin_unlock_bh(&cfctrl->info_list_lock);
- }
+ ret = cfctrl_link_setup(cfctrl, pkt, cmdrsp);
break;
case CFCTRL_CMD_LINK_DESTROY:
linkid = cfpkt_extr_head_u8(pkt);
@@ -544,9 +538,9 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
break;
default:
pr_err("Unrecognized Control Frame\n");
+ ret = -1;
goto error;
}
- ret = 0;
error:
cfpkt_destroy(pkt);
return ret;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 4aab7033c933..b2387a46794a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -683,7 +683,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
dev->type, skb->len);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_CAN_RX_INVALID_FRAME);
return NET_RX_DROP;
}
@@ -698,7 +698,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
dev->type, skb->len);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_CANFD_RX_INVALID_FRAME);
return NET_RX_DROP;
}
@@ -713,7 +713,7 @@ static int canxl_rcv(struct sk_buff *skb, struct net_device *dev,
pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n",
dev->type, skb->len);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_CANXL_RX_INVALID_FRAME);
return NET_RX_DROP;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 6bc1cc4c94c5..5e690a2377e4 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -359,6 +359,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
unsigned int datalen = head->nframes * op->cfsiz;
int err;
unsigned int *pflags;
+ enum skb_drop_reason reason;
skb = alloc_skb(sizeof(*head) + datalen, gfp_any());
if (!skb)
@@ -413,11 +414,11 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
addr->can_family = AF_CAN;
addr->can_ifindex = op->rx_ifindex;
- err = sock_queue_rcv_skb(sk, skb);
+ err = sock_queue_rcv_skb_reason(sk, skb, &reason);
if (err < 0) {
struct bcm_sock *bo = bcm_sk(sk);
- kfree_skb(skb);
+ sk_skb_reason_drop(sk, skb, reason);
/* don't care about overflows in this statistic */
bo->dropped_usr_msgs++;
}
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 1efa377f002e..dee1412b3c9c 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -278,6 +278,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk)
{
struct sockaddr_can *addr = (struct sockaddr_can *)skb->cb;
+ enum skb_drop_reason reason;
BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct sockaddr_can));
@@ -285,8 +286,8 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk)
addr->can_family = AF_CAN;
addr->can_ifindex = skb->dev->ifindex;
- if (sock_queue_rcv_skb(sk, skb) < 0)
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0)
+ sk_skb_reason_drop(sk, skb, reason);
}
static u8 padlen(u8 datalen)
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 6fefe7a68761..3d8b588822f9 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -311,6 +311,7 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
{
const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
struct j1939_sk_buff_cb *skcb;
+ enum skb_drop_reason reason;
struct sk_buff *skb;
if (oskb->sk == &jsk->sk)
@@ -331,8 +332,8 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
if (skb->sk)
skcb->msg_flags |= MSG_DONTROUTE;
- if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(&jsk->sk, skb, &reason) < 0)
+ sk_skb_reason_drop(&jsk->sk, skb, reason);
}
bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
diff --git a/net/can/raw.c b/net/can/raw.c
index 020f21430b1d..76b867d21def 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -129,6 +129,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
{
struct sock *sk = (struct sock *)data;
struct raw_sock *ro = raw_sk(sk);
+ enum skb_drop_reason reason;
struct sockaddr_can *addr;
struct sk_buff *skb;
unsigned int *pflags;
@@ -205,8 +206,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (oskb->sk == sk)
*pflags |= MSG_CONFIRM;
- if (sock_queue_rcv_skb(sk, skb) < 0)
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0)
+ sk_skb_reason_drop(sk, skb, reason);
}
static int raw_enable_filters(struct net *net, struct net_device *dev,
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index bd608ffa0627..5483b4eed94e 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -793,8 +793,8 @@ static int setup_crypto(struct ceph_connection *con,
return 0; /* auth_x, secure mode */
}
-static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
- int kvec_cnt, u8 *hmac)
+static int ceph_hmac_sha256(struct ceph_connection *con,
+ const struct kvec *kvecs, int kvec_cnt, u8 *hmac)
{
SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm); /* tfm arg is ignored */
int ret;
@@ -1462,8 +1462,8 @@ static int prepare_auth_signature(struct ceph_connection *con)
if (!buf)
return -ENOMEM;
- ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
- CTRL_BODY(buf));
+ ret = ceph_hmac_sha256(con, con->v2.in_sign_kvecs,
+ con->v2.in_sign_kvec_cnt, CTRL_BODY(buf));
if (ret)
return ret;
@@ -2460,8 +2460,8 @@ static int process_auth_signature(struct ceph_connection *con,
return -EINVAL;
}
- ret = hmac_sha256(con, con->v2.out_sign_kvecs,
- con->v2.out_sign_kvec_cnt, hmac);
+ ret = ceph_hmac_sha256(con, con->v2.out_sign_kvecs,
+ con->v2.out_sign_kvec_cnt, hmac);
if (ret)
return ret;
diff --git a/net/core/dev.c b/net/core/dev.c
index be97c440ecd5..b28ce68830b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1267,33 +1267,32 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
- * __dev_get_by_flags - find any device with given flags
- * @net: the applicable net namespace
- * @if_flags: IFF_* values
- * @mask: bitmask of bits in if_flags to check
+ * netdev_get_by_flags_rcu - find any device with given flags
+ * @net: the applicable net namespace
+ * @tracker: tracking object for the acquired reference
+ * @if_flags: IFF_* values
+ * @mask: bitmask of bits in if_flags to check
+ *
+ * Search for any interface with the given flags.
*
- * Search for any interface with the given flags. Returns NULL if a device
- * is not found or a pointer to the device. Must be called inside
- * rtnl_lock(), and result refcount is unchanged.
+ * Context: rcu_read_lock() must be held.
+ * Returns: NULL if a device is not found or a pointer to the device.
*/
-
-struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
- unsigned short mask)
+struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker,
+ unsigned short if_flags, unsigned short mask)
{
- struct net_device *dev, *ret;
-
- ASSERT_RTNL();
+ struct net_device *dev;
- ret = NULL;
- for_each_netdev(net, dev) {
- if (((dev->flags ^ if_flags) & mask) == 0) {
- ret = dev;
- break;
+ for_each_netdev_rcu(net, dev) {
+ if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) {
+ netdev_hold(dev, tracker, GFP_ATOMIC);
+ return dev;
}
}
- return ret;
+
+ return NULL;
}
-EXPORT_SYMBOL(__dev_get_by_flags);
+EXPORT_IPV6_MOD(netdev_get_by_flags_rcu);
/**
* dev_valid_name - check if name is okay for network device
@@ -1769,7 +1768,7 @@ static void __dev_close(struct net_device *dev)
list_del(&single);
}
-void dev_close_many(struct list_head *head, bool unlink)
+void netif_close_many(struct list_head *head, bool unlink)
{
struct net_device *dev, *tmp;
@@ -1787,7 +1786,7 @@ void dev_close_many(struct list_head *head, bool unlink)
list_del_init(&dev->close_list);
}
}
-EXPORT_SYMBOL(dev_close_many);
+EXPORT_SYMBOL_NS_GPL(netif_close_many, "NETDEV_INTERNAL");
void netif_close(struct net_device *dev)
{
@@ -1795,7 +1794,7 @@ void netif_close(struct net_device *dev)
LIST_HEAD(single);
list_add(&dev->close_list, &single);
- dev_close_many(&single, true);
+ netif_close_many(&single, true);
list_del(&single);
}
}
@@ -3179,7 +3178,6 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->reg_state == NETREG_REGISTERED ||
dev->reg_state == NETREG_UNREGISTERING) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
@@ -3229,7 +3227,6 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
return -EINVAL;
if (dev->reg_state == NETREG_REGISTERED) {
- ASSERT_RTNL();
netdev_ops_assert_locked(dev);
rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
@@ -4028,7 +4025,10 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
unsigned int hdr_len;
/* mac layer + network layer */
- hdr_len = skb_transport_offset(skb);
+ if (!skb->encapsulation)
+ hdr_len = skb_transport_offset(skb);
+ else
+ hdr_len = skb_inner_transport_offset(skb);
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
@@ -4798,7 +4798,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
if (test_bit(NAPI_STATE_THREADED, &napi->state)) {
/* Paired with smp_mb__before_atomic() in
- * napi_enable()/dev_set_threaded().
+ * napi_enable()/netif_set_threaded().
* Use READ_ONCE() to guarantee a complete
* read on napi->thread. Only call
* wake_up_process() when it's not NULL.
@@ -5749,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
struct packet_type **ppt_prev)
{
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO;
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct sk_buff *skb = *pskb;
@@ -5840,8 +5841,10 @@ skip_taps:
#endif
skb_reset_redirect(skb);
skip_classify:
- if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
+ if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) {
+ drop_reason = SKB_DROP_REASON_PFMEMALLOC;
goto drop;
+ }
if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
@@ -5939,8 +5942,6 @@ check_vlan_id:
}
if (pt_prev) {
- if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
- goto drop;
*ppt_prev = pt_prev;
} else {
drop:
@@ -5948,7 +5949,8 @@ drop:
dev_core_stats_rx_dropped_inc(skb->dev);
else
dev_core_stats_rx_nohandler_inc(skb->dev);
- kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
+
+ kfree_skb_reason(skb, drop_reason);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
@@ -6576,8 +6578,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
* it, we need to bound somehow the time packets are kept in
* the GRO layer.
*/
- gro_flush(&n->gro, !!timeout);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, !!timeout);
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6647,8 +6648,7 @@ static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&napi->gro, HZ >= 1000);
- gro_normal_list(&napi->gro);
+ gro_flush_normal(&napi->gro, HZ >= 1000);
clear_bit(NAPI_STATE_SCHED, &napi->state);
}
@@ -6926,22 +6926,83 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-int dev_set_threaded(struct net_device *dev, bool threaded)
+static void napi_stop_kthread(struct napi_struct *napi)
+{
+ unsigned long val, new;
+
+ /* Wait until the napi STATE_THREADED is unset. */
+ while (true) {
+ val = READ_ONCE(napi->state);
+
+ /* If the napi kthread owns this napi or the napi is idle,
+ * STATE_THREADED can be unset here.
+ */
+ if ((val & NAPIF_STATE_SCHED_THREADED) ||
+ !(val & NAPIF_STATE_SCHED)) {
+ new = val & (~NAPIF_STATE_THREADED);
+ } else {
+ msleep(20);
+ continue;
+ }
+
+ if (try_cmpxchg(&napi->state, &val, new))
+ break;
+ }
+
+ /* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by
+ * the kthread.
+ */
+ while (true) {
+ if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
+ break;
+
+ msleep(20);
+ }
+
+ kthread_stop(napi->thread);
+ napi->thread = NULL;
+}
+
+int napi_set_threaded(struct napi_struct *napi,
+ enum netdev_napi_threaded threaded)
+{
+ if (threaded) {
+ if (!napi->thread) {
+ int err = napi_kthread_create(napi);
+
+ if (err)
+ return err;
+ }
+ }
+
+ if (napi->config)
+ napi->config->threaded = threaded;
+
+ if (!threaded && napi->thread) {
+ napi_stop_kthread(napi);
+ } else {
+ /* Make sure kthread is created before THREADED bit is set. */
+ smp_mb__before_atomic();
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
+
+ return 0;
+}
+
+int netif_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded)
{
struct napi_struct *napi;
int err = 0;
netdev_assert_locked_or_invisible(dev);
- if (dev->threaded == threaded)
- return 0;
-
if (threaded) {
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (!napi->thread) {
err = napi_kthread_create(napi);
if (err) {
- threaded = false;
+ threaded = NETDEV_NAPI_THREADED_DISABLED;
break;
}
}
@@ -6961,12 +7022,32 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list)
- assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ list_for_each_entry(napi, &dev->napi_list, dev_list) {
+ if (!threaded && napi->thread)
+ napi_stop_kthread(napi);
+ else
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
+ }
return err;
}
-EXPORT_SYMBOL(dev_set_threaded);
+
+/**
+ * netif_threaded_enable() - enable threaded NAPIs
+ * @dev: net_device instance
+ *
+ * Enable threaded mode for the NAPI instances of the device. This may be useful
+ * for devices where multiple NAPI instances get scheduled by a single
+ * interrupt. Threaded NAPI allows moving the NAPI processing to cores other
+ * than the core where IRQ is mapped.
+ *
+ * This function should be called before @dev is registered.
+ */
+void netif_threaded_enable(struct net_device *dev)
+{
+ WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED));
+}
+EXPORT_SYMBOL(netif_threaded_enable);
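A usage sketch under stated assumptions: the mydrv_* names are hypothetical; only netif_threaded_enable(), alloc_etherdev() and register_netdev() are existing APIs. Per the kernel-doc above, the call must happen before the device is registered.

static int mydrv_probe(struct platform_device *pdev)
{
	struct net_device *dev;

	dev = alloc_etherdev(0);	/* no private data in this sketch */
	if (!dev)
		return -ENOMEM;

	/* Opt in to threaded NAPI before register_netdev(). */
	netif_threaded_enable(dev);

	return register_netdev(dev);
}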
/**
* netif_queue_set_napi - Associate queue with the napi
@@ -7182,6 +7263,8 @@ static void napi_restore_config(struct napi_struct *n)
napi_hash_add(n);
n->config->napi_id = n->napi_id;
}
+
+ WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded));
}
static void napi_save_config(struct napi_struct *n)
@@ -7279,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev,
* threaded mode will not be enabled in napi_enable().
*/
if (dev->threaded && napi_kthread_create(napi))
- dev->threaded = false;
+ dev->threaded = NETDEV_NAPI_THREADED_DISABLED;
netif_napi_set_irq_locked(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight_locked);
@@ -7448,8 +7531,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
}
/* Flush too old packets. If HZ < 1000, flush all packets */
- gro_flush(&n->gro, HZ >= 1000);
- gro_normal_list(&n->gro);
+ gro_flush_normal(&n->gro, HZ >= 1000);
/* Some drivers may have called napi_schedule
* prior to exhausting their budget.
@@ -9387,12 +9469,12 @@ void dev_set_rx_mode(struct net_device *dev)
}
/**
- * dev_get_flags - get flags reported to userspace
- * @dev: device
+ * netif_get_flags() - get flags reported to userspace
+ * @dev: device
*
- * Get the combination of flag bits exported through APIs to userspace.
+ * Get the combination of flag bits exported through APIs to userspace.
*/
-unsigned int dev_get_flags(const struct net_device *dev)
+unsigned int netif_get_flags(const struct net_device *dev)
{
unsigned int flags;
@@ -9415,7 +9497,7 @@ unsigned int dev_get_flags(const struct net_device *dev)
return flags;
}
-EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(netif_get_flags);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack)
@@ -9527,7 +9609,7 @@ int netif_change_flags(struct net_device *dev, unsigned int flags,
return ret;
}
-int __dev_set_mtu(struct net_device *dev, int new_mtu)
+int __netif_set_mtu(struct net_device *dev, int new_mtu)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -9538,7 +9620,7 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
-EXPORT_SYMBOL(__dev_set_mtu);
+EXPORT_SYMBOL_NS_GPL(__netif_set_mtu, "NETDEV_INTERNAL");
int dev_validate_mtu(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
@@ -9557,18 +9639,22 @@ int dev_validate_mtu(struct net_device *dev, int new_mtu,
}
/**
- * netif_set_mtu_ext - Change maximum transfer unit
- * @dev: device
- * @new_mtu: new transfer unit
- * @extack: netlink extended ack
+ * netif_set_mtu_ext() - Change maximum transfer unit
+ * @dev: device
+ * @new_mtu: new transfer unit
+ * @extack: netlink extended ack
+ *
+ * Change the maximum transfer size of the network device.
*
- * Change the maximum transfer size of the network device.
+ * Return: 0 on success, -errno on failure.
*/
int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
struct netlink_ext_ack *extack)
{
int err, orig_mtu;
+ netdev_ops_assert_locked(dev);
+
if (new_mtu == dev->mtu)
return 0;
@@ -9585,7 +9671,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
return err;
orig_mtu = dev->mtu;
- err = __dev_set_mtu(dev, new_mtu);
+ err = __netif_set_mtu(dev, new_mtu);
if (!err) {
err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
@@ -9595,7 +9681,7 @@ int netif_set_mtu_ext(struct net_device *dev, int new_mtu,
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
- __dev_set_mtu(dev, orig_mtu);
+ __netif_set_mtu(dev, orig_mtu);
call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
new_mtu);
}
@@ -9649,13 +9735,15 @@ void netif_set_group(struct net_device *dev, int new_group)
}
/**
- * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
- * @dev: device
- * @addr: new address
- * @extack: netlink extended ack
+ * netif_pre_changeaddr_notify() - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ *
+ * Return: 0 on success, -errno on failure.
*/
-int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
- struct netlink_ext_ack *extack)
+int netif_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
{
struct netdev_notifier_pre_changeaddr_info info = {
.info.dev = dev,
@@ -9667,7 +9755,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
return notifier_to_errno(rc);
}
-EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+EXPORT_SYMBOL_NS_GPL(netif_pre_changeaddr_notify, "NETDEV_INTERNAL");
int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
struct netlink_ext_ack *extack)
@@ -9681,7 +9769,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- err = dev_pre_changeaddr_notify(dev, ss->__data, extack);
+ err = netif_pre_changeaddr_notify(dev, ss->__data, extack);
if (err)
return err;
if (memcmp(dev->dev_addr, ss->__data, dev->addr_len)) {
@@ -9698,7 +9786,7 @@ int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss,
DECLARE_RWSEM(dev_addr_sem);
/* "sa" is a true struct sockaddr with limited "sa_data" member. */
-int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+int netif_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
{
size_t size = sizeof(sa->sa_data_min);
struct net_device *dev;
@@ -9724,7 +9812,7 @@ unlock:
up_read(&dev_addr_sem);
return ret;
}
-EXPORT_SYMBOL(dev_get_mac_address);
+EXPORT_SYMBOL_NS_GPL(netif_get_mac_address, "NETDEV_INTERNAL");
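These helpers move from plain exports to the GPL-only "NETDEV_INTERNAL" symbol namespace, so a modular caller additionally has to import it. A minimal sketch:

#include <linux/module.h>

/* Required by any module that calls netif_get_mac_address(),
 * __netif_set_mtu() or netif_pre_changeaddr_notify() after this change. */
MODULE_IMPORT_NS("NETDEV_INTERNAL");
MODULE_LICENSE("GPL");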
int netif_change_carrier(struct net_device *dev, bool new_carrier)
{
@@ -9777,16 +9865,17 @@ int dev_get_phys_port_name(struct net_device *dev,
}
/**
- * dev_get_port_parent_id - Get the device's port parent identifier
- * @dev: network device
- * @ppid: pointer to a storage for the port's parent identifier
- * @recurse: allow/disallow recursion to lower devices
+ * netif_get_port_parent_id() - Get the device's port parent identifier
+ * @dev: network device
+ * @ppid: pointer to a storage for the port's parent identifier
+ * @recurse: allow/disallow recursion to lower devices
+ *
+ * Get the device's port parent identifier.
*
- * Get the devices's port parent identifier
+ * Return: 0 on success, -errno on failure.
*/
-int dev_get_port_parent_id(struct net_device *dev,
- struct netdev_phys_item_id *ppid,
- bool recurse)
+int netif_get_port_parent_id(struct net_device *dev,
+ struct netdev_phys_item_id *ppid, bool recurse)
{
const struct net_device_ops *ops = dev->netdev_ops;
struct netdev_phys_item_id first = { };
@@ -9805,7 +9894,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = dev_get_port_parent_id(lower_dev, ppid, true);
+ err = netif_get_port_parent_id(lower_dev, ppid, true);
if (err)
break;
if (!first.id_len)
@@ -9816,7 +9905,7 @@ int dev_get_port_parent_id(struct net_device *dev,
return err;
}
-EXPORT_SYMBOL(dev_get_port_parent_id);
+EXPORT_SYMBOL(netif_get_port_parent_id);
/**
* netdev_port_same_parent_id - Indicate if two network devices have
@@ -9829,8 +9918,8 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
struct netdev_phys_item_id a_id = { };
struct netdev_phys_item_id b_id = { };
- if (dev_get_port_parent_id(a, &a_id, true) ||
- dev_get_port_parent_id(b, &b_id, true))
+ if (netif_get_port_parent_id(a, &a_id, true) ||
+ netif_get_port_parent_id(b, &b_id, true))
return false;
return netdev_phys_item_id_same(&a_id, &b_id);
@@ -10364,7 +10453,8 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
goto unlock;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog,
+ attr->link_create.attach_type);
link->dev = dev;
link->flags = attr->link_create.flags;
@@ -10730,12 +10820,14 @@ sync_lower:
* *before* calling udp_tunnel_get_rx_info,
* but *after* calling udp_tunnel_drop_rx_info.
*/
+ udp_tunnel_nic_lock(dev);
if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
dev->features = features;
udp_tunnel_get_rx_info(dev);
} else {
udp_tunnel_drop_rx_info(dev);
}
+ udp_tunnel_nic_unlock(dev);
}
if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
@@ -11715,7 +11807,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_len = sizeof_priv;
- ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
@@ -11937,21 +12029,8 @@ static void netdev_rss_contexts_free(struct net_device *dev)
mutex_lock(&dev->ethtool->rss_lock);
xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
- struct ethtool_rxfh_param rxfh;
-
- rxfh.indir = ethtool_rxfh_context_indir(ctx);
- rxfh.key = ethtool_rxfh_context_key(ctx);
- rxfh.hfunc = ctx->hfunc;
- rxfh.input_xfrm = ctx->input_xfrm;
- rxfh.rss_context = context;
- rxfh.rss_delete = true;
-
xa_erase(&dev->ethtool->rss_ctx, context);
- if (dev->ethtool_ops->create_rxfh_context)
- dev->ethtool_ops->remove_rxfh_context(dev, ctx,
- context, NULL);
- else
- dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL);
+ dev->ethtool_ops->remove_rxfh_context(dev, ctx, context, NULL);
kfree(ctx);
}
xa_destroy(&dev->ethtool->rss_ctx);
@@ -12036,7 +12115,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
netdev_lock(dev);
}
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
/* ... now unlock them and go over the rest. */
list_for_each_entry(dev, head, unreg_list) {
if (netdev_need_ops_lock(dev))
@@ -12044,7 +12123,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
else
list_add_tail(&dev->close_list, &close_head);
}
- dev_close_many(&close_head, true);
+ netif_close_many(&close_head, true);
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
diff --git a/net/core/dev.h b/net/core/dev.h
index e93f36b7ddf3..ab69edc0c3e3 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -315,6 +315,20 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n,
WRITE_ONCE(n->irq_suspend_timeout, timeout);
}
+static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n)
+{
+ if (test_bit(NAPI_STATE_THREADED, &n->state))
+ return NETDEV_NAPI_THREADED_ENABLED;
+
+ return NETDEV_NAPI_THREADED_DISABLED;
+}
+
+int napi_set_threaded(struct napi_struct *n,
+ enum netdev_napi_threaded threaded);
+
+int netif_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded);
+
int rps_cpumask_housekeeping(struct cpumask *mask);
#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 90716bd736f3..76c91f224886 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -603,7 +603,7 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
ASSERT_RTNL();
- err = dev_pre_changeaddr_notify(dev, addr, NULL);
+ err = netif_pre_changeaddr_notify(dev, addr, NULL);
if (err)
return err;
err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index 1bf0153195f2..f28852078aa6 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -367,3 +367,16 @@ void netdev_state_change(struct net_device *dev)
netdev_unlock_ops(dev);
}
EXPORT_SYMBOL(netdev_state_change);
+
+int dev_set_threaded(struct net_device *dev,
+ enum netdev_napi_threaded threaded)
+{
+ int ret;
+
+ netdev_lock(dev);
+ ret = netif_set_threaded(dev, threaded);
+ netdev_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(dev_set_threaded);
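dev_set_threaded() is now a locked wrapper around netif_set_threaded() and takes the new enum instead of a bool. A hedged sketch of a runtime toggle (the mydrv_* wrapper is hypothetical):

static int mydrv_set_threaded(struct net_device *dev, bool enable)
{
	/* dev_set_threaded() takes the instance lock itself, so this can
	 * be called from ordinary driver context. */
	return dev_set_threaded(dev, enable ? NETDEV_NAPI_THREADED_ENABLED :
					      NETDEV_NAPI_THREADED_DISABLED);
}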
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 616479e71466..9c0ad7f4b5d8 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -147,7 +147,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
switch (cmd) {
case SIOCGIFFLAGS: /* Get interface flags */
- ifr->ifr_flags = (short) dev_get_flags(dev);
+ ifr->ifr_flags = (short)netif_get_flags(dev);
return 0;
case SIOCGIFMETRIC: /* Get the metric on the interface
@@ -728,7 +728,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
switch (cmd) {
case SIOCGIFHWADDR:
dev_load(net, ifr->ifr_name);
- ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+ ret = netif_get_mac_address(&ifr->ifr_hwaddr, net,
+ ifr->ifr_name);
if (colon)
*colon = ':';
return ret;
diff --git a/net/core/dst.c b/net/core/dst.c
index 795ca07e28a4..e2de8b68c41d 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -145,12 +145,12 @@ void dst_dev_put(struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
- dst->obsolete = DST_OBSOLETE_DEAD;
+ WRITE_ONCE(dst->obsolete, DST_OBSOLETE_DEAD);
if (dst->ops->ifdown)
dst->ops->ifdown(dst, dev);
- dst->input = dst_discard;
- dst->output = dst_discard_out;
- dst->dev = blackhole_netdev;
+ WRITE_ONCE(dst->input, dst_discard);
+ WRITE_ONCE(dst->output, dst_discard_out);
+ WRITE_ONCE(dst->dev, blackhole_netdev);
netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
GFP_ATOMIC);
}
@@ -263,7 +263,7 @@ unsigned int dst_blackhole_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
- return mtu ? : dst->dev->mtu;
+ return mtu ? : dst_dev(dst)->mtu;
}
EXPORT_SYMBOL_GPL(dst_blackhole_mtu);
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 93a04d18e505..9ab4902324e1 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -52,7 +52,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
if (unlikely(!time_after(idst->refresh_ts,
READ_ONCE(dst_cache->reset_ts)) ||
- (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+ (READ_ONCE(dst->obsolete) && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
goto fail;
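dst_dev_put() now publishes its teardown stores with WRITE_ONCE() because readers such as dst_cache_per_cpu_get() above inspect the fields without holding a lock. A minimal sketch of the matching reader pattern (the helper name is hypothetical):

/* Hypothetical lockless reader: pairs with the WRITE_ONCE() stores
 * in dst_dev_put() so the data race is annotated for KCSAN. */
static bool dst_still_usable(const struct dst_entry *dst)
{
	return READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD;
}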
diff --git a/net/core/filter.c b/net/core/filter.c
index 327ca73f9cd7..c09a85c17496 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -122,6 +122,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
* @cap: limit on how short the eBPF program may trim the packet
+ * @reason: set to the drop reason when an error (negative value) is returned
*
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -130,7 +131,8 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* be accepted or -EPERM if the packet should be tossed.
*
*/
-int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
+ unsigned int cap, enum skb_drop_reason *reason)
{
int err;
struct sk_filter *filter;
@@ -142,15 +144,20 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
*/
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
+ *reason = SKB_DROP_REASON_PFMEMALLOC;
return -ENOMEM;
}
err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
- if (err)
+ if (err) {
+ *reason = SKB_DROP_REASON_SOCKET_FILTER;
return err;
+ }
err = security_sock_rcv_skb(sk, skb);
- if (err)
+ if (err) {
+ *reason = SKB_DROP_REASON_SECURITY_HOOK;
return err;
+ }
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
@@ -162,6 +169,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
skb->sk = save_sk;
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
+ if (err)
+ *reason = SKB_DROP_REASON_SOCKET_FILTER;
}
rcu_read_unlock();
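A hedged caller sketch for the extended signature: the receive helper is hypothetical, while sk_filter_trim_cap(), kfree_skb_reason() and SKB_DROP_REASON_NOT_SPECIFIED are existing APIs. The point is that the caller can now free the skb with the precise reason recorded by the filter path.

static int mydrv_filter_rcv(struct sock *sk, struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	int err;

	err = sk_filter_trim_cap(sk, skb, 1, &reason);
	if (err) {
		kfree_skb_reason(skb, reason);
		return err;
	}

	/* ... hand the (possibly trimmed) skb to the receive queue ... */
	return 0;
}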
@@ -3233,6 +3242,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
+{
+ skb->protocol = htons(proto);
+ if (skb_valid_dst(skb))
+ skb_dst_drop(skb);
+}
+
static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
{
/* Caller already did skb_cow() with len as headroom,
@@ -3329,7 +3345,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
skb_clear_hash(skb);
return 0;
@@ -3359,7 +3375,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
}
}
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
skb_clear_hash(skb);
return 0;
@@ -3550,10 +3566,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
}
if (skb_is_gso(skb)) {
@@ -3606,10 +3622,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
/* Match skb->protocol to new outer l3 protocol */
if (skb->protocol == htons(ETH_P_IP) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
- skb->protocol = htons(ETH_P_IPV6);
+ bpf_skb_change_protocol(skb, ETH_P_IPV6);
else if (skb->protocol == htons(ETH_P_IPV6) &&
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
- skb->protocol = htons(ETH_P_IP);
+ bpf_skb_change_protocol(skb, ETH_P_IP);
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -8683,7 +8699,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct __sk_buff, sk):
+ case bpf_ctx_range_ptr(struct __sk_buff, sk):
if (type == BPF_WRITE || size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
@@ -9261,7 +9277,7 @@ static bool sock_addr_is_valid_access(int off, int size,
return false;
}
break;
- case offsetof(struct bpf_sock_addr, sk):
+ case bpf_ctx_range_ptr(struct bpf_sock_addr, sk):
if (type != BPF_READ)
return false;
if (size != sizeof(__u64))
@@ -9311,17 +9327,17 @@ static bool sock_ops_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct bpf_sock_ops, sk):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, sk):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET_OR_NULL;
break;
- case offsetof(struct bpf_sock_ops, skb_data):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_PACKET;
break;
- case offsetof(struct bpf_sock_ops, skb_data_end):
+ case bpf_ctx_range_ptr(struct bpf_sock_ops, skb_data_end):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_PACKET_END;
@@ -9330,7 +9346,7 @@ static bool sock_ops_is_valid_access(int off, int size,
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size,
size_default);
- case offsetof(struct bpf_sock_ops, skb_hwtstamp):
+ case bpf_ctx_range(struct bpf_sock_ops, skb_hwtstamp):
if (size != sizeof(__u64))
return false;
break;
@@ -9400,17 +9416,17 @@ static bool sk_msg_is_valid_access(int off, int size,
return false;
switch (off) {
- case offsetof(struct sk_msg_md, data):
+ case bpf_ctx_range_ptr(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct sk_msg_md, data_end):
+ case bpf_ctx_range_ptr(struct sk_msg_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
if (size != sizeof(__u64))
return false;
break;
- case offsetof(struct sk_msg_md, sk):
+ case bpf_ctx_range_ptr(struct sk_msg_md, sk):
if (size != sizeof(__u64))
return false;
info->reg_type = PTR_TO_SOCKET;
@@ -11616,7 +11632,7 @@ static bool sk_lookup_is_valid_access(int off, int size,
return false;
switch (off) {
- case offsetof(struct bpf_sk_lookup, sk):
+ case bpf_ctx_range_ptr(struct bpf_sk_lookup, sk):
info->reg_type = PTR_TO_SOCKET_OR_NULL;
return size == sizeof(__u64);
diff --git a/net/core/hotdata.c b/net/core/hotdata.c
index 0bc893d5f07b..95d0a4df1006 100644
--- a/net/core/hotdata.c
+++ b/net/core/hotdata.c
@@ -2,7 +2,9 @@
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/list.h>
+#include <net/aligned_data.h>
#include <net/hotdata.h>
+#include <net/ip.h>
#include <net/proto_memory.h>
struct net_hotdata net_hotdata __cacheline_aligned = {
@@ -22,3 +24,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = {
.sysctl_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE
};
EXPORT_SYMBOL(net_hotdata);
+
+struct net_aligned_data net_aligned_data;
+EXPORT_IPV6_MOD(net_aligned_data);
diff --git a/net/core/ieee8021q_helpers.c b/net/core/ieee8021q_helpers.c
index 759a9b9f3f89..669b357b73b2 100644
--- a/net/core/ieee8021q_helpers.c
+++ b/net/core/ieee8021q_helpers.c
@@ -7,6 +7,11 @@
#include <net/dscp.h>
#include <net/ieee8021q.h>
+/* verify that table covers all 8 traffic types */
+#define TT_MAP_SIZE_OK(tbl) \
+ compiletime_assert(ARRAY_SIZE(tbl) == IEEE8021Q_TT_MAX, \
+ #tbl " size mismatch")
+
/* The following arrays map Traffic Types (TT) to traffic classes (TC) for
* different number of queues as shown in the example provided by
* IEEE 802.1Q-2022 in Annex I "I.3 Traffic type to traffic class mapping" and
@@ -101,51 +106,28 @@ int ieee8021q_tt_to_tc(enum ieee8021q_traffic_type tt, unsigned int num_queues)
switch (num_queues) {
case 8:
- compiletime_assert(ARRAY_SIZE(ieee8021q_8queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_8queue_tt_tc_map != max - 1");
+ TT_MAP_SIZE_OK(ieee8021q_8queue_tt_tc_map);
return ieee8021q_8queue_tt_tc_map[tt];
case 7:
- compiletime_assert(ARRAY_SIZE(ieee8021q_7queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_7queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_7queue_tt_tc_map);
return ieee8021q_7queue_tt_tc_map[tt];
case 6:
- compiletime_assert(ARRAY_SIZE(ieee8021q_6queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_6queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_6queue_tt_tc_map);
return ieee8021q_6queue_tt_tc_map[tt];
case 5:
- compiletime_assert(ARRAY_SIZE(ieee8021q_5queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_5queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_5queue_tt_tc_map);
return ieee8021q_5queue_tt_tc_map[tt];
case 4:
- compiletime_assert(ARRAY_SIZE(ieee8021q_4queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_4queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_4queue_tt_tc_map);
return ieee8021q_4queue_tt_tc_map[tt];
case 3:
- compiletime_assert(ARRAY_SIZE(ieee8021q_3queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_3queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_3queue_tt_tc_map);
return ieee8021q_3queue_tt_tc_map[tt];
case 2:
- compiletime_assert(ARRAY_SIZE(ieee8021q_2queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_2queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_2queue_tt_tc_map);
return ieee8021q_2queue_tt_tc_map[tt];
case 1:
- compiletime_assert(ARRAY_SIZE(ieee8021q_1queue_tt_tc_map) !=
- IEEE8021Q_TT_MAX - 1,
- "ieee8021q_1queue_tt_tc_map != max - 1");
-
+ TT_MAP_SIZE_OK(ieee8021q_1queue_tt_tc_map);
return ieee8021q_1queue_tt_tc_map[tt];
}
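A small usage sketch, assuming the IEEE8021Q_TT_VI enumerator from <net/ieee8021q.h> and a hypothetical mydrv_* wrapper: map the video traffic type onto a device's queue count and fall back to traffic class 0 on error.

static int mydrv_video_tc(unsigned int num_queues)
{
	int tc = ieee8021q_tt_to_tc(IEEE8021Q_TT_VI, num_queues);

	/* A negative return means an unsupported traffic type or queue count. */
	return tc < 0 ? 0 : tc;
}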
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 49dce9a82295..bddfa389effa 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -28,6 +28,7 @@
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
@@ -53,8 +54,8 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm);
#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
@@ -153,11 +154,12 @@ static void neigh_update_gc_list(struct neighbour *n)
if (n->dead)
goto out;
- /* remove from the gc list if new state is permanent or if neighbor
- * is externally learned; otherwise entry should be on the gc list
+ /* remove from the gc list if new state is permanent or if neighbor is
+ * externally learned / validated; otherwise entry should be on the gc
+ * list
*/
exempt_from_gc = n->nud_state & NUD_PERMANENT ||
- n->flags & NTF_EXT_LEARNED;
+ n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
on_gc_list = !list_empty(&n->gc_list);
if (exempt_from_gc && on_gc_list) {
@@ -204,6 +206,7 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
+ ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
if (ndm_flags & NTF_EXT_LEARNED)
@@ -221,6 +224,14 @@ static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
*notify = 1;
*managed_update = true;
}
+ if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ neigh->flags |= NTF_EXT_VALIDATED;
+ else
+ neigh->flags &= ~NTF_EXT_VALIDATED;
+ *notify = 1;
+ *gc_update = true;
+ }
}
bool neigh_remove_one(struct neighbour *n)
@@ -368,6 +379,43 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
}
}
+static void neigh_flush_one(struct neighbour *n)
+{
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+
+ write_lock(&n->lock);
+
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+
+ neigh_dbg(2, "neigh %p is stray\n", n);
+ }
+
+ write_unlock(&n->lock);
+
+ neigh_cleanup_and_release(n);
+}
+
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
@@ -378,35 +426,29 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
dev_head = neigh_get_dev_table(dev, tbl->family);
hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
- if (skip_perm && n->nud_state & NUD_PERMANENT)
+ if (skip_perm &&
+ (n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_VALIDATED))
continue;
- hlist_del_rcu(&n->hash);
- hlist_del_rcu(&n->dev_list);
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- * We must destroy neighbour entry,
- * but someone still uses it.
- *
- * The destroy will be delayed until
- * the last user releases us, but
- * we must kill timers etc. and move
- * it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ neigh_flush_one(n);
+ }
+}
+
+static void neigh_flush_table(struct neigh_table *tbl)
+{
+ struct neigh_hash_table *nht;
+ int i;
+
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
+ struct hlist_node *tmp;
+ struct neighbour *n;
+
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
+ neigh_flush_one(n);
}
}
@@ -422,8 +464,15 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev, skip_perm);
- pneigh_ifdown_and_unlock(tbl, dev);
+ if (likely(dev)) {
+ neigh_flush_dev(tbl, dev, skip_perm);
+ } else {
+ DEBUG_NET_WARN_ON_ONCE(skip_perm);
+ neigh_flush_table(tbl);
+ }
+ write_unlock_bh(&tbl->lock);
+
+ pneigh_ifdown(tbl, dev, skip_perm);
pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
tbl->family);
if (skb_queue_empty_lockless(&tbl->proxy_queue))
@@ -706,54 +755,53 @@ static u32 pneigh_hash(const void *pkey, unsigned int key_len)
return hash_val;
}
-static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
- struct net *net,
- const void *pkey,
- unsigned int key_len,
- struct net_device *dev)
+struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
+ struct net *net, const void *pkey,
+ struct net_device *dev)
{
+ struct pneigh_entry *n;
+ unsigned int key_len;
+ u32 hash_val;
+
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(tbl->phash_buckets[hash_val],
+ lockdep_is_held(&tbl->phash_lock));
+
while (n) {
if (!memcmp(n->key, pkey, key_len) &&
net_eq(pneigh_net(n), net) &&
(n->dev == dev || !n->dev))
return n;
- n = n->next;
- }
- return NULL;
-}
-struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey, struct net_device *dev)
-{
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
+ }
- return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
+ return NULL;
}
-EXPORT_SYMBOL_GPL(__pneigh_lookup);
+EXPORT_IPV6_MOD(pneigh_lookup);
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
- struct net *net, const void *pkey,
- struct net_device *dev, int creat)
+int pneigh_create(struct neigh_table *tbl, struct net *net,
+ const void *pkey, struct net_device *dev,
+ u32 flags, u8 protocol, bool permanent)
{
struct pneigh_entry *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
-
- read_lock_bh(&tbl->lock);
- n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
- net, pkey, key_len, dev);
- read_unlock_bh(&tbl->lock);
+ unsigned int key_len;
+ u32 hash_val;
+ int err = 0;
- if (n || !creat)
- goto out;
+ mutex_lock(&tbl->phash_lock);
- ASSERT_RTNL();
+ n = pneigh_lookup(tbl, net, pkey, dev);
+ if (n)
+ goto update;
+ key_len = tbl->key_len;
n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
- if (!n)
+ if (!n) {
+ err = -ENOBUFS;
goto out;
+ }
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
@@ -763,73 +811,98 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (tbl->pconstructor && tbl->pconstructor(n)) {
netdev_put(dev, &n->dev_tracker);
kfree(n);
- n = NULL;
+ err = -ENOBUFS;
goto out;
}
- write_lock_bh(&tbl->lock);
+ hash_val = pneigh_hash(pkey, key_len);
n->next = tbl->phash_buckets[hash_val];
- tbl->phash_buckets[hash_val] = n;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
+update:
+ WRITE_ONCE(n->flags, flags);
+ n->permanent = permanent;
+ WRITE_ONCE(n->protocol, protocol);
out:
- return n;
+ mutex_unlock(&tbl->phash_lock);
+ return err;
}
-EXPORT_SYMBOL(pneigh_lookup);
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+ struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+ netdev_put(n->dev, &n->dev_tracker);
+ kfree(n);
+}
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
- struct pneigh_entry *n, **np;
- unsigned int key_len = tbl->key_len;
- u32 hash_val = pneigh_hash(pkey, key_len);
+ struct pneigh_entry *n, __rcu **np;
+ unsigned int key_len;
+ u32 hash_val;
- write_lock_bh(&tbl->lock);
- for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ key_len = tbl->key_len;
+ hash_val = pneigh_hash(pkey, key_len);
+
+ mutex_lock(&tbl->phash_lock);
+
+ for (np = &tbl->phash_buckets[hash_val];
+ (n = rcu_dereference_protected(*np, 1)) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
net_eq(pneigh_net(n), net)) {
- *np = n->next;
- write_unlock_bh(&tbl->lock);
+ rcu_assign_pointer(*np, n->next);
+
+ mutex_unlock(&tbl->phash_lock);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- netdev_put(n->dev, &n->dev_tracker);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
return -ENOENT;
}
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
- struct pneigh_entry *n, **np, *freelist = NULL;
+ struct pneigh_entry *n, __rcu **np;
+ LIST_HEAD(head);
u32 h;
+ mutex_lock(&tbl->phash_lock);
+
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
- while ((n = *np) != NULL) {
+ while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
+ if (skip_perm && n->permanent)
+ goto skip;
if (!dev || n->dev == dev) {
- *np = n->next;
- n->next = freelist;
- freelist = n;
+ rcu_assign_pointer(*np, n->next);
+ list_add(&n->free_node, &head);
continue;
}
+skip:
np = &n->next;
}
}
- write_unlock_bh(&tbl->lock);
- while ((n = freelist)) {
- freelist = n->next;
- n->next = NULL;
+
+ mutex_unlock(&tbl->phash_lock);
+
+ while (!list_empty(&head)) {
+ n = list_first_entry(&head, typeof(*n), free_node);
+ list_del(&n->free_node);
+
if (tbl->pdestructor)
tbl->pdestructor(n);
- netdev_put(n->dev, &n->dev_tracker);
- kfree(n);
+
+ call_rcu(&n->rcu, pneigh_destroy);
}
- return -ENOENT;
}
static inline void neigh_parms_put(struct neigh_parms *parms)
@@ -937,7 +1010,8 @@ static void neigh_periodic_work(struct work_struct *work)
state = n->nud_state;
if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
- (n->flags & NTF_EXT_LEARNED)) {
+ (n->flags &
+ (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
write_unlock(&n->lock);
continue;
}
@@ -1090,9 +1164,15 @@ static void neigh_timer_handler(struct timer_list *t)
if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ if (neigh->nud_state == NUD_PROBE &&
+ neigh->flags & NTF_EXT_VALIDATED) {
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
+ neigh->updated = jiffies;
+ } else {
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
+ neigh_invalidate(neigh);
+ }
notify = 1;
- neigh_invalidate(neigh);
goto out;
}
@@ -1240,6 +1320,8 @@ static void neigh_update_hhs(struct neighbour *neigh)
NTF_ROUTER flag.
NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
a router.
+ NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
+ or invalidated.
Caller MUST hold reference count on the entry.
*/
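In this patch the flag is normally set from user space via NTF_EXT_VALIDATED; a hedged sketch of what an in-kernel caller would look like (the wrapper name is hypothetical, neigh_update() and the flag constants are real):

static int mydrv_mark_ext_validated(struct neighbour *n, const u8 *lladdr)
{
	/* The entry keeps being probed but is never invalidated or
	 * garbage-collected by the stack. */
	return neigh_update(n, lladdr, NUD_REACHABLE,
			    NEIGH_UPDATE_F_OVERRIDE |
			    NEIGH_UPDATE_F_EXT_VALIDATED, 0);
}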
@@ -1402,7 +1484,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
+ if (dst &&
+ READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
@@ -1756,6 +1839,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+ mutex_init(&tbl->phash_lock);
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
@@ -1972,21 +2056,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[NDA_PROTOCOL])
protocol = nla_get_u8(tb[NDA_PROTOCOL]);
if (ndm_flags & NTF_PROXY) {
- struct pneigh_entry *pn;
-
- if (ndm_flags & NTF_MANAGED) {
+ if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
goto out;
}
- err = -ENOBUFS;
- pn = pneigh_lookup(tbl, net, dst, dev, 1);
- if (pn) {
- pn->flags = ndm_flags;
- if (protocol)
- pn->protocol = protocol;
- err = 0;
- }
+ err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
+ !!(ndm->ndm_state & NUD_PERMANENT));
goto out;
}
@@ -2004,7 +2080,8 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (neigh == NULL) {
bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
bool exempt_from_gc = ndm_permanent ||
- ndm_flags & NTF_EXT_LEARNED;
+ ndm_flags & (NTF_EXT_LEARNED |
+ NTF_EXT_VALIDATED);
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
@@ -2015,10 +2092,27 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EINVAL;
goto out;
}
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED will result in the neighbor
+ * being created with an invalid state (NUD_NONE).
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = NUD_NONE;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot create externally validated neighbor with an invalid state");
+ err = -EINVAL;
+ goto out;
+ }
+ }
neigh = ___neigh_create(tbl, dst, dev,
ndm_flags &
- (NTF_EXT_LEARNED | NTF_MANAGED),
+ (NTF_EXT_LEARNED | NTF_MANAGED |
+ NTF_EXT_VALIDATED),
exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
@@ -2030,6 +2124,24 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh_release(neigh);
goto out;
}
+ if (ndm_flags & NTF_EXT_VALIDATED) {
+ u8 state = ndm->ndm_state;
+
+ /* NTF_USE and NTF_MANAGED do not update the existing
+ * state other than clearing it if it was
+ * NUD_PERMANENT.
+ */
+ if (ndm_flags & (NTF_USE | NTF_MANAGED))
+ state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
+
+ if (!(state & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot mark neighbor as externally validated with an invalid state");
+ err = -EINVAL;
+ neigh_release(neigh);
+ goto out;
+ }
+ }
if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
@@ -2046,13 +2158,13 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
flags |= NEIGH_UPDATE_F_MANAGED;
if (ndm_flags & NTF_USE)
flags |= NEIGH_UPDATE_F_USE;
+ if (ndm_flags & NTF_EXT_VALIDATED)
+ flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
NETLINK_CB(skb).portid, extack);
- if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
+ if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
neigh_event_send(neigh, NULL);
- err = 0;
- }
neigh_release(neigh);
out:
return err;
@@ -2579,13 +2691,15 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
u32 neigh_flags, neigh_flags_ext;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
+ u8 protocol;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
- neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
- neigh_flags = pn->flags & NTF_OLD_MASK;
+ neigh_flags = READ_ONCE(pn->flags);
+ neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
+ neigh_flags &= NTF_OLD_MASK;
ndm = nlmsg_data(nlh);
ndm->ndm_family = tbl->family;
@@ -2599,7 +2713,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ protocol = READ_ONCE(pn->protocol);
+ if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
goto nla_put_failure;
if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
goto nla_put_failure;
@@ -2706,12 +2821,12 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;
- read_lock_bh(&tbl->lock);
-
for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
if (h > s_h)
s_idx = 0;
- for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
+ for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
+ n;
+ n = rcu_dereference(n->next)) {
if (idx < s_idx || pneigh_net(n) != net)
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2720,16 +2835,13 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH, flags, tbl);
- if (err < 0) {
- read_unlock_bh(&tbl->lock);
+ if (err < 0)
goto out;
- }
next:
idx++;
}
}
- read_unlock_bh(&tbl->lock);
out:
cb->args[3] = h;
cb->args[4] = idx;
@@ -2846,64 +2958,58 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
return err;
}
-static int neigh_valid_get_req(const struct nlmsghdr *nlh,
- struct neigh_table **tbl,
- void **dst, int *dev_idx, u8 *ndm_flags,
- struct netlink_ext_ack *extack)
+static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *tb[NDA_MAX + 1];
struct ndmsg *ndm;
int err, i;
ndm = nlmsg_payload(nlh, sizeof(*ndm));
if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
if (ndm->ndm_flags & ~NTF_PROXY) {
NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return ERR_PTR(-EINVAL);
}
err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
NDA_MAX, nda_policy, extack);
if (err < 0)
- return err;
-
- *ndm_flags = ndm->ndm_flags;
- *dev_idx = ndm->ndm_ifindex;
- *tbl = neigh_find_table(ndm->ndm_family);
- if (*tbl == NULL) {
- NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
- return -EAFNOSUPPORT;
- }
+ return ERR_PTR(err);
for (i = 0; i <= NDA_MAX; ++i) {
- if (!tb[i])
- continue;
-
switch (i) {
case NDA_DST:
- if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
- NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
- return -EINVAL;
+ if (!tb[i]) {
+ NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
+ return ERR_PTR(-EINVAL);
}
- *dst = nla_data(tb[i]);
break;
default:
+ if (!tb[i])
+ continue;
+
NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
}
- return 0;
+ return ndm;
}
static inline size_t neigh_nlmsg_size(void)
@@ -2917,27 +3023,6 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int neigh_get_reply(struct net *net, struct neighbour *neigh,
- u32 pid, u32 seq)
-{
- struct sk_buff *skb;
- int err = 0;
-
- skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
-
- err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
-
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
-
static inline size_t pneigh_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ndmsg))
@@ -2946,85 +3031,91 @@ static inline size_t pneigh_nlmsg_size(void)
+ nla_total_size(1); /* NDA_PROTOCOL */
}
-static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
- u32 pid, u32 seq, struct neigh_table *tbl)
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
+ struct net *net = sock_net(in_skb->sk);
+ u32 pid = NETLINK_CB(in_skb).portid;
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev = NULL;
+ u32 seq = nlh->nlmsg_seq;
+ struct neigh_table *tbl;
+ struct neighbour *neigh;
struct sk_buff *skb;
- int err = 0;
+ struct ndmsg *ndm;
+ void *dst;
+ int err;
- skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+ ndm = neigh_valid_get_req(nlh, tb, extack);
+ if (IS_ERR(ndm))
+ return PTR_ERR(ndm);
+
+ if (ndm->ndm_flags & NTF_PROXY)
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+ else
+ skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
- if (err) {
- kfree_skb(skb);
- goto errout;
- }
+ rcu_read_lock();
- err = rtnl_unicast(skb, net, pid);
-errout:
- return err;
-}
+ tbl = neigh_find_table(ndm->ndm_family);
+ if (!tbl) {
+ NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+ err = -EAFNOSUPPORT;
+ goto err_unlock;
+ }
-static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct net_device *dev = NULL;
- struct neigh_table *tbl = NULL;
- struct neighbour *neigh;
- void *dst = NULL;
- u8 ndm_flags = 0;
- int dev_idx = 0;
- int err;
+ if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+ err = -EINVAL;
+ goto err_unlock;
+ }
- err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
- extack);
- if (err < 0)
- return err;
+ dst = nla_data(tb[NDA_DST]);
- if (dev_idx) {
- dev = __dev_get_by_index(net, dev_idx);
+ if (ndm->ndm_ifindex) {
+ dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
if (!dev) {
NL_SET_ERR_MSG(extack, "Unknown device ifindex");
- return -ENODEV;
+ err = -ENODEV;
+ goto err_unlock;
}
}
- if (!dst) {
- NL_SET_ERR_MSG(extack, "Network address not specified");
- return -EINVAL;
- }
-
- if (ndm_flags & NTF_PROXY) {
+ if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
- pn = pneigh_lookup(tbl, net, dst, dev, 0);
+ pn = pneigh_lookup(tbl, net, dst, dev);
if (!pn) {
NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
- return -ENOENT;
+ err = -ENOENT;
+ goto err_unlock;
}
- return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq, tbl);
- }
- if (!dev) {
- NL_SET_ERR_MSG(extack, "No device specified");
- return -EINVAL;
- }
+ err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
+ if (err)
+ goto err_unlock;
+ } else {
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (!neigh) {
+ NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+ err = -ENOENT;
+ goto err_unlock;
+ }
- neigh = neigh_lookup(tbl, dst, dev);
- if (!neigh) {
- NL_SET_ERR_MSG(extack, "Neighbour entry not found");
- return -ENOENT;
+ err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ neigh_release(neigh);
+ if (err)
+ goto err_unlock;
}
- err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
- nlh->nlmsg_seq);
-
- neigh_release(neigh);
+ rcu_read_unlock();
+ return rtnl_unicast(skb, net, pid);
+err_unlock:
+ rcu_read_unlock();
+ kfree_skb(skb);
return err;
}
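pneigh_lookup() no longer takes the table lock or a creat argument; lookups run under RCU and creation moved to pneigh_create(). A minimal caller sketch mirroring what neigh_get() does above (the helper name is hypothetical):

static bool mydrv_has_proxy_entry(struct neigh_table *tbl, struct net *net,
				  const void *pkey, struct net_device *dev)
{
	bool found;

	rcu_read_lock();
	found = pneigh_lookup(tbl, net, pkey, dev) != NULL;
	rcu_read_unlock();

	return found;
}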
@@ -3231,9 +3322,10 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket];
+ pn = rcu_dereference(tbl->phash_buckets[bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3251,15 +3343,17 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
struct neigh_table *tbl = state->tbl;
do {
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
} while (pn && !net_eq(pneigh_net(pn), net));
while (!pn) {
if (++state->bucket > PNEIGH_HASHMASK)
break;
- pn = tbl->phash_buckets[state->bucket];
+
+ pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
+
while (pn && !net_eq(pneigh_net(pn), net))
- pn = pn->next;
+ pn = rcu_dereference(pn->next);
if (pn)
break;
}
@@ -3823,7 +3917,7 @@ static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
- .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
};
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1ace0cd01adc..c28cd6665444 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -256,7 +256,7 @@ static ssize_t name_assign_type_show(struct device *dev,
}
static DEVICE_ATTR_RO(name_assign_type);
-/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
+/* use same locking rules as GIFHWADDR ioctl's (netif_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -641,12 +641,6 @@ static ssize_t phys_port_id_show(struct device *dev,
struct netdev_phys_item_id ppid;
ssize_t ret;
- /* The check is also done in dev_get_phys_port_id; this helps returning
- * early without hitting the locking section below.
- */
- if (!netdev->netdev_ops->ndo_get_phys_port_id)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
@@ -668,13 +662,6 @@ static ssize_t phys_port_name_show(struct device *dev,
char name[IFNAMSIZ];
ssize_t ret;
- /* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the locking section below.
- */
- if (!netdev->netdev_ops->ndo_get_phys_port_name &&
- !netdev->devlink_port)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
@@ -696,19 +683,11 @@ static ssize_t phys_switch_id_show(struct device *dev,
struct netdev_phys_item_id ppid = { };
ssize_t ret;
- /* The checks are also done in dev_get_phys_port_name; this helps
- * returning early without hitting the locking section below. This works
- * because recurse is false when calling dev_get_port_parent_id.
- */
- if (!netdev->netdev_ops->ndo_get_port_parent_id &&
- !netdev->devlink_port)
- return -EOPNOTSUPP;
-
ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev);
if (ret)
return ret;
- ret = dev_get_port_parent_id(netdev, &ppid, false);
+ ret = netif_get_port_parent_id(netdev, &ppid, false);
if (!ret)
ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
@@ -718,6 +697,40 @@ static ssize_t phys_switch_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_switch_id);
+static struct attribute *netdev_phys_attrs[] __ro_after_init = {
+ &dev_attr_phys_port_id.attr,
+ &dev_attr_phys_port_name.attr,
+ &dev_attr_phys_switch_id.attr,
+ NULL,
+};
+
+static umode_t netdev_phys_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct net_device *netdev = to_net_dev(dev);
+
+ if (attr == &dev_attr_phys_port_id.attr) {
+ if (!netdev->netdev_ops->ndo_get_phys_port_id)
+ return 0;
+ } else if (attr == &dev_attr_phys_port_name.attr) {
+ if (!netdev->netdev_ops->ndo_get_phys_port_name &&
+ !netdev->devlink_port)
+ return 0;
+ } else if (attr == &dev_attr_phys_switch_id.attr) {
+ if (!netdev->netdev_ops->ndo_get_port_parent_id &&
+ !netdev->devlink_port)
+ return 0;
+ }
+
+ return attr->mode;
+}
+
+static const struct attribute_group netdev_phys_group = {
+ .attrs = netdev_phys_attrs,
+ .is_visible = netdev_phys_is_visible,
+};
+
static ssize_t threaded_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -744,7 +757,7 @@ static int modify_napi_threaded(struct net_device *dev, unsigned long val)
if (val != 0 && val != 1)
return -EOPNOTSUPP;
- ret = dev_set_threaded(dev, val);
+ ret = netif_set_threaded(dev, val);
return ret;
}
@@ -783,9 +796,6 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
&dev_attr_napi_defer_hard_irqs.attr,
- &dev_attr_phys_port_id.attr,
- &dev_attr_phys_port_name.attr,
- &dev_attr_phys_switch_id.attr,
&dev_attr_proto_down.attr,
&dev_attr_carrier_up_count.attr,
&dev_attr_carrier_down_count.attr,
@@ -1200,12 +1210,21 @@ static int rx_queue_default_mask(struct net_device *dev,
struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
- struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);
+ struct cpumask *rps_default_mask;
+ int res = 0;
+ mutex_lock(&rps_default_mask_mutex);
+
+ rps_default_mask = dev_net(dev)->core.rps_default_mask;
if (rps_default_mask && !cpumask_empty(rps_default_mask))
- return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
-#endif
+ res = netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
+
+ mutex_unlock(&rps_default_mask_mutex);
+
+ return res;
+#else
return 0;
+#endif
}
static int rx_queue_add_kobject(struct net_device *dev, int index)
@@ -2328,6 +2347,7 @@ int netdev_register_kobject(struct net_device *ndev)
groups++;
*groups++ = &netstat_group;
+ *groups++ = &netdev_phys_group;
if (wireless_group_needed(ndev))
*groups++ = &wireless_group;
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 8a5b04c2699a..e938f25e8e86 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -11,4 +11,6 @@ int netdev_queue_update_kobjects(struct net_device *net,
int netdev_change_owner(struct net_device *, const struct net *net_old,
const struct net *net_new);
+extern struct mutex rps_default_mask_mutex;
+
#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae54f26709ca..1b6f3826dd0e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -19,9 +19,9 @@
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
-#include <linux/cookie.h>
#include <linux/proc_fs.h>
+#include <net/aligned_data.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
@@ -64,8 +64,6 @@ DECLARE_RWSEM(pernet_ops_rwsem);
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
-DEFINE_COOKIE(net_cookie);
-
static struct net_generic *net_alloc_generic(void)
{
unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
@@ -319,10 +317,10 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
if (refcount_read(&net->ns.count) == 0)
return NETNSA_NSID_NOT_ASSIGNED;
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
id = __peernet2id(net, peer);
if (id >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
return id;
}
@@ -332,12 +330,12 @@ int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
* just been idr_remove()'d from there in cleanup_net().
*/
if (!maybe_get_net(peer)) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
return NETNSA_NSID_NOT_ASSIGNED;
}
id = alloc_netid(net, peer, -1);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
put_net(peer);
if (id < 0)
@@ -403,8 +401,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
{
refcount_set(&net->passive, 1);
refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt");
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt");
get_random_bytes(&net->hash_mix, sizeof(u32));
net->dev_base_seq = 1;
@@ -434,9 +432,7 @@ static __net_init int setup_net(struct net *net)
LIST_HEAD(net_exit_list);
int error = 0;
- preempt_disable();
- net->net_cookie = gen_cookie_next(&net_cookie);
- preempt_enable();
+ net->net_cookie = atomic64_inc_return(&net_aligned_data.net_cookie);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -628,20 +624,20 @@ static void unhash_nsid(struct net *net, struct net *last)
for_each_net(tmp) {
int id;
- spin_lock_bh(&tmp->nsid_lock);
+ spin_lock(&tmp->nsid_lock);
id = __peernet2id(tmp, net);
if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
+ spin_unlock(&tmp->nsid_lock);
if (id >= 0)
rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
GFP_KERNEL);
if (tmp == last)
break;
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
}
static LLIST_HEAD(cleanup_list);
@@ -791,16 +787,50 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+#ifdef CONFIG_NET_NS_REFCNT_TRACKER
+static void net_ns_net_debugfs(struct net *net)
+{
+ ref_tracker_dir_symlink(&net->refcnt_tracker, "netns-%llx-%u-refcnt",
+ net->net_cookie, net->ns.inum);
+ ref_tracker_dir_symlink(&net->notrefcnt_tracker, "netns-%llx-%u-notrefcnt",
+ net->net_cookie, net->ns.inum);
+}
+
+static int __init init_net_debugfs(void)
+{
+ ref_tracker_dir_debugfs(&init_net.refcnt_tracker);
+ ref_tracker_dir_debugfs(&init_net.notrefcnt_tracker);
+ net_ns_net_debugfs(&init_net);
+ return 0;
+}
+late_initcall(init_net_debugfs);
+#else
+static void net_ns_net_debugfs(struct net *net)
+{
+}
+#endif
+
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
net->ns.ops = &netns_operations;
#endif
- return ns_alloc_inum(&net->ns);
+ net->ns.inum = PROC_NET_INIT_INO;
+ if (net != &init_net) {
+ int ret = ns_alloc_inum(&net->ns);
+ if (ret)
+ return ret;
+ }
+ net_ns_net_debugfs(net);
+ return 0;
}
static __net_exit void net_ns_net_exit(struct net *net)
{
+ /*
+ * Initial network namespace doesn't exit so we don't need any
+ * special checks here.
+ */
ns_free_inum(&net->ns);
}
@@ -852,9 +882,9 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
- spin_lock_bh(&net->nsid_lock);
+ spin_lock(&net->nsid_lock);
if (__peernet2id(net, peer) >= 0) {
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
err = -EEXIST;
NL_SET_BAD_ATTR(extack, nla);
NL_SET_ERR_MSG(extack,
@@ -863,7 +893,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = alloc_netid(net, peer, nsid);
- spin_unlock_bh(&net->nsid_lock);
+ spin_unlock(&net->nsid_lock);
if (err >= 0) {
rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
nlh, GFP_KERNEL);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index d22f0919821e..dff66d8fb325 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -21,7 +21,9 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state
struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
return css_cls_state(task_css_check(p, net_cls_cgrp_id,
- rcu_read_lock_bh_held()));
+ rcu_read_lock_held() ||
+ rcu_read_lock_bh_held() ||
+ rcu_read_lock_trace_held()));
}
EXPORT_SYMBOL_GPL(task_cls_state);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 4fc44587f493..e9a2a6f26cb7 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -92,11 +92,12 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1]
};
/* NETDEV_CMD_NAPI_SET - do */
-static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = {
+static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = {
[NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
[NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range),
[NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, },
[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, },
+ [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1),
};
/* NETDEV_CMD_BIND_TX - do */
@@ -193,7 +194,7 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.cmd = NETDEV_CMD_NAPI_SET,
.doit = netdev_nl_napi_set_doit,
.policy = netdev_napi_set_nl_policy,
- .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ .maxattr = NETDEV_A_NAPI_THREADED,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 2afa7b2141aa..6314eb7bdf69 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -184,6 +184,10 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
goto nla_put_failure;
+ if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED,
+ napi_get_threaded(napi)))
+ goto nla_put_failure;
+
if (napi->thread) {
pid = task_pid_nr(napi->thread);
if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
@@ -322,8 +326,18 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info)
{
u64 irq_suspend_timeout = 0;
u64 gro_flush_timeout = 0;
+ u8 threaded = 0;
u32 defer = 0;
+ if (info->attrs[NETDEV_A_NAPI_THREADED]) {
+ int ret;
+
+ threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]);
+ ret = napi_set_threaded(napi, threaded);
+ if (ret)
+ return ret;
+ }
+
if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) {
defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]);
napi_set_defer_hard_irqs(napi, defer);
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index d126f10197bf..3bf1151d8061 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -97,14 +97,12 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
- if (rxq_idx >= dev->real_num_rx_queues)
- return -EINVAL;
- rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
-
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
+ rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
+
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
return -EINVAL;
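
The reordering above keeps the Spectre-v1 mitigation while letting the out-of-range case report an extack message: the index is validated first, and only then does array_index_nospec() clamp it so a mispredicted branch can never use an out-of-range value. A minimal sketch of the resulting pattern, assuming the same fields as the hunk above (pick_queue() is an invented name, not part of the patch):

#include <linux/netdevice.h>
#include <linux/nospec.h>

/* Illustrative only: validate the index, then clamp it under speculation. */
static int pick_queue(struct net_device *dev, unsigned int rxq_idx)
{
	if (rxq_idx >= dev->real_num_rx_queues)
		return -ERANGE;

	/* A mispredicted branch can no longer use an out-of-range index. */
	rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);

	return rxq_idx;
}
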
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4ddb7490df4b..a1da97b5b30b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,13 +58,6 @@ static void zap_completion_queue(void);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
-#define np_info(np, fmt, ...) \
- pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
-#define np_err(np, fmt, ...) \
- pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
-#define np_notice(np, fmt, ...) \
- pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
-
static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
struct net_device *dev,
struct netdev_queue *txq)
@@ -379,6 +372,31 @@ out:
return ret;
}
+static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb,
+ int len)
+{
+ struct udphdr *udph;
+ int udp_len;
+
+ udp_len = len + sizeof(struct udphdr);
+ udph = udp_hdr(skb);
+
+	/* zero check first, since the field itself is covered by csum_partial() */
+ udph->check = 0;
+ if (np->ipv6)
+ udph->check = csum_ipv6_magic(&np->local_ip.in6,
+ &np->remote_ip.in6,
+ udp_len, IPPROTO_UDP,
+ csum_partial(udph, udp_len, 0));
+ else
+ udph->check = csum_tcpudp_magic(np->local_ip.ip,
+ np->remote_ip.ip,
+ udp_len, IPPROTO_UDP,
+ csum_partial(udph, udp_len, 0));
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+}
+
netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
unsigned long flags;
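
netpoll_udp_checksum() folds the UDP header, the payload and the address/protocol pseudo-header into one ones'-complement sum, which is exactly what the two open-coded branches in netpoll_send_udp() did before this patch. As a rough user-space illustration of the same RFC 1071 arithmetic (the addresses, ports and payload are made up, and csum_add()/csum_fold() are local stand-ins for the kernel's checksum helpers):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Accumulate 16-bit big-endian words (RFC 1071). */
static uint32_t csum_add(uint32_t sum, const uint8_t *p, size_t len)
{
	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	return sum;
}

/* Fold the carries back in and complement, like csum_fold(). */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* Made-up IPv4 pseudo-header: saddr, daddr, zero, protocol, UDP length. */
	const uint8_t pseudo[12] = {
		192, 0, 2, 1,
		192, 0, 2, 2,
		0, 17,			/* IPPROTO_UDP */
		0, 16,			/* 8-byte header + 8-byte payload */
	};
	/* Made-up UDP header and payload, check field zeroed before summing. */
	const uint8_t udp[16] = {
		0x1a, 0x0a, 0x1a, 0x0b,	/* ports 6666 -> 6667 */
		0x00, 0x10,		/* length 16 */
		0x00, 0x00,		/* check */
		'n', 'e', 't', 'p', 'o', 'l', 'l', '!',
	};
	uint16_t check = csum_fold(csum_add(csum_add(0, pseudo, sizeof(pseudo)),
					    udp, sizeof(udp)));

	if (check == 0)			/* 0 means "no checksum" for UDP, */
		check = 0xffff;		/* so mangle it, like CSUM_MANGLED_0 */
	printf("udp check: 0x%04x\n", check);
	return 0;
}

The kernel helper reaches the same result with csum_partial() plus csum_tcpudp_magic() or csum_ipv6_magic(), depending on np->ipv6.
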
@@ -396,24 +414,101 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
}
EXPORT_SYMBOL(netpoll_send_skb);
+static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ struct ipv6hdr *ip6h;
+
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+
+ /* ip6h->version = 6; ip6h->priority = 0; */
+ *(unsigned char *)ip6h = 0x60;
+ ip6h->flow_lbl[0] = 0;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+
+ ip6h->payload_len = htons(sizeof(struct udphdr) + len);
+ ip6h->nexthdr = IPPROTO_UDP;
+ ip6h->hop_limit = 32;
+ ip6h->saddr = np->local_ip.in6;
+ ip6h->daddr = np->remote_ip.in6;
+
+ skb->protocol = htons(ETH_P_IPV6);
+}
+
+static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ static atomic_t ip_ident;
+ struct iphdr *iph;
+ int ip_len;
+
+ ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr);
+
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+
+ /* iph->version = 4; iph->ihl = 5; */
+ *(unsigned char *)iph = 0x45;
+ iph->tos = 0;
+ put_unaligned(htons(ip_len), &iph->tot_len);
+ iph->id = htons(atomic_inc_return(&ip_ident));
+ iph->frag_off = 0;
+ iph->ttl = 64;
+ iph->protocol = IPPROTO_UDP;
+ iph->check = 0;
+ put_unaligned(np->local_ip.ip, &iph->saddr);
+ put_unaligned(np->remote_ip.ip, &iph->daddr);
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ skb->protocol = htons(ETH_P_IP);
+}
+
+static void push_udp(struct netpoll *np, struct sk_buff *skb, int len)
+{
+ struct udphdr *udph;
+ int udp_len;
+
+ udp_len = len + sizeof(struct udphdr);
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ udph = udp_hdr(skb);
+ udph->source = htons(np->local_port);
+ udph->dest = htons(np->remote_port);
+ udph->len = htons(udp_len);
+
+ netpoll_udp_checksum(np, skb, len);
+}
+
+static void push_eth(struct netpoll *np, struct sk_buff *skb)
+{
+ struct ethhdr *eth;
+
+ eth = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ ether_addr_copy(eth->h_source, np->dev->dev_addr);
+ ether_addr_copy(eth->h_dest, np->remote_mac);
+ if (np->ipv6)
+ eth->h_proto = htons(ETH_P_IPV6);
+ else
+ eth->h_proto = htons(ETH_P_IP);
+}
+
int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
int total_len, ip_len, udp_len;
struct sk_buff *skb;
- struct udphdr *udph;
- struct iphdr *iph;
- struct ethhdr *eth;
- static atomic_t ip_ident;
- struct ipv6hdr *ip6h;
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
WARN_ON_ONCE(!irqs_disabled());
- udp_len = len + sizeof(*udph);
+ udp_len = len + sizeof(struct udphdr);
if (np->ipv6)
- ip_len = udp_len + sizeof(*ip6h);
+ ip_len = udp_len + sizeof(struct ipv6hdr);
else
- ip_len = udp_len + sizeof(*iph);
+ ip_len = udp_len + sizeof(struct iphdr);
total_len = ip_len + LL_RESERVED_SPACE(np->dev);
@@ -425,117 +520,18 @@ int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
skb_copy_to_linear_data(skb, msg, len);
skb_put(skb, len);
- skb_push(skb, sizeof(*udph));
- skb_reset_transport_header(skb);
- udph = udp_hdr(skb);
- udph->source = htons(np->local_port);
- udph->dest = htons(np->remote_port);
- udph->len = htons(udp_len);
-
- if (np->ipv6) {
- udph->check = csum_ipv6_magic(&np->local_ip.in6,
- &np->remote_ip.in6,
- udp_len, IPPROTO_UDP,
- csum_partial(udph, udp_len, 0));
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
-
- skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
-
- /* ip6h->version = 6; ip6h->priority = 0; */
- *(unsigned char *)ip6h = 0x60;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
-
- ip6h->payload_len = htons(sizeof(struct udphdr) + len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = 32;
- ip6h->saddr = np->local_ip.in6;
- ip6h->daddr = np->remote_ip.in6;
-
- eth = skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
- } else {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(np->local_ip.ip,
- np->remote_ip.ip,
- udp_len, IPPROTO_UDP,
- csum_partial(udph, udp_len, 0));
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
-
- skb_push(skb, sizeof(*iph));
- skb_reset_network_header(skb);
- iph = ip_hdr(skb);
-
- /* iph->version = 4; iph->ihl = 5; */
- *(unsigned char *)iph = 0x45;
- iph->tos = 0;
- put_unaligned(htons(ip_len), &(iph->tot_len));
- iph->id = htons(atomic_inc_return(&ip_ident));
- iph->frag_off = 0;
- iph->ttl = 64;
- iph->protocol = IPPROTO_UDP;
- iph->check = 0;
- put_unaligned(np->local_ip.ip, &(iph->saddr));
- put_unaligned(np->remote_ip.ip, &(iph->daddr));
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-
- eth = skb_push(skb, ETH_HLEN);
- skb_reset_mac_header(skb);
- skb->protocol = eth->h_proto = htons(ETH_P_IP);
- }
-
- ether_addr_copy(eth->h_source, np->dev->dev_addr);
- ether_addr_copy(eth->h_dest, np->remote_mac);
-
+ push_udp(np, skb, len);
+ if (np->ipv6)
+ push_ipv6(np, skb, len);
+ else
+ push_ipv4(np, skb, len);
+ push_eth(np, skb);
skb->dev = np->dev;
return (int)netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);
-void netpoll_print_options(struct netpoll *np)
-{
- np_info(np, "local port %d\n", np->local_port);
- if (np->ipv6)
- np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
- else
- np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
- np_info(np, "interface name '%s'\n", np->dev_name);
- np_info(np, "local ethernet address '%pM'\n", np->dev_mac);
- np_info(np, "remote port %d\n", np->remote_port);
- if (np->ipv6)
- np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
- else
- np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
- np_info(np, "remote ethernet address %pM\n", np->remote_mac);
-}
-EXPORT_SYMBOL(netpoll_print_options);
-
-static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
-{
- const char *end;
-
- if (!strchr(str, ':') &&
- in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
- if (!*end)
- return 0;
- }
- if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
-#if IS_ENABLED(CONFIG_IPV6)
- if (!*end)
- return 1;
-#else
- return -1;
-#endif
- }
- return -1;
-}
static void skb_pool_flush(struct netpoll *np)
{
@@ -546,95 +542,6 @@ static void skb_pool_flush(struct netpoll *np)
skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
}
-int netpoll_parse_options(struct netpoll *np, char *opt)
-{
- char *cur=opt, *delim;
- int ipv6;
- bool ipversion_set = false;
-
- if (*cur != '@') {
- if ((delim = strchr(cur, '@')) == NULL)
- goto parse_failed;
- *delim = 0;
- if (kstrtou16(cur, 10, &np->local_port))
- goto parse_failed;
- cur = delim;
- }
- cur++;
-
- if (*cur != '/') {
- ipversion_set = true;
- if ((delim = strchr(cur, '/')) == NULL)
- goto parse_failed;
- *delim = 0;
- ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
- if (ipv6 < 0)
- goto parse_failed;
- else
- np->ipv6 = (bool)ipv6;
- cur = delim;
- }
- cur++;
-
- if (*cur != ',') {
- /* parse out dev_name or dev_mac */
- if ((delim = strchr(cur, ',')) == NULL)
- goto parse_failed;
- *delim = 0;
-
- np->dev_name[0] = '\0';
- eth_broadcast_addr(np->dev_mac);
- if (!strchr(cur, ':'))
- strscpy(np->dev_name, cur, sizeof(np->dev_name));
- else if (!mac_pton(cur, np->dev_mac))
- goto parse_failed;
-
- cur = delim;
- }
- cur++;
-
- if (*cur != '@') {
- /* dst port */
- if ((delim = strchr(cur, '@')) == NULL)
- goto parse_failed;
- *delim = 0;
- if (*cur == ' ' || *cur == '\t')
- np_info(np, "warning: whitespace is not allowed\n");
- if (kstrtou16(cur, 10, &np->remote_port))
- goto parse_failed;
- cur = delim;
- }
- cur++;
-
- /* dst ip */
- if ((delim = strchr(cur, '/')) == NULL)
- goto parse_failed;
- *delim = 0;
- ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
- if (ipv6 < 0)
- goto parse_failed;
- else if (ipversion_set && np->ipv6 != (bool)ipv6)
- goto parse_failed;
- else
- np->ipv6 = (bool)ipv6;
- cur = delim + 1;
-
- if (*cur != 0) {
- /* MAC address */
- if (!mac_pton(cur, np->remote_mac))
- goto parse_failed;
- }
-
- netpoll_print_options(np);
-
- return 0;
-
- parse_failed:
- np_info(np, "couldn't parse config at '%s'!\n", cur);
- return -1;
-}
-EXPORT_SYMBOL(netpoll_parse_options);
-
static void refill_skbs_work_handler(struct work_struct *work)
{
struct netpoll *np =
@@ -716,13 +623,97 @@ static char *egress_dev(struct netpoll *np, char *buf)
return buf;
}
+static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev,
+ unsigned int timeout)
+{
+ unsigned long atmost;
+
+ atmost = jiffies + timeout * HZ;
+ while (!netif_carrier_ok(ndev)) {
+ if (time_after(jiffies, atmost)) {
+ np_notice(np, "timeout waiting for carrier\n");
+ break;
+ }
+ msleep(1);
+ }
+}
+
+/*
+ * Take the IPv6 from ndev and populate local_ip structure in netpoll
+ */
+static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
+{
+ char buf[MAC_ADDR_STR_LEN + 1];
+ int err = -EDESTADDRREQ;
+ struct inet6_dev *idev;
+
+ if (!IS_ENABLED(CONFIG_IPV6)) {
+ np_err(np, "IPv6 is not supported %s, aborting\n",
+ egress_dev(np, buf));
+ return -EINVAL;
+ }
+
+ idev = __in6_dev_get(ndev);
+ if (idev) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+ !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
+ continue;
+ /* Got the IP, let's return */
+ np->local_ip.in6 = ifp->addr;
+ err = 0;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ if (err) {
+ np_err(np, "no IPv6 address for %s, aborting\n",
+ egress_dev(np, buf));
+ return err;
+ }
+
+ np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
+ return 0;
+}
+
+/*
+ * Take the IPv4 from ndev and populate local_ip structure in netpoll
+ */
+static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev)
+{
+ char buf[MAC_ADDR_STR_LEN + 1];
+ const struct in_ifaddr *ifa;
+ struct in_device *in_dev;
+
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (!in_dev) {
+ np_err(np, "no IP address for %s, aborting\n",
+ egress_dev(np, buf));
+ return -EDESTADDRREQ;
+ }
+
+ ifa = rtnl_dereference(in_dev->ifa_list);
+ if (!ifa) {
+ np_err(np, "no IP address for %s, aborting\n",
+ egress_dev(np, buf));
+ return -EDESTADDRREQ;
+ }
+
+ np->local_ip.ip = ifa->ifa_local;
+ np_info(np, "local IP %pI4\n", &np->local_ip.ip);
+
+ return 0;
+}
+
int netpoll_setup(struct netpoll *np)
{
struct net *net = current->nsproxy->net_ns;
char buf[MAC_ADDR_STR_LEN + 1];
struct net_device *ndev = NULL;
bool ip_overwritten = false;
- struct in_device *in_dev;
int err;
rtnl_lock();
@@ -746,85 +737,31 @@ int netpoll_setup(struct netpoll *np)
}
if (!netif_running(ndev)) {
- unsigned long atmost;
-
np_info(np, "device %s not up yet, forcing it\n",
egress_dev(np, buf));
err = dev_open(ndev, NULL);
-
if (err) {
np_err(np, "failed to open %s\n", ndev->name);
goto put;
}
rtnl_unlock();
- atmost = jiffies + carrier_timeout * HZ;
- while (!netif_carrier_ok(ndev)) {
- if (time_after(jiffies, atmost)) {
- np_notice(np, "timeout waiting for carrier\n");
- break;
- }
- msleep(1);
- }
-
+ netpoll_wait_carrier(np, ndev, carrier_timeout);
rtnl_lock();
}
if (!np->local_ip.ip) {
if (!np->ipv6) {
- const struct in_ifaddr *ifa;
-
- in_dev = __in_dev_get_rtnl(ndev);
- if (!in_dev)
- goto put_noaddr;
-
- ifa = rtnl_dereference(in_dev->ifa_list);
- if (!ifa) {
-put_noaddr:
- np_err(np, "no IP address for %s, aborting\n",
- egress_dev(np, buf));
- err = -EDESTADDRREQ;
+ err = netpoll_take_ipv4(np, ndev);
+ if (err)
goto put;
- }
-
- np->local_ip.ip = ifa->ifa_local;
- ip_overwritten = true;
- np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_dev *idev;
-
- err = -EDESTADDRREQ;
- idev = __in6_dev_get(ndev);
- if (idev) {
- struct inet6_ifaddr *ifp;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
- !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
- continue;
- np->local_ip.in6 = ifp->addr;
- ip_overwritten = true;
- err = 0;
- break;
- }
- read_unlock_bh(&idev->lock);
- }
- if (err) {
- np_err(np, "no IPv6 address for %s, aborting\n",
- egress_dev(np, buf));
+ err = netpoll_take_ipv6(np, ndev);
+ if (err)
goto put;
- } else
- np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
-#else
- np_err(np, "IPv6 is not supported %s, aborting\n",
- egress_dev(np, buf));
- err = -EINVAL;
- goto put;
-#endif
}
+ ip_overwritten = true;
}
err = __netpoll_setup(np, ndev);
@@ -863,7 +800,7 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
kfree(npinfo);
}
-void __netpoll_cleanup(struct netpoll *np)
+static void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -885,7 +822,6 @@ void __netpoll_cleanup(struct netpoll *np)
skb_pool_flush(np);
}
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
void __netpoll_free(struct netpoll *np)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ba7cf3e3c32f..05e2e22a8f7c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -371,7 +371,7 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
-static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem);
+static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem);
static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
{
@@ -409,7 +409,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
* (2) break out to fallthrough to alloc_pages_node.
	 * This limits stress on the page buddy allocator.
*/
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
alloc_stat_inc(pool, waive);
netmem = 0;
break;
@@ -544,8 +544,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
}
/* slow path */
-static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
- gfp_t gfp)
+static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool,
+ gfp_t gfp)
{
const int bulk = PP_ALLOC_CACHE_REFILL;
unsigned int pp_order = pool->p.order;
@@ -615,7 +615,7 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
netmem = pool->mp_ops->alloc_netmems(pool, gfp);
else
- netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ netmem = __page_pool_alloc_netmems_slow(pool, gfp);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_netmems);
@@ -673,8 +673,8 @@ void page_pool_clear_pp_info(netmem_ref netmem)
netmem_set_pp(netmem, NULL);
}
-static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
- netmem_ref netmem)
+static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool,
+ netmem_ref netmem)
{
struct page *old, *page = netmem_to_page(netmem);
unsigned long id;
@@ -712,7 +712,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
* a regular page (that will eventually be returned to the normal
* page-allocator via put_page).
*/
-void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
+static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem)
{
int count;
bool put;
@@ -721,7 +721,7 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
put = pool->mp_ops->release_netmem(pool, netmem);
else
- __page_pool_release_page_dma(pool, netmem);
+ __page_pool_release_netmem_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
@@ -826,7 +826,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
* will be invoking put_page.
*/
recycle_stat_inc(pool, released_refcnt);
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
return 0;
}
@@ -869,7 +869,7 @@ void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
/* Cache full, fallback to free pages */
recycle_stat_inc(pool, ring_full);
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
@@ -912,7 +912,7 @@ static void page_pool_recycle_ring_bulk(struct page_pool *pool,
* since put_page() with refcnt == 1 can be an expensive operation.
*/
for (; i < bulk_len; i++)
- page_pool_return_page(pool, bulk[i]);
+ page_pool_return_netmem(pool, bulk[i]);
}
/**
@@ -995,7 +995,7 @@ static netmem_ref page_pool_drain_frag(struct page_pool *pool,
return netmem;
}
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
return 0;
}
@@ -1009,7 +1009,7 @@ static void page_pool_free_frag(struct page_pool *pool)
if (!netmem || page_pool_unref_netmem(netmem, drain_count))
return;
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
@@ -1076,7 +1076,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
pr_crit("%s() page_pool refcnt %d violation\n",
__func__, netmem_ref_count(netmem));
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
@@ -1109,7 +1109,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
*/
while (pool->alloc.count) {
netmem = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
@@ -1136,7 +1136,7 @@ static void page_pool_scrub(struct page_pool *pool)
}
xa_for_each(&pool->dma_mapped, id, ptr)
- __page_pool_release_page_dma(pool, page_to_netmem(ptr));
+ __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr));
}
/* No more consumers should exist, but producers could still
@@ -1253,7 +1253,7 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
/* Flush pool alloc cache, as refill will check NUMA node */
while (pool->alloc.count) {
netmem = pool->alloc.cache[--pool->alloc.count];
- page_pool_return_page(pool, netmem);
+ page_pool_return_netmem(pool, netmem);
}
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c57692eb8da9..094b085cff20 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1026,9 +1026,11 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
.rta_error = error,
.rta_id = id,
};
+ unsigned long delta;
if (dst) {
- ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse);
+ delta = jiffies - READ_ONCE(dst->lastuse);
+ ci.rta_lastuse = jiffies_delta_to_clock_t(delta);
ci.rta_used = dst->__use;
ci.rta_clntref = rcuref_read(&dst->__rcuref);
}
@@ -1446,7 +1448,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
struct netdev_phys_item_id ppid = { };
int err;
- err = dev_get_port_parent_id(dev, &ppid, false);
+ err = netif_get_port_parent_id(dev, &ppid, false);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -2036,7 +2038,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm->__ifi_pad = 0;
ifm->ifi_type = READ_ONCE(dev->type);
ifm->ifi_index = READ_ONCE(dev->ifindex);
- ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_flags = netif_get_flags(dev);
ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
@@ -5225,7 +5227,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type;
ifm->ifi_index = dev->ifindex;
- ifm->ifi_flags = dev_get_flags(dev);
+ ifm->ifi_flags = netif_get_flags(dev);
ifm->ifi_change = 0;
diff --git a/net/core/scm.c b/net/core/scm.c
index 0225bd94170f..072d5742440a 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -23,6 +23,8 @@
#include <linux/security.h>
#include <linux/pid_namespace.h>
#include <linux/pid.h>
+#include <uapi/linux/pidfd.h>
+#include <linux/pidfs.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/errqueue.h>
@@ -145,6 +147,22 @@ void __scm_destroy(struct scm_cookie *scm)
}
EXPORT_SYMBOL(__scm_destroy);
+static inline int scm_replace_pid(struct scm_cookie *scm, struct pid *pid)
+{
+ int err;
+
+ /* drop all previous references */
+ scm_destroy_cred(scm);
+
+ err = pidfs_register_pid(pid);
+ if (unlikely(err))
+ return err;
+
+ scm->pid = pid;
+ scm->creds.pid = pid_vnr(pid);
+ return 0;
+}
+
int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
{
const struct proto_ops *ops = READ_ONCE(sock->ops);
@@ -189,15 +207,21 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
if (err)
goto error;
- p->creds.pid = creds.pid;
if (!p->pid || pid_vnr(p->pid) != creds.pid) {
struct pid *pid;
err = -ESRCH;
pid = find_get_pid(creds.pid);
if (!pid)
goto error;
- put_pid(p->pid);
- p->pid = pid;
+
+ /* pass a struct pid reference from
+ * find_get_pid() to scm_replace_pid().
+ */
+ err = scm_replace_pid(p, pid);
+ if (err) {
+ put_pid(pid);
+ goto error;
+ }
}
err = -EINVAL;
@@ -459,7 +483,7 @@ static void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
if (!scm->pid)
return;
- pidfd = pidfd_prepare(scm->pid, 0, &pidfd_file);
+ pidfd = pidfd_prepare(scm->pid, PIDFD_STALE, &pidfd_file);
if (put_cmsg(msg, SOL_SOCKET, SCM_PIDFD, sizeof(int), &pidfd)) {
if (pidfd_file) {
diff --git a/net/core/selftests.c b/net/core/selftests.c
index 35f807ea9952..3d79133a91a6 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -27,6 +27,7 @@ struct net_packet_attrs {
int max_size;
u8 id;
u16 queue_mapping;
+ bool bad_csum;
};
struct net_test_priv {
@@ -160,10 +161,25 @@ static struct sk_buff *net_test_get_skb(struct net_device *ndev,
skb->csum = 0;
skb->ip_summed = CHECKSUM_PARTIAL;
if (attr->tcp) {
- thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr,
- ihdr->daddr, 0);
+ int l4len = skb->len - skb_transport_offset(skb);
+
+ thdr->check = ~tcp_v4_check(l4len, ihdr->saddr, ihdr->daddr, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
+
+ if (attr->bad_csum) {
+ /* Force mangled checksum */
+ if (skb_checksum_help(skb)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (thdr->check != CSUM_MANGLED_0)
+ thdr->check = CSUM_MANGLED_0;
+ else
+ thdr->check = csum16_sub(thdr->check,
+ cpu_to_be16(1));
+ }
} else {
udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
}
@@ -238,7 +254,11 @@ static int net_test_loopback_validate(struct sk_buff *skb,
if (tpriv->packet->id != shdr->id)
goto out;
- tpriv->ok = true;
+ if (tpriv->packet->bad_csum && skb->ip_summed == CHECKSUM_UNNECESSARY)
+ tpriv->ok = -EIO;
+ else
+ tpriv->ok = true;
+
complete(&tpriv->comp);
out:
kfree_skb(skb);
@@ -284,7 +304,12 @@ static int __net_test_loopback(struct net_device *ndev,
attr->timeout = NET_LB_TIMEOUT;
wait_for_completion_timeout(&tpriv->comp, attr->timeout);
- ret = tpriv->ok ? 0 : -ETIMEDOUT;
+ if (tpriv->ok < 0)
+ ret = tpriv->ok;
+ else if (!tpriv->ok)
+ ret = -ETIMEDOUT;
+ else
+ ret = 0;
cleanup:
dev_remove_pack(&tpriv->pt);
@@ -344,6 +369,42 @@ static int net_test_phy_loopback_tcp(struct net_device *ndev)
return __net_test_loopback(ndev, &attr);
}
+/**
+ * net_test_phy_loopback_tcp_bad_csum - PHY loopback test with a deliberately
+ * corrupted TCP checksum
+ * @ndev: the network device to test
+ *
+ * Builds the same minimal Ethernet/IPv4/TCP frame as
+ * net_test_phy_loopback_tcp(), then corrupts the TCP checksum so that the
+ * value carried in the frame is provably different from the correct one.
+ * The frame is transmitted through the device's internal PHY loopback path:
+ *
+ * test code -> MAC driver -> MAC HW -> xMII -> PHY ->
+ * internal PHY loopback -> xMII -> MAC HW -> MAC driver -> test code
+ *
+ * Result interpretation
+ * ---------------------
+ * 0 The frame is delivered to the stack and the driver reports
+ * ip_summed as CHECKSUM_NONE or CHECKSUM_COMPLETE - both are
+ *		valid ways to indicate "bad checksum, let the stack verify."
+ * -ETIMEDOUT The MAC/PHY silently dropped the frame; hardware checksum
+ * verification filtered it out before the driver saw it.
+ * -EIO The driver returned the frame with ip_summed ==
+ * CHECKSUM_UNNECESSARY, falsely claiming a valid checksum and
+ * indicating a serious RX-path defect.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int net_test_phy_loopback_tcp_bad_csum(struct net_device *ndev)
+{
+ struct net_packet_attrs attr = { };
+
+ attr.dst = ndev->dev_addr;
+ attr.tcp = true;
+ attr.bad_csum = true;
+ return __net_test_loopback(ndev, &attr);
+}
+
static const struct net_test {
char name[ETH_GSTRING_LEN];
int (*fn)(struct net_device *ndev);
@@ -368,6 +429,9 @@ static const struct net_test {
.name = "PHY internal loopback, TCP ",
.fn = net_test_phy_loopback_tcp,
}, {
+ .name = "PHY loopback, bad TCP csum ",
+ .fn = net_test_phy_loopback_tcp_bad_csum,
+ }, {
/* This test should be done after all PHY loopback test */
.name = "PHY internal loopback, disable",
.fn = net_test_phy_loopback_disable,
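
The new selftest depends on net_test_get_skb() replacing a correct TCP checksum with one that is guaranteed to differ from it: the checksum is forced to CSUM_MANGLED_0, or decremented in ones'-complement arithmetic if it already had that value. A stand-alone sketch of that corruption step (csum16_add()/csum16_sub() below are local re-implementations written only to illustrate the arithmetic, not the kernel's helpers):

#include <stdint.h>
#include <stdio.h>

#define CSUM_MANGLED_0	0xffffU

/* 16-bit ones'-complement add with end-around carry. */
static uint16_t csum16_add(uint16_t csum, uint16_t addend)
{
	uint16_t res = (uint16_t)(csum + addend);

	return (uint16_t)(res + (res < addend));
}

/* Subtraction is addition of the complement. */
static uint16_t csum16_sub(uint16_t csum, uint16_t addend)
{
	return csum16_add(csum, (uint16_t)~addend);
}

/* Corrupt a checksum the way the selftest does. */
static uint16_t corrupt(uint16_t good)
{
	if (good != CSUM_MANGLED_0)
		return CSUM_MANGLED_0;
	return csum16_sub(good, 1);
}

int main(void)
{
	const uint16_t samples[] = { 0x0000, 0x1234, 0xfffe, 0xffff };

	for (unsigned int i = 0; i < 4; i++)
		printf("good=0x%04x bad=0x%04x\n",
		       samples[i], corrupt(samples[i]));
	return 0;
}

Whatever the starting value, the corrupted checksum never equals it, so a receive path that still marks the frame CHECKSUM_UNNECESSARY is the -EIO failure described in the kernel-doc above.
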
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 85fc82f72d26..ee0274417948 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -384,8 +384,7 @@ static inline void __finalize_skb_around(struct sk_buff *skb, void *data,
skb_set_kcov_handle(skb, kcov_common_handle());
}
-static inline void *__slab_build_skb(struct sk_buff *skb, void *data,
- unsigned int *size)
+static inline void *__slab_build_skb(void *data, unsigned int *size)
{
void *resized;
@@ -418,7 +417,7 @@ struct sk_buff *slab_build_skb(void *data)
return NULL;
memset(skb, 0, offsetof(struct sk_buff, tail));
- data = __slab_build_skb(skb, data, &size);
+ data = __slab_build_skb(data, &size);
__finalize_skb_around(skb, data, size);
return skb;
@@ -435,7 +434,7 @@ static void __build_skb_around(struct sk_buff *skb, void *data,
* using slab buffer should use slab_build_skb() instead.
*/
if (WARN_ONCE(size == 0, "Use slab_build_skb() instead"))
- data = __slab_build_skb(skb, data, &size);
+ data = __slab_build_skb(data, &size);
__finalize_skb_around(skb, data, size);
}
@@ -3060,10 +3059,8 @@ static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
/*
* Fill page/offset/length into spd, if it can hold more pages.
*/
-static bool spd_fill_page(struct splice_pipe_desc *spd,
- struct pipe_inode_info *pipe, struct page *page,
- unsigned int *len, unsigned int offset,
- bool linear,
+static bool spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+ unsigned int *len, unsigned int offset, bool linear,
struct sock *sk)
{
if (unlikely(spd->nr_pages == MAX_SKB_FRAGS))
@@ -3091,8 +3088,7 @@ static bool __splice_segment(struct page *page, unsigned int poff,
unsigned int plen, unsigned int *off,
unsigned int *len,
struct splice_pipe_desc *spd, bool linear,
- struct sock *sk,
- struct pipe_inode_info *pipe)
+ struct sock *sk)
{
if (!*len)
return true;
@@ -3111,8 +3107,7 @@ static bool __splice_segment(struct page *page, unsigned int poff,
do {
unsigned int flen = min(*len, plen);
- if (spd_fill_page(spd, pipe, page, &flen, poff,
- linear, sk))
+ if (spd_fill_page(spd, page, &flen, poff, linear, sk))
return true;
poff += flen;
plen -= flen;
@@ -3130,8 +3125,8 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
unsigned int *offset, unsigned int *len,
struct splice_pipe_desc *spd, struct sock *sk)
{
- int seg;
struct sk_buff *iter;
+ int seg;
/* map the linear part :
* If skb->head_frag is set, this 'linear' part is backed by a
@@ -3143,7 +3138,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
skb_headlen(skb),
offset, len, spd,
skb_head_is_locked(skb),
- sk, pipe))
+ sk))
return true;
/*
@@ -3160,7 +3155,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
if (__splice_segment(skb_frag_page(f),
skb_frag_off(f), skb_frag_size(f),
- offset, len, spd, false, sk, pipe))
+ offset, len, spd, false, sk))
return true;
}
@@ -3235,6 +3230,7 @@ typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
int len, sendmsg_func sendmsg, int flags)
{
+ int more_hint = sk_is_tcp(sk) ? MSG_MORE : 0;
unsigned int orig_len = len;
struct sk_buff *head = skb;
unsigned short fragidx;
@@ -3252,6 +3248,8 @@ do_frag_list:
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
msg.msg_flags = MSG_DONTWAIT | flags;
+ if (slen < len)
+ msg.msg_flags |= more_hint;
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
@@ -3292,6 +3290,8 @@ do_frag_list:
flags,
};
+ if (slen < len)
+ msg.msg_flags |= more_hint;
bvec_set_page(&bvec, skb_frag_page(frag), slen,
skb_frag_off(frag) + offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1,
@@ -6261,9 +6261,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
- if (!skb_frags_readable(skb))
- return -EFAULT;
-
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -6766,8 +6763,7 @@ static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);
/* carve out the first eat bytes from skb's frag_list. May recurse into
* pskb_carve()
*/
-static int pskb_carve_frag_list(struct sk_buff *skb,
- struct skb_shared_info *shinfo, int eat,
+static int pskb_carve_frag_list(struct skb_shared_info *shinfo, int eat,
gfp_t gfp_mask)
{
struct sk_buff *list = shinfo->frag_list;
@@ -6872,7 +6868,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
skb_clone_fraglist(skb);
/* split line is in frag list */
- if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
+ if (k == 0 && pskb_carve_frag_list(shinfo, off - pos, gfp_mask)) {
/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
if (skb_has_frag_list(skb))
kfree_skb_list(skb_shinfo(skb)->frag_list);
@@ -7237,7 +7233,6 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
* @skb: The buffer to add pages to
* @iter: Iterator representing the pages to be added
* @maxsize: Maximum amount of pages to be added
- * @gfp: Allocation flags
*
* This is a common helper function for supporting MSG_SPLICE_PAGES. It
* extracts pages from an iterator and adds them to the socket buffer if
@@ -7248,7 +7243,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
* insufficient space in the buffer to transfer anything.
*/
ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
- ssize_t maxsize, gfp_t gfp)
+ ssize_t maxsize)
{
size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags);
struct page *pages[8], **ppages = pages;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 34c51eb1a14f..83c78379932e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -656,6 +656,13 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* If sk is quickly removed from the map and then added back, the old
+ * psock should not be scheduled, because there are now two psocks
+ * pointing to the same sk.
+ */
+ if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ return;
+
/* Increment the psock refcnt to synchronize with close(fd) path in
* sock_map_close(), ensuring we wait for backlog thread completion
* before sk_socket freed. If refcnt increment fails, it indicates
diff --git a/net/core/sock.c b/net/core/sock.c
index 3b409bc8ef6d..7c26ec8dce63 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -526,11 +526,10 @@ int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
enum skb_drop_reason drop_reason;
int err;
- err = sk_filter(sk, skb);
- if (err) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ err = sk_filter_reason(sk, skb, &drop_reason);
+ if (err)
goto out;
- }
+
err = __sock_queue_rcv_skb(sk, skb);
switch (err) {
case -ENOMEM:
@@ -553,15 +552,18 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
const int nested, unsigned int trim_cap, bool refcounted)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
int rc = NET_RX_SUCCESS;
+ int err;
- if (sk_filter_trim_cap(sk, skb, trim_cap))
+ if (sk_filter_trim_cap(sk, skb, trim_cap, &reason))
goto discard_and_relse;
skb->dev = NULL;
if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
+ reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
goto discard_and_relse;
}
if (nested)
@@ -577,8 +579,12 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
+ } else if ((err = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))) {
bh_unlock_sock(sk);
+ if (err == -ENOMEM)
+ reason = SKB_DROP_REASON_PFMEMALLOC;
+ if (err == -ENOBUFS)
+ reason = SKB_DROP_REASON_SOCKET_BACKLOG;
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
@@ -589,7 +595,7 @@ out:
sock_put(sk);
return rc;
discard_and_relse:
- kfree_skb(skb);
+ sk_skb_reason_drop(sk, skb, reason);
goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
@@ -602,7 +608,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = __sk_dst_get(sk);
- if (dst && dst->obsolete &&
+ if (dst && READ_ONCE(dst->obsolete) &&
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
@@ -620,7 +626,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
struct dst_entry *dst = sk_dst_get(sk);
- if (dst && dst->obsolete &&
+ if (dst && READ_ONCE(dst->obsolete) &&
INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
dst, cookie) == NULL) {
sk_dst_reset(sk);
@@ -818,12 +824,10 @@ EXPORT_SYMBOL(sock_set_priority);
void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
- lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
- release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -837,14 +841,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
}
}
-void sock_enable_timestamps(struct sock *sk)
-{
- lock_sock(sk);
- __sock_set_timestamps(sk, true, false, true);
- release_sock(sk);
-}
-EXPORT_SYMBOL(sock_enable_timestamps);
-
void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
switch (optname) {
@@ -1295,6 +1291,14 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
case SO_DEVMEM_DONTNEED:
return sock_devmem_dontneed(sk, optval, optlen);
#endif
+ case SO_SNDTIMEO_OLD:
+ case SO_SNDTIMEO_NEW:
+ return sock_set_timeout(&sk->sk_sndtimeo, optval,
+ optlen, optname == SO_SNDTIMEO_OLD);
+ case SO_RCVTIMEO_OLD:
+ case SO_RCVTIMEO_NEW:
+ return sock_set_timeout(&sk->sk_rcvtimeo, optval,
+ optlen, optname == SO_RCVTIMEO_OLD);
}
sockopt_lock_sock(sk);
@@ -1450,18 +1454,6 @@ set_sndbuf:
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
}
- case SO_RCVTIMEO_OLD:
- case SO_RCVTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
- optlen, optname == SO_RCVTIMEO_OLD);
- break;
-
- case SO_SNDTIMEO_OLD:
- case SO_SNDTIMEO_NEW:
- ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
- optlen, optname == SO_SNDTIMEO_OLD);
- break;
-
case SO_ATTACH_FILTER: {
struct sock_fprog fprog;
@@ -2602,8 +2594,8 @@ static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
- max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
- READ_ONCE(dst->dev->gso_ipv4_max_size);
+ max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
+ READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
max_size = GSO_LEGACY_MAX_SIZE;
@@ -2614,7 +2606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- sk->sk_route_caps = dst->dev->features;
+ sk->sk_route_caps = dst_dev(dst)->features;
if (sk_is_tcp(sk)) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2632,7 +2624,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
- max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
+ max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
}
}
sk->sk_gso_max_segs = max_segs;
@@ -2788,17 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
EXPORT_SYMBOL(sock_pfree);
#endif /* CONFIG_INET */
-kuid_t sock_i_uid(struct sock *sk)
-{
- kuid_t uid;
-
- read_lock_bh(&sk->sk_callback_lock);
- uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
- read_unlock_bh(&sk->sk_callback_lock);
- return uid;
-}
-EXPORT_SYMBOL(sock_i_uid);
-
unsigned long __sock_i_ino(struct sock *sk)
{
unsigned long ino;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 82a14f131d00..5947b38e4f8b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1709,7 +1709,6 @@ EXPORT_SYMBOL_GPL(sock_map_close);
struct sockmap_link {
struct bpf_link link;
struct bpf_map *map;
- enum bpf_attach_type attach_type;
};
static void sock_map_link_release(struct bpf_link *link)
@@ -1721,7 +1720,7 @@ static void sock_map_link_release(struct bpf_link *link)
goto out;
WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link,
- sockmap_link->attach_type));
+ link->attach_type));
bpf_map_put_with_uref(sockmap_link->map);
sockmap_link->map = NULL;
@@ -1772,7 +1771,7 @@ static int sock_map_link_update_prog(struct bpf_link *link,
}
ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
- sockmap_link->attach_type);
+ link->attach_type);
if (ret)
goto out;
@@ -1817,7 +1816,7 @@ static int sock_map_link_fill_info(const struct bpf_link *link,
u32 map_id = sock_map_link_get_map_id(sockmap_link);
info->sockmap.map_id = map_id;
- info->sockmap.attach_type = sockmap_link->attach_type;
+ info->sockmap.attach_type = link->attach_type;
return 0;
}
@@ -1828,7 +1827,7 @@ static void sock_map_link_show_fdinfo(const struct bpf_link *link,
u32 map_id = sock_map_link_get_map_id(sockmap_link);
seq_printf(seq, "map_id:\t%u\n", map_id);
- seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type);
+ seq_printf(seq, "attach_type:\t%u\n", link->attach_type);
}
static const struct bpf_link_ops sock_map_link_ops = {
@@ -1866,9 +1865,9 @@ int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
}
attach_type = attr->link_create.attach_type;
- bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog);
+ bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog,
+ attach_type);
sockmap_link->map = map;
- sockmap_link->attach_type = attach_type;
ret = bpf_link_prime(&sockmap_link->link, &link_primer);
if (ret) {
diff --git a/net/core/stream.c b/net/core/stream.c
index b16dfa568a2d..7a37e7dd2c43 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -23,9 +23,13 @@
/**
* sk_stream_write_space - stream socket write_space callback.
- * @sk: socket
+ * @sk: pointer to the socket structure
*
- * FIXME: write proper description
+ * This function is invoked when space becomes available in the socket's
+ * send buffer. If the socket is indeed writable, it clears the
+ * SOCK_NOSPACE flag to indicate that memory for writing is available,
+ * wakes up any processes waiting for write operations, and sends
+ * asynchronous notifications if needed.
*/
void sk_stream_write_space(struct sock *sk)
{
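
From user space, the visible effect of this callback is that a socket which ran out of send-buffer space becomes pollable for writing again once the peer drains data. A small illustration of that POLLOUT wakeup (an AF_UNIX socketpair is used purely for brevity; a TCP socket is what actually goes through sk_stream_write_space()):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd;
	char buf[4096];
	int sv[2];

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;
	fcntl(sv[0], F_SETFL, O_NONBLOCK);
	memset(buf, 'x', sizeof(buf));

	/* Fill the send buffer until the kernel reports no space left. */
	while (write(sv[0], buf, sizeof(buf)) > 0)
		;

	pfd.fd = sv[0];
	pfd.events = POLLOUT;

	/* Drain the peer; the write-space callback eventually wakes us. */
	while (poll(&pfd, 1, 0) == 0)
		if (read(sv[1], buf, sizeof(buf)) <= 0)
			break;

	if (pfd.revents & POLLOUT)
		printf("send buffer has space again\n");

	close(sv[0]);
	close(sv[1]);
	return 0;
}
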
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5dbb2c6f371d..8cf04b57ade1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -28,6 +28,7 @@
#include <net/rps.h>
#include "dev.h"
+#include "net-sysfs.h"
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
@@ -96,50 +97,40 @@ free_buf:
#ifdef CONFIG_RPS
-static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
-{
- struct cpumask *rps_default_mask;
-
- if (net->core.rps_default_mask)
- return net->core.rps_default_mask;
-
- rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
- if (!rps_default_mask)
- return NULL;
-
- /* pairs with READ_ONCE in rx_queue_default_mask() */
- WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
- return rps_default_mask;
-}
+DEFINE_MUTEX(rps_default_mask_mutex);
static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = (struct net *)table->data;
+ struct cpumask *mask;
int err = 0;
- rtnl_lock();
+ mutex_lock(&rps_default_mask_mutex);
+ mask = net->core.rps_default_mask;
if (write) {
- struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
-
+ if (!mask) {
+ mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ net->core.rps_default_mask = mask;
+ }
err = -ENOMEM;
- if (!rps_default_mask)
+ if (!mask)
goto done;
- err = cpumask_parse(buffer, rps_default_mask);
+ err = cpumask_parse(buffer, mask);
if (err)
goto done;
- err = rps_cpumask_housekeeping(rps_default_mask);
+ err = rps_cpumask_housekeeping(mask);
if (err)
goto done;
} else {
err = dump_cpumask(buffer, lenp, ppos,
- net->core.rps_default_mask ? : cpu_none_mask);
+ mask ?: cpu_none_mask);
}
done:
- rtnl_unlock();
+ mutex_unlock(&rps_default_mask_mutex);
return err;
}
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index e340d955cf3b..d97c326a9045 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -45,6 +45,11 @@ const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_
[DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
};
+const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ATTR_BW + 1] = {
+ [DEVLINK_RATE_TC_ATTR_INDEX] = NLA_POLICY_MAX(NLA_U8, DEVLINK_RATE_TC_INDEX_MAX),
+ [DEVLINK_RATE_TC_ATTR_BW] = { .type = NLA_U32, },
+};
+
const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
};
@@ -523,7 +528,7 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
};
/* DEVLINK_CMD_RATE_SET - do */
-static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -532,10 +537,11 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};
/* DEVLINK_CMD_RATE_NEW - do */
-static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -544,6 +550,7 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
};
/* DEVLINK_CMD_RATE_DEL - do */
@@ -1191,7 +1198,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
.doit = devlink_nl_rate_set_doit,
.post_doit = devlink_nl_post_doit,
.policy = devlink_rate_set_nl_policy,
- .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .maxattr = DEVLINK_ATTR_RATE_TC_BWS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
@@ -1201,7 +1208,7 @@ const struct genl_split_ops devlink_nl_ops[74] = {
.doit = devlink_nl_rate_new_doit,
.post_doit = devlink_nl_post_doit,
.policy = devlink_rate_new_nl_policy,
- .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .maxattr = DEVLINK_ATTR_RATE_TC_BWS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 8f2bd50ddf5e..09cc6f264ccf 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -13,6 +13,7 @@
/* Common nested types */
extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
+extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ATTR_BW + 1];
extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
/* Ops table for devlink */
diff --git a/net/devlink/param.c b/net/devlink/param.c
index b29abf8d3ed4..41dcc86cfd94 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -92,6 +92,16 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME,
.type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_PHC,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_PHC_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_PHC_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_CLOCK_ID,
+ .name = DEVLINK_PARAM_GENERIC_CLOCK_ID_NAME,
+ .type = DEVLINK_PARAM_GENERIC_CLOCK_ID_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -195,6 +205,11 @@ devlink_nl_param_value_fill_one(struct sk_buff *msg,
if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
goto value_nest_cancel;
break;
+ case DEVLINK_PARAM_TYPE_U64:
+ if (devlink_nl_put_u64(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+ val.vu64))
+ goto value_nest_cancel;
+ break;
case DEVLINK_PARAM_TYPE_STRING:
if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
val.vstr))
@@ -429,6 +444,11 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
return -EINVAL;
value->vu32 = nla_get_u32(param_data);
break;
+ case DEVLINK_PARAM_TYPE_U64:
+ if (nla_len(param_data) != sizeof(u64))
+ return -EINVAL;
+ value->vu64 = nla_get_u64(param_data);
+ break;
case DEVLINK_PARAM_TYPE_STRING:
len = strnlen(nla_data(param_data), nla_len(param_data));
if (len == nla_len(param_data) ||
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 8828ffaf6cbc..110b3fa8a0b1 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -80,6 +80,29 @@ devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
return ERR_PTR(-EINVAL);
}
+static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw)
+{
+ struct nlattr *nla_tc_bw;
+ int i;
+
+ for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
+ nla_tc_bw = nla_nest_start(msg, DEVLINK_ATTR_RATE_TC_BWS);
+ if (!nla_tc_bw)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_RATE_TC_ATTR_INDEX, i) ||
+ nla_put_u32(msg, DEVLINK_RATE_TC_ATTR_BW, tc_bw[i]))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nla_tc_bw);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, nla_tc_bw);
+ return -EMSGSIZE;
+}
+
static int devlink_nl_rate_fill(struct sk_buff *msg,
struct devlink_rate *devlink_rate,
enum devlink_command cmd, u32 portid, u32 seq,
@@ -129,6 +152,9 @@ static int devlink_nl_rate_fill(struct sk_buff *msg,
devlink_rate->parent->name))
goto nla_put_failure;
+ if (devlink_rate_put_tc_bws(msg, devlink_rate->tc_bw))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -316,6 +342,87 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
return 0;
}
+static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw,
+ unsigned long *bitmap,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[DEVLINK_RATE_TC_ATTR_MAX + 1];
+ u8 tc_index;
+ int err;
+
+ err = nla_parse_nested(tb, DEVLINK_RATE_TC_ATTR_MAX, parent_nest,
+ devlink_dl_rate_tc_bws_nl_policy, extack);
+ if (err)
+ return err;
+
+ if (!tb[DEVLINK_RATE_TC_ATTR_INDEX]) {
+ NL_SET_ERR_ATTR_MISS(extack, parent_nest,
+ DEVLINK_RATE_TC_ATTR_INDEX);
+ return -EINVAL;
+ }
+
+ tc_index = nla_get_u8(tb[DEVLINK_RATE_TC_ATTR_INDEX]);
+
+ if (!tb[DEVLINK_RATE_TC_ATTR_BW]) {
+ NL_SET_ERR_ATTR_MISS(extack, parent_nest,
+ DEVLINK_RATE_TC_ATTR_BW);
+ return -EINVAL;
+ }
+
+ if (test_and_set_bit(tc_index, bitmap)) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Duplicate traffic class index specified (%u)",
+ tc_index);
+ return -EINVAL;
+ }
+
+ tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_RATE_TC_ATTR_BW]);
+
+ return 0;
+}
+
+static int devlink_nl_rate_tc_bw_set(struct devlink_rate *devlink_rate,
+ struct genl_info *info)
+{
+ DECLARE_BITMAP(bitmap, DEVLINK_RATE_TCS_MAX) = {};
+ struct devlink *devlink = devlink_rate->devlink;
+ const struct devlink_ops *ops = devlink->ops;
+ u32 tc_bw[DEVLINK_RATE_TCS_MAX] = {};
+ int rem, err = -EOPNOTSUPP, i;
+ struct nlattr *attr;
+
+ nlmsg_for_each_attr_type(attr, DEVLINK_ATTR_RATE_TC_BWS, info->nlhdr,
+ GENL_HDRLEN, rem) {
+ err = devlink_nl_rate_tc_bw_parse(attr, tc_bw, bitmap,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
+ if (!test_bit(i, bitmap)) {
+ NL_SET_ERR_MSG_FMT(info->extack,
+ "Bandwidth values must be specified for all %u traffic classes",
+ DEVLINK_RATE_TCS_MAX);
+ return -EINVAL;
+ }
+ }
+
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tc_bw_set(devlink_rate, devlink_rate->priv,
+ tc_bw, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tc_bw_set(devlink_rate, devlink_rate->priv,
+ tc_bw, info->extack);
+
+ if (err)
+ return err;
+
+ memcpy(devlink_rate->tc_bw, tc_bw, sizeof(tc_bw));
+
+ return 0;
+}
+
static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
const struct devlink_ops *ops,
struct genl_info *info)
@@ -388,6 +495,12 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
return err;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS]) {
+ err = devlink_nl_rate_tc_bw_set(devlink_rate, info);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -423,6 +536,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
"TX weight set isn't supported for the leafs");
return false;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
+ !ops->rate_leaf_tc_bw_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TC_BWS],
+ "TC bandwidth set isn't supported for the leafs");
+ return false;
+ }
} else if (type == DEVLINK_RATE_TYPE_NODE) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
@@ -449,6 +569,13 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
"TX weight set isn't supported for the nodes");
return false;
}
+ if (attrs[DEVLINK_ATTR_RATE_TC_BWS] &&
+ !ops->rate_node_tc_bw_set) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ attrs[DEVLINK_ATTR_RATE_TC_BWS],
+ "TC bandwidth set isn't supported for the nodes");
+ return false;
+ }
} else {
WARN(1, "Unknown type of rate object");
return false;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 2dfe9063613f..869cbe57162f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -42,12 +42,24 @@ config NET_DSA_TAG_BRCM
Broadcom switches which place the tag after the MAC source address.
config NET_DSA_TAG_BRCM_LEGACY
- tristate "Tag driver for Broadcom legacy switches using in-frame headers"
+ tristate "Tag driver for BCM63xx legacy switches using in-frame headers"
select NET_DSA_TAG_BRCM_COMMON
help
Say Y if you want to enable support for tagging frames for the
- Broadcom legacy switches which place the tag after the MAC source
+ BCM63xx legacy switches which place the tag after the MAC source
address.
+ This tag is used by BCM63xx legacy switches, which do not require the
+ original FCS and length to be present before the tag is inserted.
+
+config NET_DSA_TAG_BRCM_LEGACY_FCS
+ tristate "Tag driver for BCM53xx legacy switches using in-frame headers"
+ select NET_DSA_TAG_BRCM_COMMON
+ help
+ Say Y if you want to enable support for tagging frames for the
+ BCM53xx legacy switches which place the tag after the MAC source
+ address.
+ This tag is used by BCM53xx legacy switches, which expect the original
+ FCS and length to be present before the tag is inserted.
config NET_DSA_TAG_BRCM_PREPEND
tristate "Tag driver for Broadcom switches using prepended headers"
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 436a7e1b412a..5b01a0e43ebe 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1621,7 +1621,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds)
dsa_switch_for_each_cpu_port(dp, ds)
list_add(&dp->conduit->close_list, &close_list);
- dev_close_many(&close_list, true);
+ netif_close_many(&close_list, true);
dsa_switch_for_each_user_port(dp, ds) {
conduit = dsa_port_to_conduit(dp);
@@ -1829,3 +1829,4 @@ MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>");
MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:dsa");
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index fe75821623a4..26bb657ceac3 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -15,6 +15,7 @@
#define BRCM_NAME "brcm"
#define BRCM_LEGACY_NAME "brcm-legacy"
+#define BRCM_LEGACY_FCS_NAME "brcm-legacy-fcs"
#define BRCM_PREPEND_NAME "brcm-prepend"
/* Legacy Broadcom tag (6 bytes) */
@@ -32,6 +33,10 @@
#define BRCM_LEG_MULTICAST (1 << 5)
#define BRCM_LEG_EGRESS (2 << 5)
#define BRCM_LEG_INGRESS (3 << 5)
+#define BRCM_LEG_LEN_HI(x) (((x) >> 8) & 0x7)
+
+/* 4th byte in the tag */
+#define BRCM_LEG_LEN_LO(x) ((x) & 0xff)
/* 6th byte in the tag */
#define BRCM_LEG_PORT_ID (0xf)
@@ -212,6 +217,41 @@ DSA_TAG_DRIVER(brcm_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM, BRCM_NAME);
#endif
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY) || \
+ IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS)
+static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ int len = BRCM_LEG_TAG_LEN;
+ int source_port;
+ u8 *brcm_tag;
+
+ if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
+ return NULL;
+
+ brcm_tag = dsa_etype_header_pos_rx(skb);
+
+ source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
+
+ skb->dev = dsa_conduit_find_user(dev, 0, source_port);
+ if (!skb->dev)
+ return NULL;
+
+ /* VLAN tag is added by BCM63xx internal switch */
+ if (netdev_uses_dsa(skb->dev))
+ len += VLAN_HLEN;
+
+ /* Remove Broadcom tag and update checksum */
+ skb_pull_rcsum(skb, len);
+
+ dsa_default_offload_fwd_mark(skb);
+
+ dsa_strip_etype_header(skb, len);
+
+ return skb;
+}
+#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY || CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS */
+
#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
@@ -250,49 +290,77 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
- struct net_device *dev)
+static const struct dsa_device_ops brcm_legacy_netdev_ops = {
+ .name = BRCM_LEGACY_NAME,
+ .proto = DSA_TAG_PROTO_BRCM_LEGACY,
+ .xmit = brcm_leg_tag_xmit,
+ .rcv = brcm_leg_tag_rcv,
+ .needed_headroom = BRCM_LEG_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY, BRCM_LEGACY_NAME);
+#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */
+
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS)
+static struct sk_buff *brcm_leg_fcs_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
- int len = BRCM_LEG_TAG_LEN;
- int source_port;
+ struct dsa_port *dp = dsa_user_to_port(dev);
+ unsigned int fcs_len;
+ __le32 fcs_val;
u8 *brcm_tag;
- if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
+ /* The Ethernet switch we are interfaced with needs packets to be at
+ * least 64 bytes (including FCS) otherwise they will be discarded when
+ * they enter the switch port logic. When Broadcom tags are enabled, we
+ * need to make sure that packets are at least 70 bytes (including FCS
+ * and tag) because the length verification is done after the Broadcom
+ * tag is stripped off the ingress packet.
+ *
+ * Let dsa_user_xmit() free the SKB.
+ */
+ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false))
return NULL;
- brcm_tag = dsa_etype_header_pos_rx(skb);
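+ /* Compute the Ethernet FCS (CRC-32) over the padded frame before the tag is added */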
+ fcs_len = skb->len;
+ fcs_val = cpu_to_le32(crc32_le(~0, skb->data, fcs_len) ^ ~0);
- source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
+ skb_push(skb, BRCM_LEG_TAG_LEN);
- skb->dev = dsa_conduit_find_user(dev, 0, source_port);
- if (!skb->dev)
- return NULL;
+ dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN);
- /* VLAN tag is added by BCM63xx internal switch */
- if (netdev_uses_dsa(skb->dev))
- len += VLAN_HLEN;
+ brcm_tag = skb->data + 2 * ETH_ALEN;
- /* Remove Broadcom tag and update checksum */
- skb_pull_rcsum(skb, len);
+ /* Broadcom tag type */
+ brcm_tag[0] = BRCM_LEG_TYPE_HI;
+ brcm_tag[1] = BRCM_LEG_TYPE_LO;
- dsa_default_offload_fwd_mark(skb);
+ /* Broadcom tag value */
+ brcm_tag[2] = BRCM_LEG_EGRESS | BRCM_LEG_LEN_HI(fcs_len);
+ brcm_tag[3] = BRCM_LEG_LEN_LO(fcs_len);
+ brcm_tag[4] = 0;
+ brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID;
- dsa_strip_etype_header(skb, len);
+ /* Original FCS value */
+ if (__skb_pad(skb, ETH_FCS_LEN, false))
+ return NULL;
+ skb_put_data(skb, &fcs_val, ETH_FCS_LEN);
return skb;
}
-static const struct dsa_device_ops brcm_legacy_netdev_ops = {
- .name = BRCM_LEGACY_NAME,
- .proto = DSA_TAG_PROTO_BRCM_LEGACY,
- .xmit = brcm_leg_tag_xmit,
+static const struct dsa_device_ops brcm_legacy_fcs_netdev_ops = {
+ .name = BRCM_LEGACY_FCS_NAME,
+ .proto = DSA_TAG_PROTO_BRCM_LEGACY_FCS,
+ .xmit = brcm_leg_fcs_tag_xmit,
.rcv = brcm_leg_tag_rcv,
.needed_headroom = BRCM_LEG_TAG_LEN,
};
-DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
-MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY, BRCM_LEGACY_NAME);
-#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */
+DSA_TAG_DRIVER(brcm_legacy_fcs_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY_FCS, BRCM_LEGACY_FCS_NAME);
+#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS */
#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
@@ -328,6 +396,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = {
#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
&DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops),
#endif
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY_FCS)
+ &DSA_TAG_DRIVER_NAME(brcm_legacy_fcs_netdev_ops),
+#endif
#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
&DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops),
#endif
diff --git a/net/dsa/user.c b/net/dsa/user.c
index e9334520c54a..f59d66f0975d 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -3604,7 +3604,7 @@ static int dsa_user_netdevice_event(struct notifier_block *nb,
list_add(&dp->user->close_list, &close_list);
}
- dev_close_many(&close_list, true);
+ netif_close_many(&close_list, true);
return NOTIFY_OK;
}
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index eb253e0fd61b..4f58648a27ad 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -707,7 +707,9 @@ static u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
if (!rxfh.indir)
return U32_MAX;
+ mutex_lock(&dev->ethtool->rss_lock);
ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
+ mutex_unlock(&dev->ethtool->rss_lock);
if (ret) {
current_max = U32_MAX;
goto out_free;
@@ -804,12 +806,67 @@ out_free:
return rc;
}
+struct ethtool_rxfh_context *
+ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops,
+ u32 indir_size, u32 key_size)
+{
+ size_t indir_bytes, flex_len, key_off, size;
+ struct ethtool_rxfh_context *ctx;
+ u32 priv_bytes, indir_max;
+ u16 key_max;
+
+ key_max = max(key_size, ops->rxfh_key_space);
+ indir_max = max(indir_size, ops->rxfh_indir_space);
+
+ priv_bytes = ALIGN(ops->rxfh_priv_size, sizeof(u32));
+ indir_bytes = array_size(indir_max, sizeof(u32));
+
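+ /* ctx->data layout: aligned driver private area, then indir table, then hash key */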
+ key_off = size_add(priv_bytes, indir_bytes);
+ flex_len = size_add(key_off, key_max);
+ size = struct_size_t(struct ethtool_rxfh_context, data, flex_len);
+
+ ctx = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return NULL;
+
+ ctx->indir_size = indir_size;
+ ctx->key_size = key_size;
+ ctx->key_off = key_off;
+ ctx->priv_size = ops->rxfh_priv_size;
+
+ ctx->hfunc = ETH_RSS_HASH_NO_CHANGE;
+ ctx->input_xfrm = RXH_XFRM_NO_CHANGE;
+
+ return ctx;
+}
+
+/* Check if fields configured for flow hash are symmetric - if src is included
+ * so is dst and vice versa.
+ */
+int ethtool_rxfh_config_is_sym(u64 rxfh)
+{
+ bool sym;
+
+ sym = rxfh == (rxfh & (RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3));
+ sym &= !!(rxfh & RXH_IP_SRC) == !!(rxfh & RXH_IP_DST);
+ sym &= !!(rxfh & RXH_L4_B_0_1) == !!(rxfh & RXH_L4_B_2_3);
+
+ return sym;
+}
+
int ethtool_check_ops(const struct ethtool_ops *ops)
{
if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params))
return -EINVAL;
if (WARN_ON(ops->rxfh_max_num_contexts == 1))
return -EINVAL;
+ if (WARN_ON(ops->supported_input_xfrm && !ops->get_rxfh_fields))
+ return -EINVAL;
+ if (WARN_ON(ops->supported_input_xfrm &&
+ ops->rxfh_per_ctx_fields != ops->rxfh_per_ctx_key))
+ return -EINVAL;
+
/* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime,
* the fact that ops are checked at registration time does not
* mean the ops attached to a netdev later on are sane.
@@ -1079,5 +1136,6 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id)
netdev_err(dev, "device error, RSS context %d lost\n", context_id);
ctx = xa_erase(&dev->ethtool->rss_ctx, context_id);
kfree(ctx);
+ ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_DELETE_NTF, context_id);
}
EXPORT_SYMBOL(ethtool_rxfh_context_lost);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index b4683d286a5a..c4d084dde5bf 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -43,7 +43,11 @@ bool convert_legacy_settings_to_link_ksettings(
int ethtool_check_max_channel(struct net_device *dev,
struct ethtool_channels channels,
struct genl_info *info);
+struct ethtool_rxfh_context *
+ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops,
+ u32 indir_size, u32 key_size);
int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context);
+int ethtool_rxfh_config_is_sym(u64 rxfh);
void ethtool_ringparam_get_cfg(struct net_device *dev,
struct ethtool_ringparam *param,
@@ -74,4 +78,13 @@ int ethtool_get_module_eeprom_call(struct net_device *dev,
bool __ethtool_dev_mm_supported(struct net_device *dev);
+#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK)
+void ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context);
+#else
+static inline void
+ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context)
+{
+}
+#endif
+
#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 39ec920f5de7..43a7854e784e 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -617,8 +617,8 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
if (err >= 0) {
- ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
- ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF);
}
return err;
}
@@ -708,8 +708,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
__ETHTOOL_LINK_MODE_MASK_NU32;
ret = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
if (ret >= 0) {
- ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
- ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF);
}
return ret;
}
@@ -981,6 +981,7 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
static bool flow_type_hashable(u32 flow_type)
{
switch (flow_type) {
+ case ETHER_FLOW:
case TCP_V4_FLOW:
case UDP_V4_FLOW:
case SCTP_V4_FLOW:
@@ -1026,9 +1027,7 @@ static int ethtool_check_xfrm_rxfh(u32 input_xfrm, u64 rxfh)
*/
if ((input_xfrm != RXH_XFRM_NO_CHANGE &&
input_xfrm & (RXH_XFRM_SYM_XOR | RXH_XFRM_SYM_OR_XOR)) &&
- ((rxfh & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)) ||
- (!!(rxfh & RXH_IP_SRC) ^ !!(rxfh & RXH_IP_DST)) ||
- (!!(rxfh & RXH_L4_B_0_1) ^ !!(rxfh & RXH_L4_B_2_3))))
+ !ethtool_rxfh_config_is_sym(rxfh))
return -EINVAL;
return 0;
@@ -1037,22 +1036,24 @@ static int ethtool_check_xfrm_rxfh(u32 input_xfrm, u64 rxfh)
static int ethtool_check_flow_types(struct net_device *dev, u32 input_xfrm)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rxnfc info = {
- .cmd = ETHTOOL_GRXFH,
- };
int err;
u32 i;
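+ /* Per-flow hash field symmetry only needs checking when a symmetric transform is requested */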
+ if (!input_xfrm || input_xfrm == RXH_XFRM_NO_CHANGE)
+ return 0;
+
for (i = 0; i < __FLOW_TYPE_COUNT; i++) {
+ struct ethtool_rxfh_fields fields = {
+ .flow_type = i,
+ };
+
if (!flow_type_hashable(i))
continue;
- info.flow_type = i;
- err = ops->get_rxnfc(dev, &info, NULL);
- if (err)
+ if (ops->get_rxfh_fields(dev, &fields))
continue;
- err = ethtool_check_xfrm_rxfh(input_xfrm, info.data);
+ err = ethtool_check_xfrm_rxfh(input_xfrm, fields.data);
if (err)
return err;
}
@@ -1060,6 +1061,89 @@ static int ethtool_check_flow_types(struct net_device *dev, u32 input_xfrm)
return 0;
}
+static noinline_for_stack int
+ethtool_set_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_fields fields = {};
+ struct ethtool_rxnfc info;
+ size_t info_size = sizeof(info);
+ int rc;
+
+ if (!ops->set_rxfh_fields)
+ return -EOPNOTSUPP;
+
+ rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (rc)
+ return rc;
+
+ if (info.flow_type & FLOW_RSS && info.rss_context &&
+ !ops->rxfh_per_ctx_fields)
+ return -EINVAL;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ if (ops->get_rxfh) {
+ struct ethtool_rxfh_param rxfh = {};
+
+ rc = ops->get_rxfh(dev, &rxfh);
+ if (rc)
+ goto exit_unlock;
+
+ rc = ethtool_check_xfrm_rxfh(rxfh.input_xfrm, info.data);
+ if (rc)
+ goto exit_unlock;
+ }
+
+ fields.data = info.data;
+ fields.flow_type = info.flow_type & ~FLOW_RSS;
+ if (info.flow_type & FLOW_RSS)
+ fields.rss_context = info.rss_context;
+
+ rc = ops->set_rxfh_fields(dev, &fields, NULL);
+exit_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+ if (rc)
+ return rc;
+
+ ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, fields.rss_context);
+ return 0;
+}
+
+static noinline_for_stack int
+ethtool_get_rxfh_fields(struct net_device *dev, u32 cmd, void __user *useraddr)
+{
+ struct ethtool_rxnfc info;
+ size_t info_size = sizeof(info);
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_fields fields = {};
+ int ret;
+
+ if (!ops->get_rxfh_fields)
+ return -EOPNOTSUPP;
+
+ ret = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
+ if (ret)
+ return ret;
+
+ if (info.flow_type & FLOW_RSS && info.rss_context &&
+ !ops->rxfh_per_ctx_fields)
+ return -EINVAL;
+
+ fields.flow_type = info.flow_type & ~FLOW_RSS;
+ if (info.flow_type & FLOW_RSS)
+ fields.rss_context = info.rss_context;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ ret = ops->get_rxfh_fields(dev, &fields);
+ mutex_unlock(&dev->ethtool->rss_lock);
+ if (ret < 0)
+ return ret;
+
+ info.data = fields.data;
+
+ return ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL);
+}
+
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
@@ -1083,22 +1167,11 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
ethtool_get_flow_spec_ring(info.fs.ring_cookie))
return -EINVAL;
- if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context))
+ if (info.rss_context &&
+ !xa_load(&dev->ethtool->rss_ctx, info.rss_context))
return -EINVAL;
}
- if (cmd == ETHTOOL_SRXFH && ops->get_rxfh) {
- struct ethtool_rxfh_param rxfh = {};
-
- rc = ops->get_rxfh(dev, &rxfh);
- if (rc)
- return rc;
-
- rc = ethtool_check_xfrm_rxfh(rxfh.input_xfrm, info.data);
- if (rc)
- return rc;
- }
-
rc = ops->set_rxnfc(dev, &info);
if (rc)
return rc;
@@ -1208,7 +1281,9 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
if (!rxfh.indir)
return -ENOMEM;
+ mutex_lock(&dev->ethtool->rss_lock);
ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
+ mutex_unlock(&dev->ethtool->rss_lock);
if (ret)
goto out;
if (copy_to_user(useraddr +
@@ -1273,9 +1348,11 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
}
rxfh_dev.hfunc = ETH_RSS_HASH_NO_CHANGE;
+
+ mutex_lock(&dev->ethtool->rss_lock);
ret = ops->set_rxfh(dev, &rxfh_dev, extack);
if (ret)
- goto out;
+ goto out_unlock;
/* indicate whether rxfh was set to default */
if (user_size == 0)
@@ -1283,6 +1360,8 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
else
dev->priv_flags |= IFF_RXFH_CONFIGURED;
+out_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
out:
kfree(rxfh_dev.indir);
return ret;
@@ -1318,8 +1397,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
- ops->create_rxfh_context))
+ if (rxfh.rss_context && !ops->create_rxfh_context)
return -EOPNOTSUPP;
rxfh.indir_size = rxfh_dev.indir_size;
@@ -1343,6 +1421,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (user_key_size)
rxfh_dev.key = rss_config + indir_bytes;
+ mutex_lock(&dev->ethtool->rss_lock);
if (rxfh.rss_context) {
ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
if (!ctx) {
@@ -1388,45 +1467,12 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
ret = -EFAULT;
}
out:
+ mutex_unlock(&dev->ethtool->rss_lock);
kfree(rss_config);
return ret;
}
-static struct ethtool_rxfh_context *
-ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops,
- u32 indir_size, u32 key_size)
-{
- size_t indir_bytes, flex_len, key_off, size;
- struct ethtool_rxfh_context *ctx;
- u32 priv_bytes, indir_max;
- u16 key_max;
-
- key_max = max(key_size, ops->rxfh_key_space);
- indir_max = max(indir_size, ops->rxfh_indir_space);
-
- priv_bytes = ALIGN(ops->rxfh_priv_size, sizeof(u32));
- indir_bytes = array_size(indir_max, sizeof(u32));
-
- key_off = size_add(priv_bytes, indir_bytes);
- flex_len = size_add(key_off, key_max);
- size = struct_size_t(struct ethtool_rxfh_context, data, flex_len);
-
- ctx = kzalloc(size, GFP_KERNEL_ACCOUNT);
- if (!ctx)
- return NULL;
-
- ctx->indir_size = indir_size;
- ctx->key_size = key_size;
- ctx->key_off = key_off;
- ctx->priv_size = ops->rxfh_priv_size;
-
- ctx->hfunc = ETH_RSS_HASH_NO_CHANGE;
- ctx->input_xfrm = RXH_XFRM_NO_CHANGE;
-
- return ctx;
-}
-
static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
void __user *useraddr)
{
@@ -1439,9 +1485,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
- bool locked = false; /* dev->ethtool->rss_lock taken */
bool create = false;
u8 *rss_config;
+ int ntf = 0;
int ret;
if (!ops->get_rxnfc || !ops->set_rxfh)
@@ -1459,8 +1505,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
- ops->create_rxfh_context))
+ if (rxfh.rss_context && !ops->create_rxfh_context)
return -EOPNOTSUPP;
/* Check input data transformation capabilities */
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
@@ -1488,10 +1533,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
rxfh.input_xfrm == RXH_XFRM_NO_CHANGE))
return -EINVAL;
- ret = ethtool_check_flow_types(dev, rxfh.input_xfrm);
- if (ret)
- return ret;
-
indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
/* Check settings which may be global rather than per RSS-context */
@@ -1508,7 +1549,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
rx_rings.cmd = ETHTOOL_GRXRINGS;
ret = ops->get_rxnfc(dev, &rx_rings, NULL);
if (ret)
- goto out;
+ goto out_free;
/* rxfh.indir_size == 0 means reset the indir table to default (master
* context) or delete the context (other RSS contexts).
@@ -1524,7 +1565,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
&rx_rings,
rxfh.indir_size);
if (ret)
- goto out;
+ goto out_free;
} else if (rxfh.indir_size == 0) {
if (rxfh.rss_context == 0) {
u32 *indir;
@@ -1546,86 +1587,81 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
useraddr + rss_cfg_offset + user_indir_len,
rxfh.key_size)) {
ret = -EFAULT;
- goto out;
+ goto out_free;
}
}
- if (rxfh.rss_context) {
- mutex_lock(&dev->ethtool->rss_lock);
- locked = true;
- }
+ mutex_lock(&dev->ethtool->rss_lock);
+
+ ret = ethtool_check_flow_types(dev, rxfh.input_xfrm);
+ if (ret)
+ goto out_unlock;
if (rxfh.rss_context && rxfh_dev.rss_delete) {
ret = ethtool_check_rss_ctx_busy(dev, rxfh.rss_context);
if (ret)
- goto out;
+ goto out_unlock;
}
if (create) {
+ u32 limit, ctx_id;
+
if (rxfh_dev.rss_delete) {
ret = -EINVAL;
- goto out;
+ goto out_unlock;
}
ctx = ethtool_rxfh_ctx_alloc(ops, dev_indir_size, dev_key_size);
if (!ctx) {
ret = -ENOMEM;
- goto out;
+ goto out_unlock;
}
- if (ops->create_rxfh_context) {
- u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX;
- u32 ctx_id;
-
- /* driver uses new API, core allocates ID */
- ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx,
- XA_LIMIT(1, limit - 1),
- GFP_KERNEL_ACCOUNT);
- if (ret < 0) {
- kfree(ctx);
- goto out;
- }
- WARN_ON(!ctx_id); /* can't happen */
- rxfh.rss_context = ctx_id;
+ limit = ops->rxfh_max_num_contexts ?: U32_MAX;
+ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx,
+ XA_LIMIT(1, limit - 1), GFP_KERNEL_ACCOUNT);
+ if (ret < 0) {
+ kfree(ctx);
+ goto out_unlock;
}
+ WARN_ON(!ctx_id); /* can't happen */
+ rxfh.rss_context = ctx_id;
} else if (rxfh.rss_context) {
ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context);
if (!ctx) {
ret = -ENOENT;
- goto out;
+ goto out_unlock;
}
}
rxfh_dev.hfunc = rxfh.hfunc;
rxfh_dev.rss_context = rxfh.rss_context;
rxfh_dev.input_xfrm = rxfh.input_xfrm;
- if (rxfh.rss_context && ops->create_rxfh_context) {
- if (create) {
- ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev,
- extack);
- /* Make sure driver populates defaults */
- WARN_ON_ONCE(!ret && !rxfh_dev.key &&
- ops->rxfh_per_ctx_key &&
- !memchr_inv(ethtool_rxfh_context_key(ctx),
- 0, ctx->key_size));
- } else if (rxfh_dev.rss_delete) {
- ret = ops->remove_rxfh_context(dev, ctx,
- rxfh.rss_context,
- extack);
- } else {
- ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev,
- extack);
- }
- } else {
+ if (!rxfh.rss_context) {
+ ntf = ETHTOOL_MSG_RSS_NTF;
ret = ops->set_rxfh(dev, &rxfh_dev, extack);
+ } else if (create) {
+ ntf = ETHTOOL_MSG_RSS_CREATE_NTF;
+ ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack);
+ /* Make sure driver populates defaults */
+ WARN_ON_ONCE(!ret && !rxfh_dev.key && ops->rxfh_per_ctx_key &&
+ !memchr_inv(ethtool_rxfh_context_key(ctx), 0,
+ ctx->key_size));
+ } else if (rxfh_dev.rss_delete) {
+ ntf = ETHTOOL_MSG_RSS_DELETE_NTF;
+ ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context,
+ extack);
+ } else {
+ ntf = ETHTOOL_MSG_RSS_NTF;
+ ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack);
}
if (ret) {
+ ntf = 0;
if (create) {
/* failed to create, free our new tracking entry */
- if (ops->create_rxfh_context)
- xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
+ xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
kfree(ctx);
}
- goto out;
+ goto out_unlock;
}
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
@@ -1640,36 +1676,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
dev->priv_flags |= IFF_RXFH_CONFIGURED;
}
/* Update rss_ctx tracking */
- if (create && !ops->create_rxfh_context) {
- /* driver uses old API, it chose context ID */
- if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh_dev.rss_context))) {
- /* context ID reused, our tracking is screwed */
- kfree(ctx);
- goto out;
- }
- /* Allocate the exact ID the driver gave us */
- if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh_dev.rss_context,
- ctx, GFP_KERNEL))) {
- kfree(ctx);
- goto out;
- }
-
- /* Fetch the defaults for the old API, in the new API drivers
- * should write defaults into ctx themselves.
- */
- rxfh_dev.indir = (u32 *)rss_config;
- rxfh_dev.indir_size = dev_indir_size;
-
- rxfh_dev.key = rss_config + indir_bytes;
- rxfh_dev.key_size = dev_key_size;
-
- ret = ops->get_rxfh(dev, &rxfh_dev);
- if (WARN_ON(ret)) {
- xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context);
- kfree(ctx);
- goto out;
- }
- }
if (rxfh_dev.rss_delete) {
WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx);
kfree(ctx);
@@ -1692,10 +1698,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
ctx->input_xfrm = rxfh_dev.input_xfrm;
}
-out:
- if (locked)
- mutex_unlock(&dev->ethtool->rss_lock);
+out_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+out_free:
kfree(rss_config);
+ if (ntf)
+ ethtool_rss_notify(dev, ntf, rxfh.rss_context);
return ret;
}
@@ -1807,7 +1815,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
return ret;
dev->ethtool->wol_enabled = !!wol.wolopts;
- ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF);
return 0;
}
@@ -1883,7 +1891,7 @@ static int ethtool_set_eee(struct net_device *dev, char __user *useraddr)
eee_to_keee(&keee, &eee);
ret = dev->ethtool_ops->set_eee(dev, &keee);
if (!ret)
- ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF);
return ret;
}
@@ -2123,7 +2131,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
NULL);
if (!ret)
- ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF);
return ret;
}
@@ -2167,7 +2175,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr)
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, NULL);
if (!ret)
- ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF);
return ret;
}
@@ -2234,7 +2242,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
ret = dev->ethtool_ops->set_channels(dev, &channels);
if (!ret)
- ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF);
return ret;
}
@@ -2265,7 +2273,7 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
ret = dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
if (!ret)
- ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF);
return ret;
}
@@ -3267,7 +3275,7 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr,
rc = ethtool_set_value_void(dev, useraddr,
dev->ethtool_ops->set_msglevel);
if (!rc)
- ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF);
break;
case ETHTOOL_GEEE:
rc = ethtool_get_eee(dev, useraddr);
@@ -3331,20 +3339,24 @@ __dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr,
rc = ethtool_get_value(dev, useraddr, ethcmd,
dev->ethtool_ops->get_priv_flags);
if (!rc)
- ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF);
break;
case ETHTOOL_SPFLAGS:
rc = ethtool_set_value(dev, useraddr,
dev->ethtool_ops->set_priv_flags);
break;
case ETHTOOL_GRXFH:
+ rc = ethtool_get_rxfh_fields(dev, ethcmd, useraddr);
+ break;
+ case ETHTOOL_SRXFH:
+ rc = ethtool_set_rxfh_fields(dev, ethcmd, useraddr);
+ break;
case ETHTOOL_GRXRINGS:
case ETHTOOL_GRXCLSRLCNT:
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
break;
- case ETHTOOL_SRXFH:
case ETHTOOL_SRXCLSRLDEL:
case ETHTOOL_SRXCLSRLINS:
rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 9de828df46cd..2f813f25f07e 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -81,6 +81,12 @@ static void ethnl_sock_priv_destroy(void *priv)
}
}
+u32 ethnl_bcast_seq_next(void)
+{
+ ASSERT_RTNL();
+ return ++ethnl_bcast_seq;
+}
+
int ethnl_ops_begin(struct net_device *dev)
{
int ret;
@@ -405,6 +411,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
[ETHTOOL_MSG_PSE_SET] = &ethnl_pse_request_ops,
[ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops,
+ [ETHTOOL_MSG_RSS_SET] = &ethnl_rss_request_ops,
[ETHTOOL_MSG_PLCA_GET_CFG] = &ethnl_plca_cfg_request_ops,
[ETHTOOL_MSG_PLCA_SET_CFG] = &ethnl_plca_cfg_request_ops,
[ETHTOOL_MSG_PLCA_GET_STATUS] = &ethnl_plca_status_request_ops,
@@ -455,10 +462,15 @@ static int ethnl_default_parse(struct ethnl_req_info *req_info,
if (request_ops->parse_request) {
ret = request_ops->parse_request(req_info, tb, info->extack);
if (ret < 0)
- return ret;
+ goto err_dev;
}
return 0;
+
+err_dev:
+ netdev_put(req_info->dev, &req_info->dev_tracker);
+ req_info->dev = NULL;
+ return ret;
}
/**
@@ -508,7 +520,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ret = ethnl_default_parse(req_info, info, ops, !ops->allow_nodev_do);
if (ret < 0)
- goto err_dev;
+ goto err_free;
ethnl_init_reply_data(reply_data, ops, req_info->dev);
rtnl_lock();
@@ -554,6 +566,7 @@ err_cleanup:
ops->cleanup_data(reply_data);
err_dev:
netdev_put(req_info->dev, &req_info->dev_tracker);
+err_free:
kfree(reply_data);
kfree(req_info);
return ret;
@@ -656,6 +669,8 @@ static int ethnl_default_start(struct netlink_callback *cb)
}
ret = ethnl_default_parse(req_info, &info->info, ops, false);
+ if (ret < 0)
+ goto free_reply_data;
if (req_info->dev) {
/* We ignore device specification in dump requests but as the
* same parser as for non-dump (doit) requests is used, it
@@ -664,8 +679,6 @@ static int ethnl_default_start(struct netlink_callback *cb)
netdev_put(req_info->dev, &req_info->dev_tracker);
req_info->dev = NULL;
}
- if (ret < 0)
- goto free_reply_data;
ctx->ops = ops;
ctx->req_info = req_info;
@@ -714,13 +727,13 @@ static int ethnl_perphy_start(struct netlink_callback *cb)
* the dev's ifindex, .dumpit() will grab and release the netdev itself.
*/
ret = ethnl_default_parse(req_info, &info->info, ops, false);
+ if (ret < 0)
+ goto free_reply_data;
if (req_info->dev) {
phy_ctx->ifindex = req_info->dev->ifindex;
netdev_put(req_info->dev, &req_info->dev_tracker);
req_info->dev = NULL;
}
- if (ret < 0)
- goto free_reply_data;
ctx->ops = ops;
ctx->req_info = req_info;
@@ -863,8 +876,8 @@ static int ethnl_default_done(struct netlink_callback *cb)
static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
{
const struct ethnl_request_ops *ops;
- struct ethnl_req_info req_info = {};
const u8 cmd = info->genlhdr->cmd;
+ struct ethnl_req_info *req_info;
struct net_device *dev;
int ret;
@@ -874,20 +887,22 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
return -EINVAL;
- ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr],
- genl_info_net(info), info->extack,
- true);
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return -ENOMEM;
+
+ ret = ethnl_default_parse(req_info, info, ops, true);
if (ret < 0)
- return ret;
+ goto out_free_req;
if (ops->set_validate) {
- ret = ops->set_validate(&req_info, info);
+ ret = ops->set_validate(req_info, info);
/* 0 means nothing to do */
if (ret <= 0)
goto out_dev;
}
- dev = req_info.dev;
+ dev = req_info->dev;
rtnl_lock();
netdev_lock_ops(dev);
@@ -902,14 +917,14 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_free_cfg;
- ret = ops->set(&req_info, info);
+ ret = ops->set(req_info, info);
if (ret < 0)
goto out_ops;
swap(dev->cfg, dev->cfg_pending);
if (!ret)
goto out_ops;
- ethtool_notify(dev, ops->set_ntf_cmd, NULL);
+ ethnl_notify(dev, ops->set_ntf_cmd, req_info);
ret = 0;
out_ops:
@@ -921,7 +936,9 @@ out_tie_cfg:
netdev_unlock_ops(dev);
rtnl_unlock();
out_dev:
- ethnl_parse_header_dev_put(&req_info);
+ ethnl_parse_header_dev_put(req_info);
+out_free_req:
+ kfree(req_info);
return ret;
}
@@ -942,11 +959,13 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops,
[ETHTOOL_MSG_PLCA_NTF] = &ethnl_plca_cfg_request_ops,
[ETHTOOL_MSG_MM_NTF] = &ethnl_mm_request_ops,
+ [ETHTOOL_MSG_RSS_NTF] = &ethnl_rss_request_ops,
+ [ETHTOOL_MSG_RSS_CREATE_NTF] = &ethnl_rss_request_ops,
};
/* default notification handler */
static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
- const void *data)
+ const struct ethnl_req_info *orig_req_info)
{
struct ethnl_reply_data *reply_data;
const struct ethnl_request_ops *ops;
@@ -975,6 +994,11 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
req_info->dev = dev;
req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
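+ /* Preserve the type-specific request info (e.g. RSS context) from the originating SET request */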
+ if (orig_req_info) {
+ req_info->phy_index = orig_req_info->phy_index;
+ memcpy(&req_info[1], &orig_req_info[1],
+ ops->req_info_size - sizeof(*req_info));
+ }
netdev_ops_assert_locked(dev);
@@ -1025,7 +1049,7 @@ err_rep:
/* notifications */
typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd,
- const void *data);
+ const struct ethnl_req_info *req_info);
static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
[ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify,
@@ -1043,9 +1067,12 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
[ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_MM_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_RSS_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_RSS_CREATE_NTF] = ethnl_default_notify,
};
-void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
+void ethnl_notify(struct net_device *dev, unsigned int cmd,
+ const struct ethnl_req_info *req_info)
{
if (unlikely(!ethnl_ok))
return;
@@ -1053,18 +1080,23 @@ void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) &&
ethnl_notify_handlers[cmd]))
- ethnl_notify_handlers[cmd](dev, cmd, data);
+ ethnl_notify_handlers[cmd](dev, cmd, req_info);
else
WARN_ONCE(1, "notification %u not implemented (dev=%s)\n",
cmd, netdev_name(dev));
}
+
+void ethtool_notify(struct net_device *dev, unsigned int cmd)
+{
+ ethnl_notify(dev, cmd, NULL);
+}
EXPORT_SYMBOL(ethtool_notify);
static void ethnl_notify_features(struct netdev_notifier_info *info)
{
struct net_device *dev = netdev_notifier_info_to_dev(info);
- ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF);
}
static int ethnl_netdev_event(struct notifier_block *this, unsigned long event,
@@ -1481,6 +1513,27 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_tsconfig_set_policy,
.maxattr = ARRAY_SIZE(ethnl_tsconfig_set_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_RSS_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_default_set_doit,
+ .policy = ethnl_rss_set_policy,
+ .maxattr = ARRAY_SIZE(ethnl_rss_set_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_RSS_CREATE_ACT,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_rss_create_doit,
+ .policy = ethnl_rss_create_policy,
+ .maxattr = ARRAY_SIZE(ethnl_rss_create_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_RSS_DELETE_ACT,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_rss_delete_doit,
+ .policy = ethnl_rss_delete_policy,
+ .maxattr = ARRAY_SIZE(ethnl_rss_delete_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 91b953924af3..1d4f9ecb3d26 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -10,6 +10,7 @@
struct ethnl_req_info;
+u32 ethnl_bcast_seq_next(void);
int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
const struct nlattr *nest, struct net *net,
struct netlink_ext_ack *extack,
@@ -23,6 +24,8 @@ void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd);
void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd);
void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd);
int ethnl_multicast(struct sk_buff *skb, struct net_device *dev);
+void ethnl_notify(struct net_device *dev, unsigned int cmd,
+ const struct ethnl_req_info *req_info);
/**
* ethnl_strz_size() - calculate attribute length for fixed size string
@@ -337,6 +340,8 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
* header is already filled on entry, the rest up to @repdata_offset
* is zero initialized. This callback should only modify type specific
* request info by parsed attributes from request message.
+ * Called for both GET and SET. Information parsed for SET will
+ * be conveyed to the req_info used during NTF generation.
* @prepare_data:
* Retrieve and prepare data needed to compose a reply message. Calls to
* ethtool_ops handlers are limited to this callback. Common reply data
@@ -463,7 +468,7 @@ extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMB
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1];
-extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
+extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1];
extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1];
extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1];
@@ -480,6 +485,9 @@ extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MO
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_START_CONTEXT + 1];
+extern const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1];
+extern const struct nla_policy ethnl_rss_create_policy[ETHTOOL_A_RSS_INPUT_XFRM + 1];
+extern const struct nla_policy ethnl_rss_delete_policy[ETHTOOL_A_RSS_CONTEXT + 1];
extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
@@ -502,6 +510,8 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_tsinfo_start(struct netlink_callback *cb);
int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_tsinfo_done(struct netlink_callback *cb);
+int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info);
+int ethnl_rss_delete_doit(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index f7c847aeb1a2..0f9af1e66548 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -168,6 +168,7 @@ const struct nla_policy ethnl_pause_set_policy[] = {
[ETHTOOL_A_PAUSE_AUTONEG] = { .type = NLA_U8 },
[ETHTOOL_A_PAUSE_RX] = { .type = NLA_U8 },
[ETHTOOL_A_PAUSE_TX] = { .type = NLA_U8 },
+ [ETHTOOL_A_PAUSE_STATS_SRC] = { .type = NLA_REJECT },
};
static int
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index 4f6b99eab2a6..24def9c9dd54 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -11,6 +11,7 @@
#include "netlink.h"
#include <linux/ethtool_netlink.h>
#include <linux/ethtool.h>
+#include <linux/export.h>
#include <linux/phy.h>
struct pse_req_info {
@@ -83,6 +84,8 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
const struct ethtool_pse_control_status *st = &data->status;
int len = 0;
+ if (st->pw_d_id)
+ len += nla_total_size(sizeof(u32)); /* _PSE_PW_D_ID */
if (st->podl_admin_state > 0)
len += nla_total_size(sizeof(u32)); /* _PODL_PSE_ADMIN_STATE */
if (st->podl_pw_status > 0)
@@ -109,6 +112,9 @@ static int pse_reply_size(const struct ethnl_req_info *req_base,
len += st->c33_pw_limit_nb_ranges *
(nla_total_size(0) +
nla_total_size(sizeof(u32)) * 2);
+ if (st->prio_max)
+ /* _PSE_PRIO_MAX + _PSE_PRIO */
+ len += nla_total_size(sizeof(u32)) * 2;
return len;
}
@@ -148,6 +154,11 @@ static int pse_fill_reply(struct sk_buff *skb,
const struct pse_reply_data *data = PSE_REPDATA(reply_base);
const struct ethtool_pse_control_status *st = &data->status;
+ if (st->pw_d_id &&
+ nla_put_u32(skb, ETHTOOL_A_PSE_PW_D_ID,
+ st->pw_d_id))
+ return -EMSGSIZE;
+
if (st->podl_admin_state > 0 &&
nla_put_u32(skb, ETHTOOL_A_PODL_PSE_ADMIN_STATE,
st->podl_admin_state))
@@ -198,6 +209,11 @@ static int pse_fill_reply(struct sk_buff *skb,
pse_put_pw_limit_ranges(skb, st))
return -EMSGSIZE;
+ if (st->prio_max &&
+ (nla_put_u32(skb, ETHTOOL_A_PSE_PRIO_MAX, st->prio_max) ||
+ nla_put_u32(skb, ETHTOOL_A_PSE_PRIO, st->prio)))
+ return -EMSGSIZE;
+
return 0;
}
@@ -219,6 +235,7 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED),
[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 },
+ [ETHTOOL_A_PSE_PRIO] = { .type = NLA_U32 },
};
static int
@@ -267,6 +284,15 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
if (ret)
return ret;
+ if (tb[ETHTOOL_A_PSE_PRIO]) {
+ unsigned int prio;
+
+ prio = nla_get_u32(tb[ETHTOOL_A_PSE_PRIO]);
+ ret = pse_ethtool_set_prio(phydev->psec, info->extack, prio);
+ if (ret)
+ return ret;
+ }
+
if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
unsigned int pw_limit;
@@ -315,3 +341,42 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.set = ethnl_set_pse,
/* PSE has no notification */
};
+
+void ethnl_pse_send_ntf(struct net_device *netdev, unsigned long notifs)
+{
+ void *reply_payload;
+ struct sk_buff *skb;
+ int reply_len;
+ int ret;
+
+ ASSERT_RTNL();
+
+ if (!netdev || !notifs)
+ return;
+
+ reply_len = ethnl_reply_header_size() +
+ nla_total_size(sizeof(u32)); /* _PSE_NTF_EVENTS */
+
+ skb = genlmsg_new(reply_len, GFP_KERNEL);
+ if (!skb)
+ return;
+
+ reply_payload = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_PSE_NTF);
+ if (!reply_payload)
+ goto err_skb;
+
+ ret = ethnl_fill_reply_header(skb, netdev, ETHTOOL_A_PSE_NTF_HEADER);
+ if (ret < 0)
+ goto err_skb;
+
+ if (nla_put_uint(skb, ETHTOOL_A_PSE_NTF_EVENTS, notifs))
+ goto err_skb;
+
+ genlmsg_end(skb, reply_payload);
+ ethnl_multicast(skb, netdev);
+ return;
+
+err_skb:
+ nlmsg_free(skb);
+}
+EXPORT_SYMBOL_GPL(ethnl_pse_send_ntf);
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 6d9b1769896b..992e98abe9dd 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -12,6 +12,7 @@ struct rss_req_info {
struct rss_reply_data {
struct ethnl_reply_data base;
+ bool has_flow_hash;
bool no_key_fields;
u32 indir_size;
u32 hkey_size;
@@ -19,6 +20,37 @@ struct rss_reply_data {
u32 input_xfrm;
u32 *indir_table;
u8 *hkey;
+ int flow_hash[__ETHTOOL_A_FLOW_CNT];
+};
+
+static const u8 ethtool_rxfh_ft_nl2ioctl[] = {
+ [ETHTOOL_A_FLOW_ETHER] = ETHER_FLOW,
+ [ETHTOOL_A_FLOW_IP4] = IPV4_FLOW,
+ [ETHTOOL_A_FLOW_IP6] = IPV6_FLOW,
+ [ETHTOOL_A_FLOW_TCP4] = TCP_V4_FLOW,
+ [ETHTOOL_A_FLOW_UDP4] = UDP_V4_FLOW,
+ [ETHTOOL_A_FLOW_SCTP4] = SCTP_V4_FLOW,
+ [ETHTOOL_A_FLOW_AH_ESP4] = AH_ESP_V4_FLOW,
+ [ETHTOOL_A_FLOW_TCP6] = TCP_V6_FLOW,
+ [ETHTOOL_A_FLOW_UDP6] = UDP_V6_FLOW,
+ [ETHTOOL_A_FLOW_SCTP6] = SCTP_V6_FLOW,
+ [ETHTOOL_A_FLOW_AH_ESP6] = AH_ESP_V6_FLOW,
+ [ETHTOOL_A_FLOW_AH4] = AH_V4_FLOW,
+ [ETHTOOL_A_FLOW_ESP4] = ESP_V4_FLOW,
+ [ETHTOOL_A_FLOW_AH6] = AH_V6_FLOW,
+ [ETHTOOL_A_FLOW_ESP6] = ESP_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPU4] = GTPU_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPU6] = GTPU_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPC4] = GTPC_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPC6] = GTPC_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPC_TEID4] = GTPC_TEID_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPC_TEID6] = GTPC_TEID_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_EH4] = GTPU_EH_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_EH6] = GTPU_EH_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_UL4] = GTPU_UL_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_UL6] = GTPU_UL_V6_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_DL4] = GTPU_DL_V4_FLOW,
+ [ETHTOOL_A_FLOW_GTPU_DL6] = GTPU_DL_V6_FLOW,
};
#define RSS_REQINFO(__req_base) \
@@ -49,21 +81,43 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
return 0;
}
+static void
+rss_prepare_flow_hash(const struct rss_req_info *req, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ int i;
+
+ data->has_flow_hash = false;
+
+ if (!dev->ethtool_ops->get_rxfh_fields)
+ return;
+ if (req->rss_context && !dev->ethtool_ops->rxfh_per_ctx_fields)
+ return;
+
+ mutex_lock(&dev->ethtool->rss_lock);
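+ /* Query the driver for the configured hash fields of each flow type */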
+ for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) {
+ struct ethtool_rxfh_fields fields = {
+ .flow_type = ethtool_rxfh_ft_nl2ioctl[i],
+ .rss_context = req->rss_context,
+ };
+
+ if (dev->ethtool_ops->get_rxfh_fields(dev, &fields)) {
+ data->flow_hash[i] = -1; /* Unsupported */
+ continue;
+ }
+
+ data->flow_hash[i] = fields.data;
+ data->has_flow_hash = true;
+ }
+ mutex_unlock(&dev->ethtool->rss_lock);
+}
+
static int
-rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
- struct rss_reply_data *data, const struct genl_info *info)
+rss_get_data_alloc(struct net_device *dev, struct rss_reply_data *data)
{
- struct ethtool_rxfh_param rxfh = {};
- const struct ethtool_ops *ops;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
u32 total_size, indir_bytes;
u8 *rss_config;
- int ret;
-
- ops = dev->ethtool_ops;
-
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- return ret;
data->indir_size = 0;
data->hkey_size = 0;
@@ -75,16 +129,39 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
indir_bytes = data->indir_size * sizeof(u32);
total_size = indir_bytes + data->hkey_size;
rss_config = kzalloc(total_size, GFP_KERNEL);
- if (!rss_config) {
- ret = -ENOMEM;
- goto out_ops;
- }
+ if (!rss_config)
+ return -ENOMEM;
if (data->indir_size)
data->indir_table = (u32 *)rss_config;
if (data->hkey_size)
data->hkey = rss_config + indir_bytes;
+ return 0;
+}
+
+static void rss_get_data_free(const struct rss_reply_data *data)
+{
+ kfree(data->indir_table);
+}
+
+static int
+rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_param rxfh = {};
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ mutex_lock(&dev->ethtool->rss_lock);
+
+ ret = rss_get_data_alloc(dev, data);
+ if (ret)
+ goto out_unlock;
+
rxfh.indir_size = data->indir_size;
rxfh.indir = data->indir_table;
rxfh.key_size = data->hkey_size;
@@ -92,15 +169,35 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
ret = ops->get_rxfh(dev, &rxfh);
if (ret)
- goto out_ops;
+ goto out_unlock;
data->hfunc = rxfh.hfunc;
data->input_xfrm = rxfh.input_xfrm;
-out_ops:
+out_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
ethnl_ops_complete(dev);
return ret;
}
+static void
+__rss_prepare_ctx(struct net_device *dev, struct rss_reply_data *data,
+ struct ethtool_rxfh_context *ctx)
+{
+ if (WARN_ON_ONCE(data->indir_size != ctx->indir_size ||
+ data->hkey_size != ctx->key_size))
+ return;
+
+ data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
+
+ data->hfunc = ctx->hfunc;
+ data->input_xfrm = ctx->input_xfrm;
+ memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx),
+ data->indir_size * sizeof(u32));
+ if (data->hkey_size)
+ memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
+ data->hkey_size);
+}
+
static int
rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
struct rss_reply_data *data, const struct genl_info *info)
@@ -108,34 +205,51 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
struct ethtool_rxfh_context *ctx;
u32 total_size, indir_bytes;
u8 *rss_config;
+ int ret;
- data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key;
-
+ mutex_lock(&dev->ethtool->rss_lock);
ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
- if (!ctx)
- return -ENOENT;
+ if (!ctx) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
data->indir_size = ctx->indir_size;
data->hkey_size = ctx->key_size;
- data->hfunc = ctx->hfunc;
- data->input_xfrm = ctx->input_xfrm;
indir_bytes = data->indir_size * sizeof(u32);
total_size = indir_bytes + data->hkey_size;
rss_config = kzalloc(total_size, GFP_KERNEL);
- if (!rss_config)
- return -ENOMEM;
+ if (!rss_config) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
data->indir_table = (u32 *)rss_config;
- memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes);
-
- if (data->hkey_size) {
+ if (data->hkey_size)
data->hkey = rss_config + indir_bytes;
- memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
- data->hkey_size);
- }
- return 0;
+ __rss_prepare_ctx(dev, data, ctx);
+
+ ret = 0;
+out_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+ return ret;
+}
+
+static int
+rss_prepare(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ rss_prepare_flow_hash(request, dev, data, info);
+
+ /* Coming from RSS_SET, driver may only have flow_hash_fields ops */
+ if (!dev->ethtool_ops->get_rxfh)
+ return 0;
+
+ if (request->rss_context)
+ return rss_prepare_ctx(request, dev, data, info);
+ return rss_prepare_get(request, dev, data, info);
}
static int
@@ -153,14 +267,10 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
return -EOPNOTSUPP;
/* Some drivers don't handle rss_context */
- if (request->rss_context) {
- if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
- return -EOPNOTSUPP;
-
- return rss_prepare_ctx(request, dev, data, info);
- }
+ if (request->rss_context && !ops->create_rxfh_context)
+ return -EOPNOTSUPP;
- return rss_prepare_get(request, dev, data, info);
+ return rss_prepare(request, dev, data, info);
}
static int
@@ -174,7 +284,10 @@ rss_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */
nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
- nla_total_size(data->hkey_size); /* _RSS_HKEY */
+ nla_total_size(data->hkey_size) + /* _RSS_HKEY */
+ nla_total_size(0) + /* _RSS_FLOW_HASH */
+ nla_total_size(sizeof(u32)) * ETHTOOL_A_FLOW_MAX +
+ 0;
return len;
}
@@ -195,17 +308,34 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
sizeof(u32) * data->indir_size, data->indir_table)))
return -EMSGSIZE;
- if (data->no_key_fields)
- return 0;
-
- if ((data->hfunc &&
- nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
- (data->input_xfrm &&
- nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
- (data->hkey_size &&
- nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
+ if (!data->no_key_fields &&
+ ((data->hfunc &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->input_xfrm &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
+ (data->hkey_size &&
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))))
return -EMSGSIZE;
+ if (data->has_flow_hash) {
+ struct nlattr *nest;
+ int i;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_RSS_FLOW_HASH);
+ if (!nest)
+ return -EMSGSIZE;
+
+ for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) {
+ if (data->flow_hash[i] >= 0 &&
+ nla_put_uint(skb, i, data->flow_hash[i])) {
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+ }
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
return 0;
}
@@ -213,7 +343,7 @@ static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
- kfree(data->indir_table);
+ rss_get_data_free(data);
}
struct rss_nl_dump_ctx {
@@ -284,11 +414,7 @@ rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb,
if (ret < 0)
goto err_cancel;
- /* Context 0 is not currently storred or cached in the XArray */
- if (!rss_context)
- ret = rss_prepare_get(&req, dev, &data, info);
- else
- ret = rss_prepare_ctx(&req, dev, &data, info);
+ ret = rss_prepare(&req, dev, &data, info);
if (ret)
goto err_cancel;
@@ -358,6 +484,434 @@ int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
}
+/* RSS_NTF */
+
+static void ethnl_rss_delete_notify(struct net_device *dev, u32 rss_context)
+{
+ struct sk_buff *ntf;
+ size_t ntf_size;
+ void *hdr;
+
+ ntf_size = ethnl_reply_header_size() +
+ nla_total_size(sizeof(u32)); /* _RSS_CONTEXT */
+
+ ntf = genlmsg_new(ntf_size, GFP_KERNEL);
+ if (!ntf)
+ goto out_warn;
+
+ hdr = ethnl_bcastmsg_put(ntf, ETHTOOL_MSG_RSS_DELETE_NTF);
+ if (!hdr)
+ goto out_free_ntf;
+
+ if (ethnl_fill_reply_header(ntf, dev, ETHTOOL_A_RSS_HEADER) ||
+ nla_put_u32(ntf, ETHTOOL_A_RSS_CONTEXT, rss_context))
+ goto out_free_ntf;
+
+ genlmsg_end(ntf, hdr);
+ if (ethnl_multicast(ntf, dev))
+ goto out_warn;
+
+ return;
+
+out_free_ntf:
+ nlmsg_free(ntf);
+out_warn:
+ pr_warn_once("Failed to send an RSS delete notification\n");
+}
+
+void ethtool_rss_notify(struct net_device *dev, u32 type, u32 rss_context)
+{
+ struct rss_req_info req_info = {
+ .rss_context = rss_context,
+ };
+
+ if (type == ETHTOOL_MSG_RSS_DELETE_NTF)
+ ethnl_rss_delete_notify(dev, rss_context);
+ else
+ ethnl_notify(dev, type, &req_info.base);
+}
+
+/* RSS_SET */
+
+#define RFH_MASK (RXH_L2DA | RXH_VLAN | RXH_IP_SRC | RXH_IP_DST | \
+ RXH_L3_PROTO | RXH_L4_B_0_1 | RXH_L4_B_2_3 | \
+ RXH_GTP_TEID | RXH_DISCARD)
+
+static const struct nla_policy ethnl_rss_flows_policy[] = {
+ [ETHTOOL_A_FLOW_ETHER] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_IP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_TCP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_UDP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_SCTP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_AH_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_AH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC_TEID4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_EH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_UL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_DL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+ [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK),
+};
+
+const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1] = {
+ [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32, },
+ [ETHTOOL_A_RSS_HFUNC] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RSS_INDIR] = { .type = NLA_BINARY, },
+ [ETHTOOL_A_RSS_HKEY] = NLA_POLICY_MIN(NLA_BINARY, 1),
+ [ETHTOOL_A_RSS_INPUT_XFRM] =
+ NLA_POLICY_MAX(NLA_U32, RXH_XFRM_SYM_OR_XOR),
+ [ETHTOOL_A_RSS_FLOW_HASH] = NLA_POLICY_NESTED(ethnl_rss_flows_policy),
+};
+
+static int
+ethnl_rss_set_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+ struct rss_req_info *request = RSS_REQINFO(req_info);
+ struct nlattr **tb = info->attrs;
+ struct nlattr *bad_attr = NULL;
+ u32 input_xfrm;
+
+ if (request->rss_context && !ops->create_rxfh_context)
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_CONTEXT];
+
+ if (request->rss_context && !ops->rxfh_per_ctx_key) {
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_HFUNC];
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_HKEY];
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_INPUT_XFRM];
+ }
+
+ input_xfrm = nla_get_u32_default(tb[ETHTOOL_A_RSS_INPUT_XFRM], 0);
+ if (input_xfrm & ~ops->supported_input_xfrm)
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_INPUT_XFRM];
+
+ if (tb[ETHTOOL_A_RSS_FLOW_HASH] && !ops->set_rxfh_fields)
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_FLOW_HASH];
+ if (request->rss_context &&
+ tb[ETHTOOL_A_RSS_FLOW_HASH] && !ops->rxfh_per_ctx_fields)
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_FLOW_HASH];
+
+ if (bad_attr) {
+ NL_SET_BAD_ATTR(info->extack, bad_attr);
+ return -EOPNOTSUPP;
+ }
+
+ return 1;
+}
+
+static int
+rss_set_prep_indir(struct net_device *dev, struct genl_info *info,
+ struct rss_reply_data *data, struct ethtool_rxfh_param *rxfh,
+ bool *reset, bool *mod)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct netlink_ext_ack *extack = info->extack;
+ struct nlattr **tb = info->attrs;
+ struct ethtool_rxnfc rx_rings;
+ size_t alloc_size;
+ u32 user_size;
+ int i, err;
+
+ if (!tb[ETHTOOL_A_RSS_INDIR])
+ return 0;
+ if (!data->indir_size || !ops->get_rxnfc)
+ return -EOPNOTSUPP;
+
+ rx_rings.cmd = ETHTOOL_GRXRINGS;
+ err = ops->get_rxnfc(dev, &rx_rings, NULL);
+ if (err)
+ return err;
+
+ if (nla_len(tb[ETHTOOL_A_RSS_INDIR]) % 4) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_INDIR]);
+ return -EINVAL;
+ }
+ user_size = nla_len(tb[ETHTOOL_A_RSS_INDIR]) / 4;
+ if (!user_size) {
+ if (rxfh->rss_context) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_RSS_INDIR],
+ "can't reset table for a context");
+ return -EINVAL;
+ }
+ *reset = true;
+ } else if (data->indir_size % user_size) {
+ NL_SET_ERR_MSG_ATTR_FMT(extack, tb[ETHTOOL_A_RSS_INDIR],
+ "size (%d) mismatch with device indir table (%d)",
+ user_size, data->indir_size);
+ return -EINVAL;
+ }
+
+ rxfh->indir_size = data->indir_size;
+ alloc_size = array_size(data->indir_size, sizeof(rxfh->indir[0]));
+ rxfh->indir = kzalloc(alloc_size, GFP_KERNEL);
+ if (!rxfh->indir)
+ return -ENOMEM;
+
+ nla_memcpy(rxfh->indir, tb[ETHTOOL_A_RSS_INDIR], alloc_size);
+ for (i = 0; i < user_size; i++) {
+ if (rxfh->indir[i] < rx_rings.data)
+ continue;
+
+ NL_SET_ERR_MSG_ATTR_FMT(extack, tb[ETHTOOL_A_RSS_INDIR],
+ "entry %d: queue out of range (%d)",
+ i, rxfh->indir[i]);
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ if (user_size) {
+ /* Replicate the user-provided table to fill the device table */
+ for (i = user_size; i < data->indir_size; i++)
+ rxfh->indir[i] = rxfh->indir[i % user_size];
+ } else {
+ for (i = 0; i < data->indir_size; i++)
+ rxfh->indir[i] =
+ ethtool_rxfh_indir_default(i, rx_rings.data);
+ }
+
+ *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size);
+
+ return 0;
+
+err_free:
+ kfree(rxfh->indir);
+ rxfh->indir = NULL;
+ return err;
+}
+
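Aside (not part of the patch): the fill step in rss_set_prep_indir() is the part worth internalizing — a short user-supplied table is tiled across the full device table, and an empty attribute means "reset to the driver default spread". The sketch below is a minimal userspace illustration; fill_indir() and the sizes are made up, and the default branch mirrors what ethtool_rxfh_indir_default() does in the kernel (index % n_rx_rings).

#include <stdio.h>

static void fill_indir(unsigned int *indir, unsigned int dev_size,
                       const unsigned int *user, unsigned int user_size,
                       unsigned int num_rx_rings)
{
        unsigned int i;

        if (user_size) {
                /* Replicate the user table until the device table is full */
                for (i = 0; i < dev_size; i++)
                        indir[i] = user[i % user_size];
        } else {
                /* Reset: spread queues round-robin, like ethtool_rxfh_indir_default() */
                for (i = 0; i < dev_size; i++)
                        indir[i] = i % num_rx_rings;
        }
}

int main(void)
{
        unsigned int user[] = { 0, 2 };
        unsigned int indir[8];

        fill_indir(indir, 8, user, 2, 4);
        for (unsigned int i = 0; i < 8; i++)
                printf("%u ", indir[i]);        /* prints: 0 2 0 2 0 2 0 2 */
        printf("\n");
        return 0;
}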
+static int
+rss_set_prep_hkey(struct net_device *dev, struct genl_info *info,
+ struct rss_reply_data *data, struct ethtool_rxfh_param *rxfh,
+ bool *mod)
+{
+ struct nlattr **tb = info->attrs;
+
+ if (!tb[ETHTOOL_A_RSS_HKEY])
+ return 0;
+
+ if (nla_len(tb[ETHTOOL_A_RSS_HKEY]) != data->hkey_size) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_HKEY]);
+ return -EINVAL;
+ }
+
+ rxfh->key_size = data->hkey_size;
+ rxfh->key = kmemdup(data->hkey, data->hkey_size, GFP_KERNEL);
+ if (!rxfh->key)
+ return -ENOMEM;
+
+ ethnl_update_binary(rxfh->key, rxfh->key_size, tb[ETHTOOL_A_RSS_HKEY],
+ mod);
+ return 0;
+}
+
+static int
+rss_check_rxfh_fields_sym(struct net_device *dev, struct genl_info *info,
+ struct rss_reply_data *data, bool xfrm_sym)
+{
+ struct nlattr **tb = info->attrs;
+ int i;
+
+ if (!xfrm_sym)
+ return 0;
+ if (!data->has_flow_hash) {
+ NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RSS_INPUT_XFRM],
+ "hash field config not reported");
+ return -EINVAL;
+ }
+
+ for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++)
+ if (data->flow_hash[i] >= 0 &&
+ !ethtool_rxfh_config_is_sym(data->flow_hash[i])) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RSS_INPUT_XFRM],
+ "hash field config is not symmetric");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ethnl_set_rss_fields(struct net_device *dev, struct genl_info *info,
+ u32 rss_context, struct rss_reply_data *data,
+ bool xfrm_sym, bool *mod)
+{
+ struct nlattr *flow_nest = info->attrs[ETHTOOL_A_RSS_FLOW_HASH];
+ struct nlattr *flows[ETHTOOL_A_FLOW_MAX + 1];
+ const struct ethtool_ops *ops;
+ int i, ret;
+
+ ops = dev->ethtool_ops;
+
+ ret = rss_check_rxfh_fields_sym(dev, info, data, xfrm_sym);
+ if (ret)
+ return ret;
+
+ if (!flow_nest)
+ return 0;
+
+ ret = nla_parse_nested(flows, ARRAY_SIZE(ethnl_rss_flows_policy) - 1,
+ flow_nest, ethnl_rss_flows_policy, info->extack);
+ if (ret < 0)
+ return ret;
+
+ for (i = 1; i < __ETHTOOL_A_FLOW_CNT; i++) {
+ struct ethtool_rxfh_fields fields = {
+ .flow_type = ethtool_rxfh_ft_nl2ioctl[i],
+ .rss_context = rss_context,
+ };
+
+ if (!flows[i])
+ continue;
+
+ fields.data = nla_get_u32(flows[i]);
+ if (data->has_flow_hash && data->flow_hash[i] == fields.data)
+ continue;
+
+ if (xfrm_sym && !ethtool_rxfh_config_is_sym(fields.data)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, flows[i],
+ "conflict with xfrm-input");
+ return -EINVAL;
+ }
+
+ ret = ops->set_rxfh_fields(dev, &fields, info->extack);
+ if (ret)
+ return ret;
+
+ *mod = true;
+ }
+
+ return 0;
+}
+
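Aside (not part of the patch): ethnl_set_rss_fields() rejects per-flow configs that would break symmetric hashing; the real check is ethtool_rxfh_config_is_sym(). The sketch below shows the pairing rule that check is assumed to enforce — address bits and the two L4 port halves must be enabled together, otherwise swapping the endpoints of a flow would change the hash. The RXH_* values are copied from include/uapi/linux/ethtool.h; fields_are_sym() is a hypothetical stand-in.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RXH_IP_SRC      (1u << 4)
#define RXH_IP_DST      (1u << 5)
#define RXH_L4_B_0_1    (1u << 6)       /* source port */
#define RXH_L4_B_2_3    (1u << 7)       /* destination port */

/* Assumed pairing rule: an input is symmetric only if the "source" and
 * "destination" halves are either both hashed or both ignored.
 */
static bool fields_are_sym(uint64_t data)
{
        bool addr_pair = !(data & RXH_IP_SRC) == !(data & RXH_IP_DST);
        bool port_pair = !(data & RXH_L4_B_0_1) == !(data & RXH_L4_B_2_3);

        return addr_pair && port_pair;
}

int main(void)
{
        printf("%d %d\n",
               fields_are_sym(RXH_IP_SRC | RXH_IP_DST),        /* 1 */
               fields_are_sym(RXH_IP_SRC | RXH_L4_B_0_1));     /* 0 */
        return 0;
}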
+static void
+rss_set_ctx_update(struct ethtool_rxfh_context *ctx, struct nlattr **tb,
+ struct rss_reply_data *data, struct ethtool_rxfh_param *rxfh)
+{
+ int i;
+
+ if (rxfh->indir) {
+ for (i = 0; i < data->indir_size; i++)
+ ethtool_rxfh_context_indir(ctx)[i] = rxfh->indir[i];
+ ctx->indir_configured = !!nla_len(tb[ETHTOOL_A_RSS_INDIR]);
+ }
+ if (rxfh->key) {
+ memcpy(ethtool_rxfh_context_key(ctx), rxfh->key,
+ data->hkey_size);
+ ctx->key_configured = !!rxfh->key_size;
+ }
+ if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE)
+ ctx->hfunc = rxfh->hfunc;
+ if (rxfh->input_xfrm != RXH_XFRM_NO_CHANGE)
+ ctx->input_xfrm = rxfh->input_xfrm;
+}
+
+static int
+ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ bool indir_reset = false, indir_mod, xfrm_sym = false;
+ struct rss_req_info *request = RSS_REQINFO(req_info);
+ struct ethtool_rxfh_context *ctx = NULL;
+ struct net_device *dev = req_info->dev;
+ bool mod = false, fields_mod = false;
+ struct ethtool_rxfh_param rxfh = {};
+ struct nlattr **tb = info->attrs;
+ struct rss_reply_data data = {};
+ const struct ethtool_ops *ops;
+ int ret;
+
+ ops = dev->ethtool_ops;
+ data.base.dev = dev;
+
+ ret = rss_prepare(request, dev, &data, info);
+ if (ret)
+ return ret;
+
+ rxfh.rss_context = request->rss_context;
+
+ ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_reset, &mod);
+ if (ret)
+ goto exit_clean_data;
+ indir_mod = !!tb[ETHTOOL_A_RSS_INDIR];
+
+ rxfh.hfunc = data.hfunc;
+ ethnl_update_u8(&rxfh.hfunc, tb[ETHTOOL_A_RSS_HFUNC], &mod);
+ if (rxfh.hfunc == data.hfunc)
+ rxfh.hfunc = ETH_RSS_HASH_NO_CHANGE;
+
+ ret = rss_set_prep_hkey(dev, info, &data, &rxfh, &mod);
+ if (ret)
+ goto exit_free_indir;
+
+ rxfh.input_xfrm = data.input_xfrm;
+ ethnl_update_u8(&rxfh.input_xfrm, tb[ETHTOOL_A_RSS_INPUT_XFRM], &mod);
+ /* For drivers which don't support input_xfrm it will be set to 0xff
+ * in the RSS context info. In all other cases input_xfrm != 0 means
+ * symmetric hashing is requested.
+ */
+ if (!request->rss_context || ops->rxfh_per_ctx_key)
+ xfrm_sym = rxfh.input_xfrm || data.input_xfrm;
+ if (rxfh.input_xfrm == data.input_xfrm)
+ rxfh.input_xfrm = RXH_XFRM_NO_CHANGE;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ if (request->rss_context) {
+ ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto exit_unlock;
+ }
+ }
+
+ ret = ethnl_set_rss_fields(dev, info, request->rss_context,
+ &data, xfrm_sym, &fields_mod);
+ if (ret)
+ goto exit_unlock;
+
+ if (!mod)
+ ret = 0; /* nothing to tell the driver */
+ else if (!ops->set_rxfh)
+ ret = -EOPNOTSUPP;
+ else if (!rxfh.rss_context)
+ ret = ops->set_rxfh(dev, &rxfh, info->extack);
+ else
+ ret = ops->modify_rxfh_context(dev, ctx, &rxfh, info->extack);
+ if (ret)
+ goto exit_unlock;
+
+ if (ctx)
+ rss_set_ctx_update(ctx, tb, &data, &rxfh);
+ else if (indir_reset)
+ dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
+ else if (indir_mod)
+ dev->priv_flags |= IFF_RXFH_CONFIGURED;
+
+exit_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+ kfree(rxfh.key);
+exit_free_indir:
+ kfree(rxfh.indir);
+exit_clean_data:
+ rss_cleanup_data(&data.base);
+
+ return ret ?: mod || fields_mod;
+}
+
const struct ethnl_request_ops ethnl_rss_request_ops = {
.request_cmd = ETHTOOL_MSG_RSS_GET,
.reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
@@ -370,4 +924,282 @@ const struct ethnl_request_ops ethnl_rss_request_ops = {
.reply_size = rss_reply_size,
.fill_reply = rss_fill_reply,
.cleanup_data = rss_cleanup_data,
+
+ .set_validate = ethnl_rss_set_validate,
+ .set = ethnl_rss_set,
+ .set_ntf_cmd = ETHTOOL_MSG_RSS_NTF,
+};
+
+/* RSS_CREATE */
+
+const struct nla_policy ethnl_rss_create_policy[ETHTOOL_A_RSS_INPUT_XFRM + 1] = {
+ [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_RSS_CONTEXT] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RSS_HFUNC] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RSS_INDIR] = NLA_POLICY_MIN(NLA_BINARY, 1),
+ [ETHTOOL_A_RSS_HKEY] = NLA_POLICY_MIN(NLA_BINARY, 1),
+ [ETHTOOL_A_RSS_INPUT_XFRM] =
+ NLA_POLICY_MAX(NLA_U32, RXH_XFRM_SYM_OR_XOR),
};
+
+static int
+ethnl_rss_create_validate(struct net_device *dev, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct nlattr **tb = info->attrs;
+ struct nlattr *bad_attr = NULL;
+ u32 rss_context, input_xfrm;
+
+ if (!ops->create_rxfh_context)
+ return -EOPNOTSUPP;
+
+ rss_context = nla_get_u32_default(tb[ETHTOOL_A_RSS_CONTEXT], 0);
+ if (ops->rxfh_max_num_contexts &&
+ ops->rxfh_max_num_contexts <= rss_context) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]);
+ return -ERANGE;
+ }
+
+ if (!ops->rxfh_per_ctx_key) {
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_HFUNC];
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_HKEY];
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_INPUT_XFRM];
+ }
+
+ input_xfrm = nla_get_u32_default(tb[ETHTOOL_A_RSS_INPUT_XFRM], 0);
+ if (input_xfrm & ~ops->supported_input_xfrm)
+ bad_attr = bad_attr ?: tb[ETHTOOL_A_RSS_INPUT_XFRM];
+
+ if (bad_attr) {
+ NL_SET_BAD_ATTR(info->extack, bad_attr);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void
+ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev)
+{
+ struct nlmsghdr *nlh = (void *)rsp->data;
+ struct genlmsghdr *genl_hdr;
+
+ /* Convert the reply into a notification */
+ nlh->nlmsg_pid = 0;
+ nlh->nlmsg_seq = ethnl_bcast_seq_next();
+
+ genl_hdr = nlmsg_data(nlh);
+ genl_hdr->cmd = ETHTOOL_MSG_RSS_CREATE_NTF;
+
+ ethnl_multicast(rsp, dev);
+}
+
+int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ bool indir_dflt = false, mod = false, ntf_fail = false;
+ struct ethtool_rxfh_param rxfh = {};
+ struct ethtool_rxfh_context *ctx;
+ struct nlattr **tb = info->attrs;
+ struct rss_reply_data data = {};
+ const struct ethtool_ops *ops;
+ struct rss_req_info req = {};
+ struct net_device *dev;
+ struct sk_buff *rsp;
+ void *hdr;
+ u32 limit;
+ int ret;
+
+ rsp = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ ret = ethnl_parse_header_dev_get(&req.base, tb[ETHTOOL_A_RSS_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ goto exit_free_rsp;
+
+ dev = req.base.dev;
+ ops = dev->ethtool_ops;
+
+ req.rss_context = nla_get_u32_default(tb[ETHTOOL_A_RSS_CONTEXT], 0);
+
+ ret = ethnl_rss_create_validate(dev, info);
+ if (ret)
+ goto exit_free_dev;
+
+ rtnl_lock();
+ netdev_lock_ops(dev);
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto exit_dev_unlock;
+
+ ret = rss_get_data_alloc(dev, &data);
+ if (ret)
+ goto exit_ops;
+
+ ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_dflt, &mod);
+ if (ret)
+ goto exit_clean_data;
+
+ ethnl_update_u8(&rxfh.hfunc, tb[ETHTOOL_A_RSS_HFUNC], &mod);
+
+ ret = rss_set_prep_hkey(dev, info, &data, &rxfh, &mod);
+ if (ret)
+ goto exit_free_indir;
+
+ rxfh.input_xfrm = RXH_XFRM_NO_CHANGE;
+ ethnl_update_u8(&rxfh.input_xfrm, tb[ETHTOOL_A_RSS_INPUT_XFRM], &mod);
+
+ ctx = ethtool_rxfh_ctx_alloc(ops, data.indir_size, data.hkey_size);
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto exit_free_hkey;
+ }
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ if (!req.rss_context) {
+ limit = ops->rxfh_max_num_contexts ?: U32_MAX;
+ ret = xa_alloc(&dev->ethtool->rss_ctx, &req.rss_context, ctx,
+ XA_LIMIT(1, limit - 1), GFP_KERNEL_ACCOUNT);
+ } else {
+ ret = xa_insert(&dev->ethtool->rss_ctx,
+ req.rss_context, ctx, GFP_KERNEL_ACCOUNT);
+ }
+ if (ret < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT],
+ "error allocating context ID");
+ goto err_unlock_free_ctx;
+ }
+ rxfh.rss_context = req.rss_context;
+
+ ret = ops->create_rxfh_context(dev, ctx, &rxfh, info->extack);
+ if (ret)
+ goto err_ctx_id_free;
+
+ /* Make sure driver populates defaults */
+ WARN_ON_ONCE(!rxfh.key && ops->rxfh_per_ctx_key &&
+ !memchr_inv(ethtool_rxfh_context_key(ctx), 0,
+ ctx->key_size));
+
+ /* Store the config from rxfh into the XArray... */
+ rss_set_ctx_update(ctx, tb, &data, &rxfh);
+ /* ...then copy from the XArray to data. */
+ __rss_prepare_ctx(dev, &data, ctx);
+
+ hdr = ethnl_unicast_put(rsp, info->snd_portid, info->snd_seq,
+ ETHTOOL_MSG_RSS_CREATE_ACT_REPLY);
+ ntf_fail = ethnl_fill_reply_header(rsp, dev, ETHTOOL_A_RSS_HEADER);
+ ntf_fail |= rss_fill_reply(rsp, &req.base, &data.base);
+ if (WARN_ON(!hdr || ntf_fail)) {
+ ret = -EMSGSIZE;
+ goto exit_unlock;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ /* Use the same skb for the response and the notification,
+ * genlmsg_reply() will copy the skb if it has an elevated user count.
+ */
+ skb_get(rsp);
+ ret = genlmsg_reply(rsp, info);
+ ethnl_rss_create_send_ntf(rsp, dev);
+ rsp = NULL;
+
+exit_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+exit_free_hkey:
+ kfree(rxfh.key);
+exit_free_indir:
+ kfree(rxfh.indir);
+exit_clean_data:
+ rss_get_data_free(&data);
+exit_ops:
+ ethnl_ops_complete(dev);
+exit_dev_unlock:
+ netdev_unlock_ops(dev);
+ rtnl_unlock();
+exit_free_dev:
+ ethnl_parse_header_dev_put(&req.base);
+exit_free_rsp:
+ nlmsg_free(rsp);
+ return ret;
+
+err_ctx_id_free:
+ xa_erase(&dev->ethtool->rss_ctx, req.rss_context);
+err_unlock_free_ctx:
+ kfree(ctx);
+ goto exit_unlock;
+}
+
+/* RSS_DELETE */
+
+const struct nla_policy ethnl_rss_delete_policy[ETHTOOL_A_RSS_CONTEXT + 1] = {
+ [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_RSS_CONTEXT] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+int ethnl_rss_delete_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethtool_rxfh_context *ctx;
+ struct nlattr **tb = info->attrs;
+ struct ethnl_req_info req = {};
+ const struct ethtool_ops *ops;
+ struct net_device *dev;
+ u32 rss_context;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_RSS_CONTEXT))
+ return -EINVAL;
+ rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+
+ ret = ethnl_parse_header_dev_get(&req, tb[ETHTOOL_A_RSS_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ dev = req.dev;
+ ops = dev->ethtool_ops;
+
+ if (!ops->create_rxfh_context)
+ goto exit_free_dev;
+
+ rtnl_lock();
+ netdev_lock_ops(dev);
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto exit_dev_unlock;
+
+ mutex_lock(&dev->ethtool->rss_lock);
+ ret = ethtool_check_rss_ctx_busy(dev, rss_context);
+ if (ret)
+ goto exit_unlock;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx, rss_context);
+ if (!ctx) {
+ ret = -ENOENT;
+ goto exit_unlock;
+ }
+
+ ret = ops->remove_rxfh_context(dev, ctx, rss_context, info->extack);
+ if (ret)
+ goto exit_unlock;
+
+ WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rss_context) != ctx);
+ kfree(ctx);
+
+ ethnl_rss_delete_notify(dev, rss_context);
+
+exit_unlock:
+ mutex_unlock(&dev->ethtool->rss_lock);
+ ethnl_ops_complete(dev);
+exit_dev_unlock:
+ netdev_unlock_ops(dev);
+ rtnl_unlock();
+exit_free_dev:
+ ethnl_parse_header_dev_put(&req);
+ return ret;
+}
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
index d6f52839827e..081093dfd553 100644
--- a/net/handshake/tlshd.c
+++ b/net/handshake/tlshd.c
@@ -230,6 +230,12 @@ static int tls_handshake_accept(struct handshake_req *req,
if (ret < 0)
goto out_cancel;
}
+ if (treq->th_keyring) {
+ ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_KEYRING,
+ treq->th_keyring);
+ if (ret < 0)
+ goto out_cancel;
+ }
ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_AUTH_MODE,
treq->th_auth_mode);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a648fff71ea7..5cfc1c939673 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -864,7 +864,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
(arp_fwd_proxy(in_dev, dev, rt) ||
arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
(rt->dst.dev != dev &&
- pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
+ pneigh_lookup(&arp_tbl, net, &tip, dev)))) {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
@@ -966,6 +966,7 @@ static int arp_is_multicast(const void *pkey)
static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason;
const struct arphdr *arp;
/* do not tweak dropwatch on an ARP we will ignore */
@@ -979,12 +980,15 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
goto out_of_mem;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull(skb, arp_hdr_len(dev)))
+ drop_reason = pskb_may_pull_reason(skb, arp_hdr_len(dev));
+ if (drop_reason != SKB_NOT_DROPPED_YET)
goto freeskb;
arp = arp_hdr(skb);
- if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4)
+ if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) {
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
goto freeskb;
+ }
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -996,7 +1000,7 @@ consumeskb:
consume_skb(skb);
return NET_RX_SUCCESS;
freeskb:
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
out_of_mem:
return NET_RX_DROP;
}
@@ -1085,9 +1089,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
if (mask) {
__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
- if (!pneigh_lookup(&arp_tbl, net, &ip, dev, 1))
- return -ENOBUFS;
- return 0;
+ return pneigh_create(&arp_tbl, net, &ip, dev, 0, 0, false);
}
return arp_req_set_proxy(net, dev, 1);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4b5bc6eb52e7..c2b2cda1a7e5 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -109,7 +109,7 @@ void ip4_datagram_release_cb(struct sock *sk)
rcu_read_lock();
dst = __sk_dst_get(sk);
- if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, 0)) {
rcu_read_unlock();
return;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index fd1e1507a224..6e1b94796f67 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1524,7 +1524,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_disable_ip(dev, event, false);
break;
case NETDEV_CHANGE:
- flags = dev_get_flags(dev);
+ flags = netif_get_flags(dev);
if (flags & (IFF_RUNNING | IFF_LOWER_UP))
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d643bd1a0d9d..a5f3c8459758 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -625,11 +625,6 @@ int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc,
if (encap) {
struct lwtunnel_state *lwtstate;
- if (encap_type == LWTUNNEL_ENCAP_NONE) {
- NL_SET_ERR_MSG(extack, "LWT encap type not specified");
- err = -EINVAL;
- goto lwt_failure;
- }
err = lwtunnel_build_state(net, encap_type, encap,
nhc->nhc_family, cfg, &lwtstate,
extack);
@@ -1640,8 +1635,7 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
goto nla_put_failure;
- if (nhc->nhc_lwtstate &&
- lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
+ if (lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
@@ -2093,7 +2087,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
return 0;
if (nh_flags & RTNH_F_DEAD) {
- unsigned int flags = dev_get_flags(dev);
+ unsigned int flags = netif_get_flags(dev);
if (flags & (IFF_RUNNING | IFF_LOWER_UP))
nh_flags |= RTNH_F_LINKDOWN;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 717cb7d3607a..2ffe73ea644f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
+ struct net_device *dev;
bool rc = true;
if (!apply_ratelimit)
return true;
/* No rate limit on loopback */
- if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
+ dev = dst_dev(dst);
+ if (dev && (dev->flags & IFF_LOOPBACK))
goto out;
rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
- l3mdev_master_ifindex_rcu(dst->dev));
+ l3mdev_master_ifindex_rcu(dev));
rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
rcu_read_unlock();
@@ -466,13 +468,13 @@ out_bh_enable:
*/
static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
{
- struct net_device *route_lookup_dev = NULL;
+ struct net_device *dev = skb->dev;
+ const struct dst_entry *dst;
- if (skb->dev)
- route_lookup_dev = skb->dev;
- else if (skb_dst(skb))
- route_lookup_dev = skb_dst(skb)->dev;
- return route_lookup_dev;
+ if (dev)
+ return dev;
+ dst = skb_dst(skb);
+ return dst ? dst_dev(dst) : NULL;
}
static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
@@ -869,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
struct net *net;
u32 info = 0;
- net = dev_net_rcu(skb_dst(skb)->dev);
+ net = skb_dst_dev_net_rcu(skb);
/*
* Incomplete header ?
@@ -1012,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
struct icmp_bxm icmp_param;
struct net *net;
- net = dev_net_rcu(skb_dst(skb)->dev);
+ net = skb_dst_dev_net_rcu(skb);
/* should there be an ICMP stat for ignored echos? */
if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
return SKB_NOT_DROPPED_YET;
@@ -1182,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
return SKB_NOT_DROPPED_YET;
out_err:
- __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+ __ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS);
return SKB_DROP_REASON_PKT_TOO_SMALL;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d1769034b643..7182f1419c2a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -427,7 +427,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
- return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+ return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb);
}
static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 6906bedad19a..1e2df51427fe 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -168,7 +168,7 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk)
}
static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
+ kuid_t uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
{
int bound_dev_if2;
@@ -185,12 +185,12 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
if (!relax || (!reuseport_ok && sk->sk_reuseport &&
sk2->sk_reuseport && reuseport_cb_ok &&
(sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
+ uid_eq(uid, sk_uid(sk2)))))
return true;
} else if (!reuseport_ok || !sk->sk_reuseport ||
!sk2->sk_reuseport || !reuseport_cb_ok ||
(sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
+ !uid_eq(uid, sk_uid(sk2)))) {
return true;
}
}
@@ -198,7 +198,7 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
}
static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
+ kuid_t uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
{
if (ipv6_only_sock(sk2)) {
@@ -211,20 +211,20 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
#endif
}
- return inet_bind_conflict(sk, sk2, sk_uid, relax,
+ return inet_bind_conflict(sk, sk2, uid, relax,
reuseport_cb_ok, reuseport_ok);
}
static bool inet_bhash2_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2,
- kuid_t sk_uid,
+ kuid_t uid,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
struct sock *sk2;
sk_for_each_bound(sk2, &tb2->owners) {
- if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ if (__inet_bhash2_conflict(sk, sk2, uid, relax,
reuseport_cb_ok, reuseport_ok))
return true;
}
@@ -242,8 +242,8 @@ static int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2, /* may be null */
bool relax, bool reuseport_ok)
{
- kuid_t uid = sock_i_uid((struct sock *)sk);
struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sk_uid(sk);
bool reuseport_cb_ok;
struct sock *sk2;
@@ -287,11 +287,11 @@ static int inet_csk_bind_conflict(const struct sock *sk,
static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l3mdev,
bool relax, bool reuseport_ok)
{
- kuid_t uid = sock_i_uid((struct sock *)sk);
const struct net *net = sock_net(sk);
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ kuid_t uid = sk_uid(sk);
bool conflict = false;
bool reuseport_cb_ok;
@@ -425,15 +425,13 @@ success:
static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
struct sock *sk)
{
- kuid_t uid = sock_i_uid(sk);
-
if (tb->fastreuseport <= 0)
return 0;
if (!sk->sk_reuseport)
return 0;
if (rcu_access_pointer(sk->sk_reuseport_cb))
return 0;
- if (!uid_eq(tb->fastuid, uid))
+ if (!uid_eq(tb->fastuid, sk_uid(sk)))
return 0;
/* We only need to check the rcv_saddr if this tb was once marked
* without fastreuseport and then was reset, as we can only know that
@@ -458,14 +456,13 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct sock *sk)
{
- kuid_t uid = sock_i_uid(sk);
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
if (hlist_empty(&tb->bhash2)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = FASTREUSEPORT_ANY;
- tb->fastuid = uid;
+ tb->fastuid = sk_uid(sk);
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
tb->fast_sk_family = sk->sk_family;
@@ -492,7 +489,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
*/
if (!sk_reuseport_match(tb, sk)) {
tb->fastreuseport = FASTREUSEPORT_STRICT;
- tb->fastuid = uid;
+ tb->fastuid = sk_uid(sk);
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
tb->fast_ipv6_only = ipv6_only_sock(sk);
tb->fast_sk_family = sk->sk_family;
@@ -812,7 +809,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
- htons(ireq->ir_num), sk->sk_uid);
+ htons(ireq->ir_num), sk_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -849,7 +846,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
- htons(ireq->ir_num), sk->sk_uid);
+ htons(ireq->ir_num), sk_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -887,15 +884,6 @@ static void syn_ack_recalc(struct request_sock *req,
req->num_timeout >= rskq_defer_accept - 1;
}
-int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
-{
- int err = req->rsk_ops->rtx_syn_ack(parent, req);
-
- if (!err)
- req->num_retrans++;
- return err;
-}
-
static struct request_sock *
reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
bool attach_listener)
@@ -1135,7 +1123,7 @@ static void reqsk_timer_handler(struct timer_list *t)
req->rsk_ops->syn_ack_timeout(req);
if (!expire &&
(!resend ||
- !inet_rtx_syn_ack(sk_listener, req) ||
+ !tcp_rtx_synack(sk_listener, req) ||
inet_rsk(req)->acked)) {
if (req->num_timeout++ == 0)
atomic_dec(&queue->young);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 1d1d6ad53f4c..2fa53b16fe77 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -181,7 +181,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
goto errout;
#endif
- r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+ r->idiag_uid = from_kuid_munged(user_ns, sk_uid(sk));
r->idiag_inode = sock_i_ino(sk);
memset(&inet_sockopt, 0, sizeof(inet_sockopt));
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 77a0b52b2eab..ceeeec9b7290 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -721,8 +721,8 @@ static int inet_reuseport_add_sock(struct sock *sk,
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
const struct hlist_nulls_node *node;
+ kuid_t uid = sk_uid(sk);
struct sock *sk2;
- kuid_t uid = sock_i_uid(sk);
sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
if (sk2 != sk &&
@@ -730,7 +730,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
inet_csk(sk2)->icsk_bind_hash == tb &&
- sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ sk2->sk_reuseport && uid_eq(uid, sk_uid(sk2)) &&
inet_rcv_saddr_equal(sk, sk2, false))
return reuseport_add_sock(sk, sk2,
inet_rcv_saddr_any(sk));
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 64b3fb3208af..b2584cce90ae 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -476,7 +476,7 @@ out_fail:
/* Process an incoming IP datagram fragment. */
int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
- struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
+ struct net_device *dev = skb->dev ? : skb_dst_dev(skb);
int vif = l3mdev_master_ifindex_rcu(dev);
struct ipq *qp;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 30a5e9460d00..fc323994b1fa 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -226,6 +226,12 @@ resubmit:
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+ __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
+ return 0;
+ }
+
skb_clear_delivery_time(skb);
__skb_pull(skb, skb_network_header_len(skb));
@@ -319,8 +325,8 @@ static int ip_rcv_finish_core(struct net *net,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int err, drop_reason;
struct rtable *rt;
+ int drop_reason;
if (ip_can_use_hint(skb, iph, hint)) {
drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
@@ -345,9 +351,10 @@ static int ip_rcv_finish_core(struct net *net,
break;
case IPPROTO_UDP:
if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
- err = udp_v4_early_demux(skb);
- if (unlikely(err))
+ drop_reason = udp_v4_early_demux(skb);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a2705d454fd6..10a1d182fd84 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -116,7 +116,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
- net, sk, skb, NULL, skb_dst(skb)->dev,
+ net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);
}
@@ -199,7 +199,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = dst_rtable(dst);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
@@ -425,7 +425,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
+ struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -1222,8 +1222,7 @@ alloc_new_skb:
if (WARN_ON_ONCE(copy > msg->msg_iter.count))
goto error;
- err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
if (err < 0)
goto error;
copy = err;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 678b8f96e3e9..aaeb5d16f0c9 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -668,7 +668,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_adj_headroom(dev, headroom);
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
- df, !net_eq(tunnel->net, dev_net(dev)));
+ df, !net_eq(tunnel->net, dev_net(dev)), 0);
return;
tx_error:
DEV_STATS_INC(dev, tx_errors);
@@ -857,7 +857,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_adj_headroom(dev, max_headroom);
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
- df, !net_eq(tunnel->net, dev_net(dev)));
+ df, !net_eq(tunnel->net, dev_net(dev)), 0);
return;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index f65d2f727381..cc9915543637 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -49,7 +49,8 @@ EXPORT_SYMBOL(ip6tun_encaps);
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
- __u8 tos, __u8 ttl, __be16 df, bool xnet)
+ __u8 tos, __u8 ttl, __be16 df, bool xnet,
+ u16 ipcb_flags)
{
int pkt_len = skb->len - skb_inner_network_offset(skb);
struct net *net = dev_net(rt->dst.dev);
@@ -62,6 +63,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
skb_clear_hash_if_not_l4(skb);
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ IPCB(skb)->flags = ipcb_flags;
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 686e4f3d83aa..95b6bb78fcd2 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error_icmp;
}
- tdev = dst->dev;
+ tdev = dst_dev(dst);
if (tdev == dev) {
dst_release(dst);
@@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
xmit:
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
skb_dst_set(skb, dst);
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = skb_dst_dev(skb);
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 5a4fb2539b08..9a45aed508d1 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -54,6 +54,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
}
/* We always hold one tunnel user reference to indicate a tunnel */
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -62,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPIP;
t->id.spi = x->props.saddr.a4;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index c56b6fe6f0d7..22a7889876c1 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -274,9 +274,9 @@ static int __init ic_open_devs(void)
/* wait for a carrier on at least one device */
start = jiffies;
- next_msg = start + msecs_to_jiffies(20000);
+ next_msg = start + secs_to_jiffies(20);
while (time_before(jiffies, start +
- msecs_to_jiffies(carrier_timeout * 1000))) {
+ secs_to_jiffies(carrier_timeout))) {
int wait, elapsed;
rtnl_lock();
@@ -295,7 +295,7 @@ static int __init ic_open_devs(void)
elapsed = jiffies_to_msecs(jiffies - start);
wait = (carrier_timeout * 1000 - elapsed + 500) / 1000;
pr_info("Waiting up to %d more seconds for network.\n", wait);
- next_msg = jiffies + msecs_to_jiffies(20000);
+ next_msg = jiffies + secs_to_jiffies(20);
}
have_carrier:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a7d09ae9d761..e86a8a862c41 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -901,7 +901,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
(VIFF_TUNNEL | VIFF_REGISTER));
- err = dev_get_port_parent_id(dev, &ppid, true);
+ err = netif_get_port_parent_id(dev, &ppid, true);
if (err == 0) {
memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
v->dev_parent_id.id_len = ppid.id_len;
@@ -1853,20 +1853,19 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
/* Processing handlers for ipmr_forward, under rcu_read_lock() */
-static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- int in_vifi, struct sk_buff *skb, int vifi)
+static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *vif_dev;
- struct net_device *dev;
struct rtable *rt;
struct flowi4 fl4;
int encap = 0;
vif_dev = vif_dev_read(vif);
if (!vif_dev)
- goto out_free;
+ return -1;
if (vif->flags & VIFF_REGISTER) {
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
@@ -1874,12 +1873,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
DEV_STATS_INC(vif_dev, tx_packets);
ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
- goto out_free;
+ return -1;
}
- if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
- goto out_free;
-
if (vif->flags & VIFF_TUNNEL) {
rt = ip_route_output_ports(net, &fl4, NULL,
vif->remote, vif->local,
@@ -1887,7 +1883,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
IPPROTO_IPIP,
iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
- goto out_free;
+ return -1;
encap = sizeof(struct iphdr);
} else {
rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
@@ -1895,11 +1891,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
IPPROTO_IPIP,
iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
- goto out_free;
+ return -1;
}
- dev = rt->dst.dev;
-
if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
/* Do not fragment multicasts. Alas, IPv4 does not
* allow to send ICMP, so that packets will disappear
@@ -1907,14 +1901,14 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
*/
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
- goto out_free;
+ return -1;
}
- encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
+ encap += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
if (skb_cow(skb, encap)) {
ip_rt_put(rt);
- goto out_free;
+ return -1;
}
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
@@ -1934,6 +1928,22 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
}
+ return 0;
+}
+
+static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
+ int in_vifi, struct sk_buff *skb, int vifi)
+{
+ struct rtable *rt;
+
+ if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
+ goto out_free;
+
+ if (ipmr_prepare_xmit(net, mrt, skb, vifi))
+ goto out_free;
+
+ rt = skb_rtable(skb);
+
IPCB(skb)->flags |= IPSKB_FORWARDED;
/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
@@ -1947,7 +1957,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* result in receiving multiple packets.
*/
NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, dev,
+ net, NULL, skb, skb->dev, rt->dst.dev,
ipmr_forward_finish);
return;
@@ -1955,6 +1965,19 @@ out_free:
kfree_skb(skb);
}
+static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, int vifi)
+{
+ if (ipmr_prepare_xmit(net, mrt, skb, vifi))
+ goto out_free;
+
+ ip_mc_output(net, NULL, skb);
+ return;
+
+out_free:
+ kfree_skb(skb);
+}
+
/* Called with mrt_lock or rcu_read_lock() */
static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
{
@@ -2065,8 +2088,8 @@ forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, psend);
+ ipmr_queue_fwd_xmit(net, mrt, true_vifi,
+ skb2, psend);
}
psend = ct;
}
@@ -2077,10 +2100,10 @@ last_forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- psend);
+ ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2,
+ psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
+ ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend);
return;
}
}
@@ -2214,6 +2237,110 @@ dont_forward:
return 0;
}
+static void ip_mr_output_finish(struct net *net, struct mr_table *mrt,
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc_cache *c)
+{
+ int psend = -1;
+ int ct;
+
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
+
+ /* Forward the frame */
+ if (c->mfc_origin == htonl(INADDR_ANY) &&
+ c->mfc_mcastgrp == htonl(INADDR_ANY)) {
+ if (ip_hdr(skb)->ttl >
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
+ /* It's an (*,*) entry and the packet is not coming from
+ * the upstream: forward the packet to the upstream
+ * only.
+ */
+ psend = c->_c.mfc_parent;
+ goto last_xmit;
+ }
+ goto dont_xmit;
+ }
+
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
+ if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
+ if (psend != -1) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ ipmr_queue_output_xmit(net, mrt,
+ skb2, psend);
+ }
+ psend = ct;
+ }
+ }
+
+last_xmit:
+ if (psend != -1) {
+ ipmr_queue_output_xmit(net, mrt, skb, psend);
+ return;
+ }
+
+dont_xmit:
+ kfree_skb(skb);
+}
+
+/* Multicast packets for forwarding arrive here
+ * Called with rcu_read_lock();
+ */
+int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct rtable *rt = skb_rtable(skb);
+ struct mfc_cache *cache;
+ struct net_device *dev;
+ struct mr_table *mrt;
+ int vif;
+
+ guard(rcu)();
+
+ dev = rt->dst.dev;
+
+ if (IPCB(skb)->flags & IPSKB_FORWARDED)
+ goto mc_output;
+ if (!(IPCB(skb)->flags & IPSKB_MCROUTE))
+ goto mc_output;
+
+ skb->dev = dev;
+
+ mrt = ipmr_rt_fib_lookup(net, skb);
+ if (IS_ERR(mrt))
+ goto mc_output;
+
+ cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+ if (!cache) {
+ vif = ipmr_find_vif(mrt, dev);
+ if (vif >= 0)
+ cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
+ vif);
+ }
+
+ /* No usable cache entry */
+ if (!cache) {
+ vif = ipmr_find_vif(mrt, dev);
+ if (vif >= 0)
+ return ipmr_cache_unresolved(mrt, vif, skb, dev);
+ goto mc_output;
+ }
+
+ vif = cache->_c.mfc_parent;
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
+ goto mc_output;
+
+ ip_mr_output_finish(net, mrt, dev, skb, cache);
+ return 0;
+
+mc_output:
+ return ip_mc_output(net, sk, skb);
+}
+
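Aside (not part of the patch): ip_mr_output_finish() and ipmr_queue_output_xmit() above use the usual multicast fan-out pattern — clone the skb for every eligible vif except the last one, which consumes the original. A small userspace sketch of that pattern, with clone_buf() and xmit() as stand-ins for skb_clone() and the transmit path:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *clone_buf(const char *b) { return strdup(b); }

static void xmit(char *b, int target)
{
        printf("xmit to %d: %s\n", target, b);
        free(b);                        /* transmission consumes the buffer */
}

/* Clone for every target except the last; the original goes out last. */
static void forward_all(char *buf, const int *targets, int n)
{
        int last = -1;

        for (int i = 0; i < n; i++) {
                if (last >= 0) {
                        char *copy = clone_buf(buf);

                        if (copy)
                                xmit(copy, last);
                }
                last = targets[i];
        }
        if (last >= 0)
                xmit(buf, last);
        else
                free(buf);
}

int main(void)
{
        int vifs[] = { 1, 3, 4 };

        forward_all(strdup("pkt"), vifs, 3);
        return 0;
}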
#ifdef CONFIG_IP_PIMSM_V1
/* Handle IGMP messages of PIMv1 */
int pim_rcv_v1(struct sk_buff *skb)
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 08bc3f2c0078..0565f001120d 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -20,12 +20,12 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
{
+ struct net_device *dev = skb_dst_dev(skb);
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
__u8 flags;
- struct net_device *dev = skb_dst(skb)->dev;
struct flow_keys flkeys;
unsigned int hh_len;
@@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
#endif
/* Change in oif may mean change in hh_len. */
- hh_len = skb_dst(skb)->dev->hard_header_len;
+ hh_len = skb_dst_dev(skb)->hard_header_len;
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
0, GFP_ATOMIC))
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index ef8009281da5..2c438b140e88 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -13,8 +13,8 @@ config NF_DEFRAG_IPV4
# old sockopt interface and eval loop
config IP_NF_IPTABLES_LEGACY
tristate "Legacy IP tables support"
- default n
- select NETFILTER_XTABLES
+ depends on NETFILTER_XTABLES_LEGACY
+ default m if NETFILTER_XTABLES_LEGACY
help
iptables is a legacy packet classifier.
This is not needed if you are using iptables over nftables
@@ -182,8 +182,8 @@ config IP_NF_MATCH_TTL
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
- default m if NETFILTER_ADVANCED=n
- select IP_NF_IPTABLES_LEGACY
+ default m if NETFILTER_ADVANCED=n || IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -220,10 +220,10 @@ config IP_NF_TARGET_SYNPROXY
config IP_NF_NAT
tristate "iptables NAT support"
depends on NF_CONNTRACK
+ depends on IP_NF_IPTABLES_LEGACY
default m if NETFILTER_ADVANCED=n
select NF_NAT
select NETFILTER_XT_NAT
- select IP_NF_IPTABLES_LEGACY
help
This enables the `nat' table in iptables. This allows masquerading,
port forwarding and other forms of full Network Address Port
@@ -263,8 +263,8 @@ endif # IP_NF_NAT
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
- default m if NETFILTER_ADVANCED=n
- select IP_NF_IPTABLES_LEGACY
+ default m if NETFILTER_ADVANCED=n || IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -299,7 +299,7 @@ config IP_NF_TARGET_TTL
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -313,7 +313,7 @@ config IP_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
- select IP_NF_IPTABLES_LEGACY
+ depends on IP_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -325,8 +325,8 @@ endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
tristate "Legacy ARPTABLES support"
- depends on NETFILTER_XTABLES
- default n
+ depends on NETFILTER_XTABLES_LEGACY
+ default n
help
arptables is a legacy packet classifier.
This is not needed if you are using arptables over nftables
@@ -342,7 +342,7 @@ config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
select NETFILTER_FAMILY_ARP
- depends on NETFILTER_XTABLES
+ depends on NETFILTER_XTABLES_LEGACY
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 4397e89d3123..29118c43ebf5 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -985,8 +985,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
break;
}
- if (nhi->fib_nhc.nhc_lwtstate &&
- lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
+ if (lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
goto nla_put_failure;
@@ -3885,7 +3884,7 @@ static int nh_netdev_event(struct notifier_block *this,
nexthop_flush_dev(dev, event);
break;
case NETDEV_CHANGE:
- if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
+ if (!(netif_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
nexthop_flush_dev(dev, event);
break;
case NETDEV_CHANGEMTU:
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c14baa6589c7..031df4c19fcc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark,
ipc.tos & INET_DSCP_MASK, scope,
sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
- saddr, 0, 0, sk->sk_uid);
+ saddr, 0, 0, sk_uid(sk));
fl4.fl4_icmp_type = user_icmph.type;
fl4.fl4_icmp_code = user_icmph.code;
@@ -1116,7 +1116,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
0, 0L, 0,
- from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
+ from_kuid_munged(seq_user_ns(f), sk_uid(sp)),
0, sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ea2f01584379..65b0d0ab0084 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+ SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW),
SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 6aace4d55733..1d2c89d63cc7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -610,7 +610,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
- daddr, saddr, 0, 0, sk->sk_uid);
+ daddr, saddr, 0, 0, sk_uid(sk));
fl4.fl4_icmp_type = 0;
fl4.fl4_icmp_code = 0;
@@ -1043,7 +1043,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
0, sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fccb05fb3a79..f639a2ae881a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
struct neighbour *n;
rcu_read_lock();
@@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
const struct rtable *rt = container_of(dst, struct rtable, dst);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
const __be32 *pkey = daddr;
if (rt->rt_gw_family == AF_INET) {
@@ -556,7 +556,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_test_bit(HDRINCL, sk) ?
IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
- daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
+ daddr, inet->inet_saddr, 0, 0,
+ sk_uid(sk));
rcu_read_unlock();
}
@@ -716,7 +717,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
*/
rt = rcu_dereference(nhc->nhc_rth_input);
if (rt)
- rt->dst.obsolete = DST_OBSOLETE_KILL;
+ WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);
for_each_possible_cpu(i) {
struct rtable __rcu **prt;
@@ -724,7 +725,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
rt = rcu_dereference(*prt);
if (rt)
- rt->dst.obsolete = DST_OBSOLETE_KILL;
+ WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);
}
}
@@ -796,7 +797,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
jiffies + ip_rt_gc_timeout);
}
if (kill_route)
- rt->dst.obsolete = DST_OBSOLETE_KILL;
+ WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
neigh_release(n);
@@ -841,9 +842,9 @@ static void ipv4_negative_advice(struct sock *sk,
{
struct rtable *rt = dst_rtable(dst);
- if ((dst->obsolete > 0) ||
+ if ((READ_ONCE(dst->obsolete) > 0) ||
(rt->rt_flags & RTCF_REDIRECTED) ||
- rt->dst.expires)
+ READ_ONCE(rt->dst.expires))
sk_dst_reset(sk);
}
@@ -1025,14 +1026,15 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
return;
rcu_read_lock();
- net = dev_net_rcu(dst->dev);
+ net = dev_net_rcu(dst_dev(dst));
if (mtu < net->ipv4.ip_rt_min_pmtu) {
lock = true;
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
}
if (rt->rt_pmtu == mtu && !lock &&
- time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
+ time_before(jiffies, READ_ONCE(dst->expires) -
+ net->ipv4.ip_rt_mtu_expires / 2))
goto out;
if (fib_lookup(net, fl4, &res, 0) == 0) {
@@ -1135,7 +1137,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
rt = dst_rtable(odst);
- if (odst->obsolete && !odst->ops->check(odst, 0)) {
+ if (READ_ONCE(odst->obsolete) && !odst->ops->check(odst, 0)) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1210,7 +1212,8 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
* this is indicated by setting obsolete to DST_OBSOLETE_KILL or
* DST_OBSOLETE_DEAD.
*/
- if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
+ if (READ_ONCE(dst->obsolete) != DST_OBSOLETE_FORCE_CHK ||
+ rt_is_expired(rt))
return NULL;
return dst;
}
@@ -1323,7 +1326,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
struct net *net;
rcu_read_lock();
- net = dev_net_rcu(dst->dev);
+ net = dev_net_rcu(dst_dev(dst));
advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
net->ipv4.ip_rt_min_advmss);
rcu_read_unlock();
@@ -1570,7 +1573,7 @@ void rt_flush_dev(struct net_device *dev)
static bool rt_cache_valid(const struct rtable *rt)
{
return rt &&
- rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK &&
!rt_is_expired(rt);
}
@@ -1684,8 +1687,8 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
else if (rt->rt_gw_family == AF_INET6)
new_rt->rt_gw6 = rt->rt_gw6;
- new_rt->dst.input = rt->dst.input;
- new_rt->dst.output = rt->dst.output;
+ new_rt->dst.input = READ_ONCE(rt->dst.input);
+ new_rt->dst.output = READ_ONCE(rt->dst.output);
new_rt->dst.error = rt->dst.error;
new_rt->dst.lastuse = jiffies;
new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
@@ -2585,7 +2588,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache = true;
if (type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL;
- fi = NULL;
} else if (type == RTN_MULTICAST) {
flags |= RTCF_MULTICAST | RTCF_LOCAL;
if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
@@ -2660,7 +2662,7 @@ add:
if (IN_DEV_MFORWARD(in_dev) &&
!ipv4_is_local_multicast(fl4->daddr)) {
rth->dst.input = ip_mr_input;
- rth->dst.output = ip_mc_output;
+ rth->dst.output = ip_mr_output;
}
}
#endif
@@ -2977,8 +2979,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
goto nla_put_failure;
- if (rt->dst.lwtstate &&
- lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+ if (lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
if (rt->dst.tclassid &&
@@ -3009,7 +3010,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
}
}
- expires = rt->dst.expires;
+ expires = READ_ONCE(rt->dst.expires);
if (expires) {
unsigned long now = jiffies;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5459a78b9809..eb0819463fae 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -454,7 +454,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
IPPROTO_TCP, inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
- ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
+ ireq->ir_loc_addr, th->source, th->dest,
+ sk_uid(sk));
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f64f8276a73c..71a956fbfc55 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -302,8 +302,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_IPV6_MOD(sysctl_tcp_mem);
-atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */
-EXPORT_IPV6_MOD(tcp_memory_allocated);
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
@@ -1176,7 +1174,7 @@ restart:
goto do_error;
while (msg_data_left(msg)) {
- ssize_t copy = 0;
+ int copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
@@ -1297,8 +1295,7 @@ new_segment:
if (!copy)
goto wait_for_space;
- err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
if (err < 0) {
if (err == -EMSGSIZE) {
tcp_mark_push(tp, skb);
@@ -3754,6 +3751,19 @@ int tcp_set_window_clamp(struct sock *sk, int val)
return 0;
}
+int tcp_sock_set_maxseg(struct sock *sk, int val)
+{
+ /* Values greater than interface MTU won't take effect. However
+ * at the point when this call is done we typically don't yet
+ * know which interface is going to be used
+ */
+ if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW))
+ return -EINVAL;
+
+ tcp_sk(sk)->rx_opt.user_mss = val;
+ return 0;
+}
+
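
The helper above only range-checks the value and stores it in rx_opt.user_mss; the TCP_MAXSEG hunk further down now calls it instead of open-coding the check. From user space nothing changes; a small sketch using only the standard sockets API (nothing patch-specific):

	#include <stdio.h>
	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		int mss = 1200;	/* 0, or a value inside the kernel's accepted range */

		/* Out-of-range values are rejected with EINVAL by the new helper;
		 * values larger than the eventual interface MTU simply won't take
		 * effect, as the comment above notes.
		 */
		if (fd < 0 || setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
					 &mss, sizeof(mss)) < 0)
			perror("TCP_MAXSEG");
		return 0;
	}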
/*
* Socket option code for TCP.
*/
@@ -3886,15 +3896,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case TCP_MAXSEG:
- /* Values greater than interface MTU won't take effect. However
- * at the point when this call is done we typically don't yet
- * know which interface is going to be used
- */
- if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
- err = -EINVAL;
- break;
- }
- tp->rx_opt.user_mss = val;
+ err = tcp_sock_set_maxseg(sk, val);
break;
case TCP_NODELAY:
@@ -5053,9 +5055,8 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32);
/* TXRX read-mostly hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
@@ -5243,6 +5244,6 @@ void __init tcp_init(void)
tcp_v4_init();
tcp_metrics_init();
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
- tcp_tasklet_init();
+ tcp_tsq_work_init();
mptcp_init();
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 9b83d639b5ac..f1884f0c9e52 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -3,6 +3,7 @@
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
+#include <net/busy_poll.h>
void tcp_fastopen_init_key_once(struct net *net)
{
@@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
+ sk_mark_napi_id_set(child, skb);
+
/* Now finish processing the fastopen child socket. */
tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);
@@ -556,6 +559,7 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct net_device *dev;
struct dst_entry *dst;
struct sk_buff *skb;
@@ -573,7 +577,8 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
} else if (tp->syn_fastopen_ch &&
atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
- if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
+ dev = dst ? dst_dev(dst) : NULL;
+ if (!(dev && (dev->flags & IFF_LOOPBACK)))
atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8ec92dec321a..71b76e98371a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1451,11 +1451,6 @@ static u8 tcp_sacktag_one(struct sock *sk,
tp->sacked_out += pcount;
/* Out-of-order packets delivered */
state->sack_delivered += pcount;
-
- /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
- if (tp->lost_skb_hint &&
- before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq))
- tp->lost_cnt_hint += pcount;
}
/* D-SACK. We can detect redundant retransmission in S|R and plain R
@@ -1496,9 +1491,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
tcp_skb_timestamp_us(skb));
tcp_rate_skb_delivered(sk, skb, state->rate);
- if (skb == tp->lost_skb_hint)
- tp->lost_cnt_hint += pcount;
-
TCP_SKB_CB(prev)->end_seq += shifted;
TCP_SKB_CB(skb)->seq += shifted;
@@ -1531,10 +1523,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
if (skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = prev;
- if (skb == tp->lost_skb_hint) {
- tp->lost_skb_hint = prev;
- tp->lost_cnt_hint -= tcp_skb_pcount(prev);
- }
TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
@@ -2151,12 +2139,6 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
tp->undo_retrans = -1;
}
-static bool tcp_is_rack(const struct sock *sk)
-{
- return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
- TCP_RACK_LOSS_DETECTION;
-}
-
/* If we detect SACK reneging, forget all SACK information
* and reset tags completely, otherwise preserve SACKs. If receiver
* dropped its ofo queue, we will know this due to reneging detection.
@@ -2182,8 +2164,7 @@ static void tcp_timeout_mark_lost(struct sock *sk)
skb_rbtree_walk_from(skb) {
if (is_reneg)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
- else if (tcp_is_rack(sk) && skb != head &&
- tcp_rack_skb_timeout(tp, skb, 0) > 0)
+ else if (skb != head && tcp_rack_skb_timeout(tp, skb, 0) > 0)
continue; /* Don't mark recently sent ones lost yet */
tcp_mark_skb_lost(sk, skb);
}
@@ -2264,22 +2245,6 @@ static bool tcp_check_sack_reneging(struct sock *sk, int *ack_flag)
return false;
}
-/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
- * counter when SACK is enabled (without SACK, sacked_out is used for
- * that purpose).
- *
- * With reordering, holes may still be in flight, so RFC3517 recovery
- * uses pure sacked_out (total number of SACKed segments) even though
- * it violates the RFC that uses duplicate ACKs, often these are equal
- * but when e.g. out-of-window ACKs or packet duplication occurs,
- * they differ. Since neither occurs due to loss, TCP should really
- * ignore them.
- */
-static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
-{
- return tp->sacked_out + 1;
-}
-
/* Linux NewReno/SACK/ECN state machine.
* --------------------------------------
*
@@ -2332,13 +2297,7 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
*
* If the receiver supports SACK:
*
- * RFC6675/3517: It is the conventional algorithm. A packet is
- * considered lost if the number of higher sequence packets
- * SACKed is greater than or equal the DUPACK thoreshold
- * (reordering). This is implemented in tcp_mark_head_lost and
- * tcp_update_scoreboard.
- *
- * RACK (draft-ietf-tcpm-rack-01): it is a newer algorithm
+ * RACK (RFC8985): RACK is a newer loss detection algorithm
* (2017-) that checks timing instead of counting DUPACKs.
* Essentially a packet is considered lost if it's not S/ACKed
* after RTT + reordering_window, where both metrics are
@@ -2353,8 +2312,8 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
* is lost (NewReno). This heuristics are the same in NewReno
* and SACK.
*
- * Really tricky (and requiring careful tuning) part of algorithm
- * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
+ * The really tricky (and requiring careful tuning) part of the algorithm
+ * is hidden in the RACK code in tcp_recovery.c and tcp_xmit_retransmit_queue().
* The first determines the moment _when_ we should reduce CWND and,
* hence, slow down forward transmission. In fact, it determines the moment
* when we decide that hole is caused by loss, rather than by a reorder.
@@ -2377,83 +2336,10 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
* Main question: may we further continue forward transmission
* with the same cwnd?
*/
-static bool tcp_time_to_recover(struct sock *sk, int flag)
+static bool tcp_time_to_recover(const struct tcp_sock *tp)
{
- struct tcp_sock *tp = tcp_sk(sk);
-
- /* Trick#1: The loss is proven. */
- if (tp->lost_out)
- return true;
-
- /* Not-A-Trick#2 : Classic rule... */
- if (!tcp_is_rack(sk) && tcp_dupack_heuristics(tp) > tp->reordering)
- return true;
-
- return false;
-}
-
-/* Detect loss in event "A" above by marking head of queue up as lost.
- * For RFC3517 SACK, a segment is considered lost if it
- * has at least tp->reordering SACKed seqments above it; "packets" refers to
- * the maximum SACKed segments to pass before reaching this limit.
- */
-static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
- int cnt;
- /* Use SACK to deduce losses of new sequences sent during recovery */
- const u32 loss_high = tp->snd_nxt;
-
- WARN_ON(packets > tp->packets_out);
- skb = tp->lost_skb_hint;
- if (skb) {
- /* Head already handled? */
- if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
- return;
- cnt = tp->lost_cnt_hint;
- } else {
- skb = tcp_rtx_queue_head(sk);
- cnt = 0;
- }
-
- skb_rbtree_walk_from(skb) {
- /* TODO: do this better */
- /* this is not the most efficient way to do this... */
- tp->lost_skb_hint = skb;
- tp->lost_cnt_hint = cnt;
-
- if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
- break;
-
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
- cnt += tcp_skb_pcount(skb);
-
- if (cnt > packets)
- break;
-
- if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST))
- tcp_mark_skb_lost(sk, skb);
-
- if (mark_head)
- break;
- }
- tcp_verify_left_out(tp);
-}
-
-/* Account newly detected lost packet(s) */
-
-static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (tcp_is_sack(tp)) {
- int sacked_upto = tp->sacked_out - tp->reordering;
- if (sacked_upto >= 0)
- tcp_mark_head_lost(sk, sacked_upto, 0);
- else if (fast_rexmit)
- tcp_mark_head_lost(sk, 1, 1);
- }
+ /* Has loss detection marked at least one packet lost? */
+ return tp->lost_out != 0;
}
static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
@@ -2479,20 +2365,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
const struct sock *sk = (const struct sock *)tp;
- if (tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
- return true; /* got echoed TS before first retransmission */
+ /* Received an echoed timestamp before the first retransmission? */
+ if (tp->retrans_stamp)
+ return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+
+ /* We set tp->retrans_stamp upon the first retransmission of a loss
+ * recovery episode, so normally if tp->retrans_stamp is 0 then no
+ * retransmission has happened yet (likely due to TSQ, which can cause
+ * fast retransmits to be delayed). So if snd_una advanced while
+ * tp->retrans_stamp is 0 then apparently a packet was merely delayed,
+ * not lost. But there are exceptions where we retransmit but then
+ * clear tp->retrans_stamp, so we check for those exceptions.
+ */
- /* Check if nothing was retransmitted (retrans_stamp==0), which may
- * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
- * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
- * retrans_stamp even if we had retransmitted the SYN.
+ /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen()
+ * clears tp->retrans_stamp when snd_una == high_seq.
*/
- if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
- sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
- return true; /* nothing was retransmitted */
+ if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq))
+ return false;
- return false;
+ /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp
+ * when FLAG_SYN_ACKED is set, even if the SYN was
+ * retransmitted.
+ */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return false;
+
+ return true; /* tp->retrans_stamp is zero; no retransmit yet */
}
/* Undo procedures. */
@@ -2881,8 +2780,6 @@ void tcp_simple_retransmit(struct sock *sk)
tcp_mark_skb_lost(sk, skb);
}
- tcp_clear_retrans_hints_partial(tp);
-
if (!tp->lost_out)
return;
@@ -2990,17 +2887,8 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
*rexmit = REXMIT_LOST;
}
-static bool tcp_force_fast_retransmit(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- return after(tcp_highest_sack_seq(tp),
- tp->snd_una + tp->reordering * tp->mss_cache);
-}
-
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
- bool *do_lost)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -3025,9 +2913,6 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
tcp_undo_cwnd_reduction(sk, true);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
tcp_try_keep_open(sk);
- } else {
- /* Partial ACK arrived. Force fast retransmit. */
- *do_lost = tcp_force_fast_retransmit(sk);
}
return false;
}
@@ -3041,7 +2926,7 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
if (unlikely(tcp_is_reno(tp))) {
tcp_newreno_mark_lost(sk, *ack_flag & FLAG_SND_UNA_ADVANCED);
- } else if (tcp_is_rack(sk)) {
+ } else {
u32 prior_retrans = tp->retrans_out;
if (tcp_rack_mark_lost(sk))
@@ -3068,10 +2953,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int fast_rexmit = 0, flag = *ack_flag;
+ int flag = *ack_flag;
bool ece_ack = flag & FLAG_ECE;
- bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
- tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
@@ -3120,7 +3003,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp))
tcp_add_reno_sack(sk, num_dupack, ece_ack);
- } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
+ } else if (tcp_try_undo_partial(sk, prior_snd_una))
return;
if (tcp_try_undo_dsack(sk))
@@ -3128,7 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tcp_identify_packet_loss(sk, ack_flag);
if (icsk->icsk_ca_state != TCP_CA_Recovery) {
- if (!tcp_time_to_recover(sk, flag))
+ if (!tcp_time_to_recover(tp))
return;
/* Undo reverts the recovery state. If loss is evident,
* starts a new recovery (e.g. reordering then loss);
@@ -3157,7 +3040,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tcp_try_undo_dsack(sk);
tcp_identify_packet_loss(sk, ack_flag);
- if (!tcp_time_to_recover(sk, flag)) {
+ if (!tcp_time_to_recover(tp)) {
tcp_try_to_open(sk, flag);
return;
}
@@ -3175,11 +3058,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
/* Otherwise enter Recovery state */
tcp_enter_recovery(sk, ece_ack);
- fast_rexmit = 1;
}
- if (!tcp_is_rack(sk) && do_lost)
- tcp_update_scoreboard(sk, fast_rexmit);
*rexmit = REXMIT_LOST;
}
@@ -3435,8 +3315,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
- if (unlikely(skb == tp->lost_skb_hint))
- tp->lost_skb_hint = NULL;
tcp_highest_sack_replace(sk, skb, next);
tcp_rtx_queue_unlink_and_free(skb, sk);
}
@@ -3494,14 +3372,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb,
if (flag & FLAG_RETRANS_DATA_ACKED)
flag &= ~FLAG_ORIG_SACK_ACKED;
} else {
- int delta;
-
/* Non-retransmitted hole got filled? That's reordering */
if (before(reord, prior_fack))
tcp_check_sack_reordering(sk, reord, 0);
-
- delta = prior_sacked - tp->sacked_out;
- tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
}
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp,
@@ -3841,7 +3714,7 @@ static int tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
}
/* This routine deals with acks during a TLP episode and ends an episode by
- * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
+ * resetting tlp_high_seq. Ref: TLP algorithm in RFC8985
*/
static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
{
@@ -4518,14 +4391,22 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk,
* (borrowed from freebsd)
*/
-static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+static enum skb_drop_reason tcp_sequence(const struct sock *sk,
u32 seq, u32 end_seq)
{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
if (before(end_seq, tp->rcv_wup))
return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
- if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
- return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+ if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) {
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ /* Only accept this packet if receive queue is empty. */
+ if (skb_queue_len(&sk->sk_receive_queue))
+ return SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE;
+ }
return SKB_NOT_DROPPED_YET;
}
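
The rewritten tcp_sequence() now distinguishes three cases instead of two: entirely old data, a start that is itself beyond the window, and a segment that merely overruns the window's end, which is tolerated when the receive queue is empty. A compressed restatement of that decision, with local names only (not the kernel's types):

	#include <stdbool.h>
	#include <stdint.h>

	static bool seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
	static bool seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

	enum verdict { ACCEPT, DROP_OLD, DROP_INVALID, DROP_UNLESS_QUEUE_EMPTY };

	static enum verdict check_seq(uint32_t seq, uint32_t end_seq,
				      uint32_t rcv_wup, uint32_t win_end)
	{
		if (seq_before(end_seq, rcv_wup))
			return DROP_OLD;		/* entirely below the window */
		if (seq_after(end_seq, win_end)) {
			if (seq_after(seq, win_end))
				return DROP_INVALID;	/* starts beyond the window */
			return DROP_UNLESS_QUEUE_EMPTY;	/* only the tail overruns */
		}
		return ACCEPT;
	}

Here win_end stands in for rcv_nxt + tcp_receive_window(tp), and the queue-empty condition corresponds to the skb_queue_len() test in the patch.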
@@ -4972,8 +4853,9 @@ static void tcp_ofo_queue(struct sock *sk)
if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
__u32 dsack = dsack_high;
+
if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
- dsack_high = TCP_SKB_CB(skb)->end_seq;
+ dsack = TCP_SKB_CB(skb)->end_seq;
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
p = rb_next(p);
@@ -5006,10 +4888,20 @@ static void tcp_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
-static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
+/* Check if this incoming skb can be added to socket receive queues
+ * while satisfying sk->sk_rcvbuf limit.
+ */
+static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
+{
+ unsigned int new_mem = atomic_read(&sk->sk_rmem_alloc) + skb->truesize;
+
+ return new_mem <= sk->sk_rcvbuf;
+}
+
+static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
unsigned int size)
{
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+ if (!tcp_can_ingest(sk, skb) ||
!sk_rmem_schedule(sk, skb, size)) {
if (tcp_prune_queue(sk, skb) < 0)
@@ -5041,6 +4933,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
return;
}
+ tcp_measure_rcv_mss(sk, skb);
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
@@ -5168,7 +5061,9 @@ end:
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
- tcp_rcvbuf_grow(sk);
+ /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */
+ if (sk->sk_socket)
+ tcp_rcvbuf_grow(sk);
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
@@ -5622,7 +5517,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb)
tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
tp->ooo_last_skb = rb_to_skb(prev);
if (!prev || goal <= 0) {
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ if (tcp_can_ingest(sk, in_skb) &&
!tcp_under_memory_pressure(sk))
break;
goal = sk->sk_rcvbuf >> 3;
@@ -5654,14 +5549,18 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ /* Do nothing if our queues are empty. */
+ if (!atomic_read(&sk->sk_rmem_alloc))
+ return -1;
+
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ if (!tcp_can_ingest(sk, in_skb))
tcp_clamp_window(sk);
else if (tcp_under_memory_pressure(sk))
tcp_adjust_rcv_ssthresh(sk);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+ if (tcp_can_ingest(sk, in_skb))
return 0;
tcp_collapse_ofo_queue(sk);
@@ -5671,7 +5570,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
NULL,
tp->copied_seq, tp->rcv_nxt);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+ if (tcp_can_ingest(sk, in_skb))
return 0;
/* Collapsing did not help, destructive actions follow.
@@ -5679,7 +5578,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
tcp_prune_ofo_queue(sk, in_skb);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+ if (tcp_can_ingest(sk, in_skb))
return 0;
/* If we are really being abused, tell the caller to silently
@@ -6005,7 +5904,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
step1:
/* Step 1: check sequence number */
- reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
@@ -6016,6 +5915,11 @@ step1:
if (!th->rst) {
if (th->syn)
goto syn_challenge;
+
+ if (reason == SKB_DROP_REASON_TCP_INVALID_SEQUENCE ||
+ reason == SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE)
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_BEYOND_WINDOW);
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDSEQ,
&tp->last_oow_ack_time))
@@ -6234,6 +6138,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
if (tcp_checksum_complete(skb))
goto csum_error;
+ if (after(TCP_SKB_CB(skb)->end_seq,
+ tp->rcv_nxt + tcp_receive_window(tp)))
+ goto validate;
+
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
@@ -6289,7 +6197,7 @@ slow_path:
/*
* Standard slow path.
*/
-
+validate:
if (!tcp_validate_incoming(sk, skb, th, 1))
return;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6a14f9e6fef6..84d3d556ed80 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -58,7 +58,9 @@
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sock_diag.h>
+#include <net/aligned_data.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
@@ -787,7 +789,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
arg.iov[0].iov_base = (unsigned char *)&rep;
arg.iov[0].iov_len = sizeof(rep.th);
- net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+ net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
/* Invalid TCP option size or twice included auth */
if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
@@ -1703,7 +1705,6 @@ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct tcp_request_sock),
- .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v4_reqsk_send_ack,
.destructor = tcp_v4_reqsk_destructor,
.send_reset = tcp_v4_send_reset,
@@ -2025,6 +2026,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
u32 gso_size;
u64 limit;
int delta;
+ int err;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
* we can fix skb->truesize to its real value to avoid future drops.
@@ -2135,21 +2137,27 @@ no_coalesce:
limit = min_t(u64, limit, UINT_MAX);
- if (unlikely(sk_add_backlog(sk, skb, limit))) {
+ err = sk_add_backlog(sk, skb, limit);
+ if (unlikely(err)) {
bh_unlock_sock(sk);
- *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
+ if (err == -ENOMEM) {
+ *reason = SKB_DROP_REASON_PFMEMALLOC;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
+ } else {
+ *reason = SKB_DROP_REASON_SOCKET_BACKLOG;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
+ }
return true;
}
return false;
}
EXPORT_IPV6_MOD(tcp_add_backlog);
-int tcp_filter(struct sock *sk, struct sk_buff *skb)
+int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason)
{
struct tcphdr *th = (struct tcphdr *)skb->data;
- return sk_filter_trim_cap(sk, skb, th->doff * 4);
+ return sk_filter_trim_cap(sk, skb, th->doff * 4, reason);
}
EXPORT_IPV6_MOD(tcp_filter);
@@ -2276,14 +2284,12 @@ lookup:
}
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb)) {
+ if (!tcp_filter(sk, skb, &drop_reason)) {
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
&drop_reason);
- } else {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -2339,10 +2345,9 @@ process:
nf_reset_ct(skb);
- if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ if (tcp_filter(sk, skb, &drop_reason))
goto discard_and_relse;
- }
+
th = (const struct tcphdr *)skb->data;
iph = ip_hdr(skb);
tcp_v4_fill_cb(skb, iph, th);
@@ -2896,7 +2901,7 @@ static void get_openreq4(const struct request_sock *req,
jiffies_delta_to_clock_t(delta),
req->num_timeout,
from_kuid_munged(seq_user_ns(f),
- sock_i_uid(req->rsk_listener)),
+ sk_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0,
@@ -2954,7 +2959,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(f), sk_uid(sk)),
icsk->icsk_probes_out,
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
@@ -3014,13 +3019,17 @@ out:
}
#ifdef CONFIG_BPF_SYSCALL
+union bpf_tcp_iter_batch_item {
+ struct sock *sk;
+ __u64 cookie;
+};
+
struct bpf_tcp_iter_state {
struct tcp_iter_state state;
unsigned int cur_sk;
unsigned int end_sk;
unsigned int max_sk;
- struct sock **batch;
- bool st_bucket_done;
+ union bpf_tcp_iter_batch_item *batch;
};
struct bpf_iter__tcp {
@@ -3043,21 +3052,32 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
{
- while (iter->cur_sk < iter->end_sk)
- sock_gen_put(iter->batch[iter->cur_sk++]);
+ union bpf_tcp_iter_batch_item *item;
+ unsigned int cur_sk = iter->cur_sk;
+ __u64 cookie;
+
+ /* Remember the cookies of the sockets we haven't seen yet, so we can
+ * pick up where we left off next time around.
+ */
+ while (cur_sk < iter->end_sk) {
+ item = &iter->batch[cur_sk++];
+ cookie = sock_gen_cookie(item->sk);
+ sock_gen_put(item->sk);
+ item->cookie = cookie;
+ }
}
static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
- unsigned int new_batch_sz)
+ unsigned int new_batch_sz, gfp_t flags)
{
- struct sock **new_batch;
+ union bpf_tcp_iter_batch_item *new_batch;
new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
- GFP_USER | __GFP_NOWARN);
+ flags | __GFP_NOWARN);
if (!new_batch)
return -ENOMEM;
- bpf_iter_tcp_put_batch(iter);
+ memcpy(new_batch, iter->batch, sizeof(*iter->batch) * iter->end_sk);
kvfree(iter->batch);
iter->batch = new_batch;
iter->max_sk = new_batch_sz;
@@ -3065,112 +3085,234 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
return 0;
}
-static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
- struct sock *start_sk)
+static struct sock *bpf_iter_tcp_resume_bucket(struct sock *first_sk,
+ union bpf_tcp_iter_batch_item *cookies,
+ int n_cookies)
+{
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ int i;
+
+ for (i = 0; i < n_cookies; i++) {
+ sk = first_sk;
+ sk_nulls_for_each_from(sk, node)
+ if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
+ return sk;
+ }
+
+ return NULL;
+}
+
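
Instead of the old st_bucket_done/offset bookkeeping, the iterator now releases a batch by saving each unread socket's cookie (bpf_iter_tcp_put_batch() above) and, on the next call, walks the bucket again looking for the first cookie that still exists (bpf_iter_tcp_resume_bucket()). The same resume-by-cookie idea in miniature, with throwaway types rather than the kernel's:

	#include <stddef.h>
	#include <stdint.h>

	struct node {
		uint64_t	cookie;		/* stable identity, like a socket cookie */
		struct node	*next;
	};

	/* Try each remembered cookie in order; the first one still present in
	 * the bucket is where iteration resumes.  NULL means every remembered
	 * node is gone and the caller should move on to the next bucket.
	 */
	static struct node *resume_from(struct node *first,
					const uint64_t *cookies, size_t n)
	{
		for (size_t i = 0; i < n; i++)
			for (struct node *it = first; it; it = it->next)
				if (it->cookie == cookies[i])
					return it;
		return NULL;
	}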
+static struct sock *bpf_iter_tcp_resume_listening(struct seq_file *seq)
+{
+ struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ unsigned int find_cookie = iter->cur_sk;
+ unsigned int end_cookie = iter->end_sk;
+ int resume_bucket = st->bucket;
+ struct sock *sk;
+
+ if (end_cookie && find_cookie == end_cookie)
+ ++st->bucket;
+
+ sk = listening_get_first(seq);
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+
+ if (sk && st->bucket == resume_bucket && end_cookie) {
+ sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie],
+ end_cookie - find_cookie);
+ if (!sk) {
+ spin_unlock(&hinfo->lhash2[st->bucket].lock);
+ ++st->bucket;
+ sk = listening_get_first(seq);
+ }
+ }
+
+ return sk;
+}
+
+static struct sock *bpf_iter_tcp_resume_established(struct seq_file *seq)
{
struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
+ unsigned int find_cookie = iter->cur_sk;
+ unsigned int end_cookie = iter->end_sk;
+ int resume_bucket = st->bucket;
+ struct sock *sk;
+
+ if (end_cookie && find_cookie == end_cookie)
+ ++st->bucket;
+
+ sk = established_get_first(seq);
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+
+ if (sk && st->bucket == resume_bucket && end_cookie) {
+ sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie],
+ end_cookie - find_cookie);
+ if (!sk) {
+ spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
+ ++st->bucket;
+ sk = established_get_first(seq);
+ }
+ }
+
+ return sk;
+}
+
+static struct sock *bpf_iter_tcp_resume(struct seq_file *seq)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct sock *sk = NULL;
+
+ switch (st->state) {
+ case TCP_SEQ_STATE_LISTENING:
+ sk = bpf_iter_tcp_resume_listening(seq);
+ if (sk)
+ break;
+ st->bucket = 0;
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ fallthrough;
+ case TCP_SEQ_STATE_ESTABLISHED:
+ sk = bpf_iter_tcp_resume_established(seq);
+ break;
+ }
+
+ return sk;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+ struct sock **start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
- sock_hold(start_sk);
- iter->batch[iter->end_sk++] = start_sk;
+ sock_hold(*start_sk);
+ iter->batch[iter->end_sk++].sk = *start_sk;
- sk = sk_nulls_next(start_sk);
+ sk = sk_nulls_next(*start_sk);
+ *start_sk = NULL;
sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
- iter->batch[iter->end_sk++] = sk;
+ iter->batch[iter->end_sk++].sk = sk;
+ } else if (!*start_sk) {
+ /* Remember where we left off. */
+ *start_sk = sk;
}
expected++;
}
}
- spin_unlock(&hinfo->lhash2[st->bucket].lock);
return expected;
}
static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
- struct sock *start_sk)
+ struct sock **start_sk)
{
- struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
- struct tcp_iter_state *st = &iter->state;
struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
- sock_hold(start_sk);
- iter->batch[iter->end_sk++] = start_sk;
+ sock_hold(*start_sk);
+ iter->batch[iter->end_sk++].sk = *start_sk;
- sk = sk_nulls_next(start_sk);
+ sk = sk_nulls_next(*start_sk);
+ *start_sk = NULL;
sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
- iter->batch[iter->end_sk++] = sk;
+ iter->batch[iter->end_sk++].sk = sk;
+ } else if (!*start_sk) {
+ /* Remember where we left off. */
+ *start_sk = sk;
}
expected++;
}
}
- spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
return expected;
}
-static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+static unsigned int bpf_iter_fill_batch(struct seq_file *seq,
+ struct sock **start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+
+ if (st->state == TCP_SEQ_STATE_LISTENING)
+ return bpf_iter_tcp_listening_batch(seq, start_sk);
+ else
+ return bpf_iter_tcp_established_batch(seq, start_sk);
+}
+
+static void bpf_iter_tcp_unlock_bucket(struct seq_file *seq)
{
struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
+
+ if (st->state == TCP_SEQ_STATE_LISTENING)
+ spin_unlock(&hinfo->lhash2[st->bucket].lock);
+ else
+ spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
unsigned int expected;
- bool resized = false;
struct sock *sk;
+ int err;
- /* The st->bucket is done. Directly advance to the next
- * bucket instead of having the tcp_seek_last_pos() to skip
- * one by one in the current bucket and eventually find out
- * it has to advance to the next bucket.
- */
- if (iter->st_bucket_done) {
- st->offset = 0;
- st->bucket++;
- if (st->state == TCP_SEQ_STATE_LISTENING &&
- st->bucket > hinfo->lhash2_mask) {
- st->state = TCP_SEQ_STATE_ESTABLISHED;
- st->bucket = 0;
- }
- }
+ sk = bpf_iter_tcp_resume(seq);
+ if (!sk)
+ return NULL; /* Done */
-again:
- /* Get a new batch */
- iter->cur_sk = 0;
- iter->end_sk = 0;
- iter->st_bucket_done = false;
+ expected = bpf_iter_fill_batch(seq, &sk);
+ if (likely(iter->end_sk == expected))
+ goto done;
- sk = tcp_seek_last_pos(seq);
+ /* Batch size was too small. */
+ bpf_iter_tcp_unlock_bucket(seq);
+ bpf_iter_tcp_put_batch(iter);
+ err = bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2,
+ GFP_USER);
+ if (err)
+ return ERR_PTR(err);
+
+ sk = bpf_iter_tcp_resume(seq);
if (!sk)
return NULL; /* Done */
- if (st->state == TCP_SEQ_STATE_LISTENING)
- expected = bpf_iter_tcp_listening_batch(seq, sk);
- else
- expected = bpf_iter_tcp_established_batch(seq, sk);
-
- if (iter->end_sk == expected) {
- iter->st_bucket_done = true;
- return sk;
- }
+ expected = bpf_iter_fill_batch(seq, &sk);
+ if (likely(iter->end_sk == expected))
+ goto done;
- if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
- resized = true;
- goto again;
+ /* Batch size was still too small. Hold onto the lock while we try
+ * again with a larger batch to make sure the current bucket's size
+ * does not change in the meantime.
+ */
+ err = bpf_iter_tcp_realloc_batch(iter, expected, GFP_NOWAIT);
+ if (err) {
+ bpf_iter_tcp_unlock_bucket(seq);
+ return ERR_PTR(err);
}
- return sk;
+ expected = bpf_iter_fill_batch(seq, &sk);
+ WARN_ON_ONCE(iter->end_sk != expected);
+done:
+ bpf_iter_tcp_unlock_bucket(seq);
+ return iter->batch[0].sk;
}
static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3200,16 +3342,11 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
* meta.seq_num is used instead.
*/
st->num++;
- /* Move st->offset to the next sk in the bucket such that
- * the future start() will resume at st->offset in
- * st->bucket. See tcp_seek_last_pos().
- */
- st->offset++;
- sock_gen_put(iter->batch[iter->cur_sk++]);
+ sock_gen_put(iter->batch[iter->cur_sk++].sk);
}
if (iter->cur_sk < iter->end_sk)
- sk = iter->batch[iter->cur_sk];
+ sk = iter->batch[iter->cur_sk].sk;
else
sk = bpf_iter_tcp_batch(seq);
@@ -3246,9 +3383,9 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
const struct request_sock *req = v;
uid = from_kuid_munged(seq_user_ns(seq),
- sock_i_uid(req->rsk_listener));
+ sk_uid(req->rsk_listener));
} else {
- uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
}
meta.seq = seq;
@@ -3275,10 +3412,8 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
(void)tcp_prog_seq_show(prog, &meta, v, 0);
}
- if (iter->cur_sk < iter->end_sk) {
+ if (iter->cur_sk < iter->end_sk)
bpf_iter_tcp_put_batch(iter);
- iter->st_bucket_done = false;
- }
}
static const struct seq_operations bpf_iter_tcp_seq_ops = {
@@ -3391,7 +3526,7 @@ struct proto tcp_prot = {
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
- .memory_allocated = &tcp_memory_allocated,
+ .memory_allocated = &net_aligned_data.tcp_memory_allocated,
.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
.memory_pressure = &tcp_memory_pressure,
@@ -3596,7 +3731,7 @@ static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
if (err)
return err;
- err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+ err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ, GFP_USER);
if (err) {
bpf_iter_fini_seq_net(priv_data);
return err;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4251670e328c..03c068ea27b6 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
unsigned int hash)
{
struct tcp_metrics_block *tm;
- struct net *net;
bool reclaim = false;
+ struct net *net;
spin_lock_bh(&tcp_metrics_lock);
- net = dev_net_rcu(dst->dev);
+ net = dev_net_rcu(dst_dev(dst));
/* While waiting for the spin-lock the cache might have been populated
* with this entry and so we have to check again.
@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
return NULL;
}
- net = dev_net_rcu(dst->dev);
+ net = dev_net_rcu(dst_dev(dst));
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
else
return NULL;
- net = dev_net_rcu(dst->dev);
+ net = dev_net_rcu(dst_dev(dst));
hash ^= net_hash_mix(net);
hash = hash_32(hash, tcp_metrics_hash_log);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 43d7852ce07e..2994c9222c9c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -726,7 +726,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
&tcp_rsk(req)->last_oow_ack_time) &&
- !inet_rtx_syn_ack(sk, req)) {
+ !tcp_rtx_synack(sk, req)) {
unsigned long expires = jiffies;
expires += reqsk_timeout(req, TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index d293087b426d..be5c2294610e 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -359,6 +359,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
flush |= skb->ip_summed != p->ip_summed;
flush |= skb->csum_level != p->csum_level;
flush |= NAPI_GRO_CB(p)->count >= 64;
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
if (flush || skb_gro_receive_list(p, skb))
mss = 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3ac8d2d17e1f..caf11920a878 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1066,15 +1066,15 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
* needs to be reallocated in a driver.
* The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
*
- * Since transmit from skb destructor is forbidden, we use a tasklet
+ * Since transmit from skb destructor is forbidden, we use a BH work item
* to process all sockets that eventually need to send more skbs.
- * We use one tasklet per cpu, with its own queue of sockets.
+ * We use one work item per cpu, with its own queue of sockets.
*/
-struct tsq_tasklet {
- struct tasklet_struct tasklet;
+struct tsq_work {
+ struct work_struct work;
struct list_head head; /* queue of tcp sockets */
};
-static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
+static DEFINE_PER_CPU(struct tsq_work, tsq_work);
static void tcp_tsq_write(struct sock *sk)
{
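
The conversion above keeps the shape of the old TSQ machinery: one per-CPU object holding a list of throttled sockets, now drained from the BH workqueue instead of a tasklet. A condensed sketch of that shape, as an illustration of the pattern rather than the actual TSQ code:

	#include <linux/workqueue.h>
	#include <linux/list.h>
	#include <linux/percpu.h>
	#include <linux/cpumask.h>
	#include <linux/irqflags.h>

	struct my_bh_queue {
		struct work_struct	work;
		struct list_head	head;	/* entries queued from hot paths */
	};
	static DEFINE_PER_CPU(struct my_bh_queue, my_bh_queue);

	static void my_bh_workfn(struct work_struct *work)
	{
		struct my_bh_queue *q = container_of(work, struct my_bh_queue, work);

		/* Splice q->head under local_irq_save() and process each entry. */
	}

	static void my_bh_queue_init(void)
	{
		int cpu;

		for_each_possible_cpu(cpu) {
			struct my_bh_queue *q = &per_cpu(my_bh_queue, cpu);

			INIT_LIST_HEAD(&q->head);
			INIT_WORK(&q->work, my_bh_workfn);
		}
	}

	static void my_bh_queue_add(struct list_head *entry)
	{
		struct my_bh_queue *q;
		unsigned long flags;
		bool empty;

		local_irq_save(flags);	/* producers may run from (hard)irq context */
		q = this_cpu_ptr(&my_bh_queue);
		empty = list_empty(&q->head);
		list_add(entry, &q->head);
		if (empty)
			queue_work(system_bh_wq, &q->work);	/* executes in BH context */
		local_irq_restore(flags);
	}

system_bh_wq is the bottom-half workqueue the patch itself queues onto; everything else here is illustrative.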
@@ -1104,14 +1104,14 @@ static void tcp_tsq_handler(struct sock *sk)
bh_unlock_sock(sk);
}
/*
- * One tasklet per cpu tries to send more skbs.
- * We run in tasklet context but need to disable irqs when
+ * One work item per cpu tries to send more skbs.
+ * We run in BH context but need to disable irqs when
* transferring tsq->head because tcp_wfree() might
* interrupt us (non NAPI drivers)
*/
-static void tcp_tasklet_func(struct tasklet_struct *t)
+static void tcp_tsq_workfn(struct work_struct *work)
{
- struct tsq_tasklet *tsq = from_tasklet(tsq, t, tasklet);
+ struct tsq_work *tsq = container_of(work, struct tsq_work, work);
LIST_HEAD(list);
unsigned long flags;
struct list_head *q, *n;
@@ -1181,15 +1181,15 @@ void tcp_release_cb(struct sock *sk)
}
EXPORT_IPV6_MOD(tcp_release_cb);
-void __init tcp_tasklet_init(void)
+void __init tcp_tsq_work_init(void)
{
int i;
for_each_possible_cpu(i) {
- struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
+ struct tsq_work *tsq = &per_cpu(tsq_work, i);
INIT_LIST_HEAD(&tsq->head);
- tasklet_setup(&tsq->tasklet, tcp_tasklet_func);
+ INIT_WORK(&tsq->work, tcp_tsq_workfn);
}
}
@@ -1203,11 +1203,11 @@ void tcp_wfree(struct sk_buff *skb)
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
unsigned long flags, nval, oval;
- struct tsq_tasklet *tsq;
+ struct tsq_work *tsq;
bool empty;
/* Keep one reference on sk_wmem_alloc.
- * Will be released by sk_free() from here or tcp_tasklet_func()
+ * Will be released by sk_free() from here or tcp_tsq_workfn()
*/
WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
@@ -1229,13 +1229,13 @@ void tcp_wfree(struct sk_buff *skb)
nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED;
} while (!try_cmpxchg(&sk->sk_tsq_flags, &oval, nval));
- /* queue this socket to tasklet queue */
+ /* queue this socket to BH workqueue */
local_irq_save(flags);
- tsq = this_cpu_ptr(&tsq_tasklet);
+ tsq = this_cpu_ptr(&tsq_work);
empty = list_empty(&tsq->head);
list_add(&tp->tsq_node, &tsq->head);
if (empty)
- tasklet_schedule(&tsq->tasklet);
+ queue_work(system_bh_wq, &tsq->work);
local_irq_restore(flags);
return;
out:
@@ -1554,11 +1554,6 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
if (tcp_is_reno(tp) && decr > 0)
tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
- if (tp->lost_skb_hint &&
- before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
- tp->lost_cnt_hint -= decr;
-
tcp_verify_left_out(tp);
}
@@ -2639,7 +2634,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
/* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
- * after softirq/tasklet schedule.
+ * after softirq schedule.
* This helps when TX completions are delayed too much.
*/
if (tcp_rtx_queue_empty_or_single_skb(sk))
@@ -3252,7 +3247,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
/* changed transmit queue under us so clear hints */
- tcp_clear_retrans_hints_partial(tp);
if (next_skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = skb;
@@ -3336,8 +3330,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
if (icsk->icsk_mtup.probe_size)
icsk->icsk_mtup.probe_size = 0;
- if (skb_still_in_host_queue(sk, skb))
- return -EBUSY;
+ if (skb_still_in_host_queue(sk, skb)) {
+ err = -EBUSY;
+ goto out;
+ }
start:
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
@@ -3348,14 +3344,19 @@ start:
}
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
WARN_ON_ONCE(1);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
+ }
+ if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) {
+ err = -ENOMEM;
+ goto out;
}
- if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
- return -ENOMEM;
}
- if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
- return -EHOSTUNREACH; /* Routing failure or similar. */
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) {
+ err = -EHOSTUNREACH; /* Routing failure or similar. */
+ goto out;
+ }
cur_mss = tcp_current_mss(sk);
avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -3366,8 +3367,10 @@ start:
* our retransmit of one segment serves as a zero window probe.
*/
if (avail_wnd <= 0) {
- if (TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una) {
+ err = -EAGAIN;
+ goto out;
+ }
avail_wnd = cur_mss;
}
@@ -3379,11 +3382,15 @@ start:
}
if (skb->len > len) {
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
- cur_mss, GFP_ATOMIC))
- return -ENOMEM; /* We'll try again later. */
+ cur_mss, GFP_ATOMIC)) {
+ err = -ENOMEM; /* We'll try again later. */
+ goto out;
+ }
} else {
- if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
- return -ENOMEM;
+ if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) {
+ err = -ENOMEM;
+ goto out;
+ }
diff = tcp_skb_pcount(skb);
tcp_set_skb_tso_segs(skb, cur_mss);
@@ -3437,17 +3444,16 @@ start:
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
TCP_SKB_CB(skb)->seq, segs, err);
- if (likely(!err)) {
- trace_tcp_retransmit_skb(sk, skb);
- } else if (err != -EBUSY) {
+ if (unlikely(err) && err != -EBUSY)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
- }
/* To avoid taking spuriously low RTT samples based on a timestamp
* for a transmit that never happened, always mark EVER_RETRANS
*/
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+out:
+ trace_tcp_retransmit_skb(sk, skb, err);
return err;
}
@@ -4431,6 +4437,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
tcp_sk_rw(sk)->total_retrans++;
}
trace_tcp_retransmit_synack(sk, req);
+ req->num_retrans++;
}
return res;
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index bba10110fbbc..c52fd3254b6e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -35,7 +35,7 @@ s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd)
tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb));
}
-/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
+/* RACK loss detection (IETF RFC8985):
*
* Marks a packet lost, if some packet sent later has been (s)acked.
* The underlying idea is similar to the traditional dupthresh and FACK
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index bb37e24b97a7..a207877270fb 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -478,7 +478,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
* regular retransmit because if the child socket has been accepted
* it's not good to give up too easily.
*/
- inet_rtx_syn_ack(sk, req);
+ tcp_rtx_synack(sk, req);
req->num_timeout++;
tcp_update_rto_stats(sk);
if (!tp->retrans_stamp)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dde52b8050b8..cc3ce0f762ec 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -127,8 +127,6 @@ struct udp_table udp_table __read_mostly;
long sysctl_udp_mem[3] __read_mostly;
EXPORT_IPV6_MOD(sysctl_udp_mem);
-atomic_long_t udp_memory_allocated ____cacheline_aligned_in_smp;
-EXPORT_IPV6_MOD(udp_memory_allocated);
DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc);
@@ -145,8 +143,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
unsigned long *bitmap,
struct sock *sk, unsigned int log)
{
+ kuid_t uid = sk_uid(sk);
struct sock *sk2;
- kuid_t uid = sock_i_uid(sk);
sk_for_each(sk2, &hslot->head) {
if (net_eq(sock_net(sk2), net) &&
@@ -158,7 +156,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(uid, sock_i_uid(sk2))) {
+ uid_eq(uid, sk_uid(sk2))) {
if (!bitmap)
return 0;
} else {
@@ -180,8 +178,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
struct udp_hslot *hslot2,
struct sock *sk)
{
+ kuid_t uid = sk_uid(sk);
struct sock *sk2;
- kuid_t uid = sock_i_uid(sk);
int res = 0;
spin_lock(&hslot2->lock);
@@ -195,7 +193,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
inet_rcv_saddr_equal(sk, sk2, true)) {
if (sk2->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) &&
- uid_eq(uid, sock_i_uid(sk2))) {
+ uid_eq(uid, sk_uid(sk2))) {
res = 0;
} else {
res = 1;
@@ -210,7 +208,7 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num,
static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
struct net *net = sock_net(sk);
- kuid_t uid = sock_i_uid(sk);
+ kuid_t uid = sk_uid(sk);
struct sock *sk2;
sk_for_each(sk2, &hslot->head) {
@@ -220,7 +218,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
(udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
(sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
- sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
+ sk2->sk_reuseport && uid_eq(uid, sk_uid(sk2)) &&
inet_rcv_saddr_equal(sk, sk2, false)) {
return reuseport_add_sock(sk, sk2,
inet_rcv_saddr_any(sk));
@@ -1445,7 +1443,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark,
ipc.tos & INET_DSCP_MASK, scope,
sk->sk_protocol, flow_flags, faddr, saddr,
- dport, inet->inet_sport, sk->sk_uid);
+ dport, inet->inet_sport,
+ sk_uid(sk));
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
@@ -2348,7 +2347,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
*/
static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
- int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+ enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
@@ -2437,10 +2436,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason))
goto drop;
- }
udp_csum_pull_header(skb);
@@ -3234,7 +3231,7 @@ struct proto udp_prot = {
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
- .memory_allocated = &udp_memory_allocated,
+ .memory_allocated = &net_aligned_data.udp_memory_allocated,
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
@@ -3386,7 +3383,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
sk_wmem_alloc_get(sp),
udp_rqueue_get(sp),
0, 0L, 0,
- from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
+ from_kuid_munged(seq_user_ns(f), sk_uid(sp)),
0, sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
@@ -3629,7 +3626,7 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
goto unlock;
}
- uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index e1ff3a375996..c7142213fc21 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP4_IMPL_H
#define _UDP4_IMPL_H
+#include <net/aligned_data.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <net/protocol.h>
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 85b5aa82d7d7..5128e2a5b00a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -44,7 +44,7 @@ struct udp_tunnel_type_entry {
DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv);
static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call);
-static struct mutex udp_tunnel_gro_type_lock;
+static DEFINE_MUTEX(udp_tunnel_gro_type_lock);
static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES];
static unsigned int udp_tunnel_gro_type_nr;
static DEFINE_SPINLOCK(udp_tunnel_gro_lock);
@@ -144,11 +144,6 @@ out:
}
EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv);
-static void udp_tunnel_gro_init(void)
-{
- mutex_init(&udp_tunnel_gro_type_lock);
-}
-
static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
@@ -165,8 +160,6 @@ static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk,
#else
-static void udp_tunnel_gro_init(void) {}
-
static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
@@ -767,6 +760,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
+ skb_set_network_header(skb, skb_gro_receive_network_offset(skb));
ret = skb_gro_receive_list(p, skb);
} else {
skb_gro_postpull_rcsum(skb, uh,
@@ -1000,6 +994,5 @@ int __init udpv4_offload_init(void)
},
};
- udp_tunnel_gro_init();
return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP);
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 2326548997d3..fce945f23069 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -134,15 +134,17 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
struct udp_tunnel_info ti;
struct net_device *dev;
+ ASSERT_RTNL();
+
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
+ udp_tunnel_nic_lock(dev);
udp_tunnel_nic_add_port(dev, &ti);
+ udp_tunnel_nic_unlock(dev);
}
- rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port);
@@ -154,22 +156,24 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
struct udp_tunnel_info ti;
struct net_device *dev;
+ ASSERT_RTNL();
+
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
+ udp_tunnel_nic_lock(dev);
udp_tunnel_nic_del_port(dev, &ti);
+ udp_tunnel_nic_unlock(dev);
}
- rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port);
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
- bool xnet, bool nocheck)
+ bool xnet, bool nocheck, u16 ipcb_flags)
{
struct udphdr *uh;
@@ -185,7 +189,8 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
udp_set_csum(nocheck, skb, src, dst, skb->len);
- iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
+ iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet,
+ ipcb_flags);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index b6d2d16189c0..ff66db48453c 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -29,6 +29,7 @@ struct udp_tunnel_nic_table_entry {
* struct udp_tunnel_nic - UDP tunnel port offload state
* @work: async work for talking to hardware from process context
* @dev: netdev pointer
+ * @lock: protects all fields
* @need_sync: at least one port start changed
* @need_replay: space was freed, we need a replay of all ports
* @work_pending: @work is currently scheduled
@@ -41,6 +42,8 @@ struct udp_tunnel_nic {
struct net_device *dev;
+ struct mutex lock;
+
u8 need_sync:1;
u8 need_replay:1;
u8 work_pending:1;
@@ -298,22 +301,11 @@ __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
static void
udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn)
{
- const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
- bool may_sleep;
-
if (!utn->need_sync)
return;
- /* Drivers which sleep in the callback need to update from
- * the workqueue, if we come from the tunnel driver's notification.
- */
- may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
- if (!may_sleep)
- __udp_tunnel_nic_device_sync(dev, utn);
- if (may_sleep || utn->need_replay) {
- queue_work(udp_tunnel_nic_workqueue, &utn->work);
- utn->work_pending = 1;
- }
+ queue_work(udp_tunnel_nic_workqueue, &utn->work);
+ utn->work_pending = 1;
}
static bool
@@ -554,12 +546,12 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
struct udp_tunnel_nic *utn;
unsigned int i, j;
- ASSERT_RTNL();
-
utn = dev->udp_tunnel_nic;
if (!utn)
return;
+ mutex_lock(&utn->lock);
+
utn->need_sync = false;
for (i = 0; i < utn->n_tables; i++)
for (j = 0; j < info->tables[i].n_entries; j++) {
@@ -569,7 +561,7 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL |
UDP_TUNNEL_NIC_ENTRY_OP_FAIL);
- /* We don't release rtnl across ops */
+ /* We don't release utn lock across ops */
WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN);
if (!entry->use_cnt)
continue;
@@ -579,6 +571,8 @@ static void __udp_tunnel_nic_reset_ntf(struct net_device *dev)
}
__udp_tunnel_nic_device_sync(dev, utn);
+
+ mutex_unlock(&utn->lock);
}
static size_t
@@ -643,6 +637,33 @@ err_cancel:
return -EMSGSIZE;
}
+static void __udp_tunnel_nic_assert_locked(struct net_device *dev)
+{
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ if (utn)
+ lockdep_assert_held(&utn->lock);
+}
+
+static void __udp_tunnel_nic_lock(struct net_device *dev)
+{
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ if (utn)
+ mutex_lock(&utn->lock);
+}
+
+static void __udp_tunnel_nic_unlock(struct net_device *dev)
+{
+ struct udp_tunnel_nic *utn;
+
+ utn = dev->udp_tunnel_nic;
+ if (utn)
+ mutex_unlock(&utn->lock);
+}
+
static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
.get_port = __udp_tunnel_nic_get_port,
.set_port_priv = __udp_tunnel_nic_set_port_priv,
@@ -651,6 +672,9 @@ static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = {
.reset_ntf = __udp_tunnel_nic_reset_ntf,
.dump_size = __udp_tunnel_nic_dump_size,
.dump_write = __udp_tunnel_nic_dump_write,
+ .assert_locked = __udp_tunnel_nic_assert_locked,
+ .lock = __udp_tunnel_nic_lock,
+ .unlock = __udp_tunnel_nic_unlock,
};
static void
@@ -710,11 +734,15 @@ static void udp_tunnel_nic_device_sync_work(struct work_struct *work)
container_of(work, struct udp_tunnel_nic, work);
rtnl_lock();
+ mutex_lock(&utn->lock);
+
utn->work_pending = 0;
__udp_tunnel_nic_device_sync(utn->dev, utn);
if (utn->need_replay)
udp_tunnel_nic_replay(utn->dev, utn);
+
+ mutex_unlock(&utn->lock);
rtnl_unlock();
}
@@ -730,6 +758,7 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info,
return NULL;
utn->n_tables = n_tables;
INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work);
+ mutex_init(&utn->lock);
for (i = 0; i < n_tables; i++) {
utn->entries[i] = kcalloc(info->tables[i].n_entries,
@@ -821,8 +850,11 @@ static int udp_tunnel_nic_register(struct net_device *dev)
dev_hold(dev);
dev->udp_tunnel_nic = utn;
- if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY))
+ if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) {
+ udp_tunnel_nic_lock(dev);
udp_tunnel_get_rx_info(dev);
+ udp_tunnel_nic_unlock(dev);
+ }
return 0;
}
@@ -832,6 +864,8 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
{
const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info;
+ udp_tunnel_nic_lock(dev);
+
/* For a shared table remove this dev from the list of sharing devices
* and if there are other devices just detach.
*/
@@ -841,8 +875,10 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
list_for_each_entry(node, &info->shared->devices, list)
if (node->dev == dev)
break;
- if (list_entry_is_head(node, &info->shared->devices, list))
+ if (list_entry_is_head(node, &info->shared->devices, list)) {
+ udp_tunnel_nic_unlock(dev);
return;
+ }
list_del(&node->list);
kfree(node);
@@ -852,6 +888,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
if (first) {
udp_tunnel_drop_rx_info(dev);
utn->dev = first->dev;
+ udp_tunnel_nic_unlock(dev);
goto release_dev;
}
@@ -862,6 +899,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn)
* from the work which we will boot immediately.
*/
udp_tunnel_nic_flush(dev, utn);
+ udp_tunnel_nic_unlock(dev);
/* Wait for the work to be done using the state, netdev core will
* retry unregister until we give up our reference on this device.
@@ -910,12 +948,16 @@ udp_tunnel_nic_netdevice_event(struct notifier_block *unused,
return NOTIFY_DONE;
if (event == NETDEV_UP) {
+ udp_tunnel_nic_lock(dev);
WARN_ON(!udp_tunnel_nic_is_empty(dev, utn));
udp_tunnel_get_rx_info(dev);
+ udp_tunnel_nic_unlock(dev);
return NOTIFY_OK;
}
if (event == NETDEV_GOING_DOWN) {
+ udp_tunnel_nic_lock(dev);
udp_tunnel_nic_flush(dev, utn);
+ udp_tunnel_nic_unlock(dev);
return NOTIFY_OK;
}
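In udp_tunnel_nic.c the implicit RTNL protection is replaced by a mutex embedded in struct udp_tunnel_nic, reached from the core through the new .lock/.unlock/.assert_locked ops, each of which tolerates a device with no offload instance attached. The same indirection pattern in miniature, with invented type and function names:

#include <linux/mutex.h>
#include <linux/lockdep.h>

/* Optional per-device offload state; may be absent, so every helper
 * checks for NULL before touching the lock.
 */
struct example_offload_state {
	struct mutex lock;	/* protects all fields below */
	unsigned int n_ports;
};

struct example_ops {
	void (*lock)(struct example_offload_state *st);
	void (*unlock)(struct example_offload_state *st);
	void (*assert_locked)(struct example_offload_state *st);
};

static void example_lock(struct example_offload_state *st)
{
	if (st)
		mutex_lock(&st->lock);
}

static void example_unlock(struct example_offload_state *st)
{
	if (st)
		mutex_unlock(&st->lock);
}

static void example_assert_locked(struct example_offload_state *st)
{
	if (st)
		lockdep_assert_held(&st->lock);
}

static const struct example_ops example_offload_ops = {
	.lock		= example_lock,
	.unlock		= example_unlock,
	.assert_locked	= example_assert_locked,
};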
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index af37af3ab727..d3e621a11a1a 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -60,7 +60,7 @@ struct proto udplite_prot = {
.rehash = udp_v4_rehash,
.get_port = udp_v4_get_port,
- .memory_allocated = &udp_memory_allocated,
+ .memory_allocated = &net_aligned_data.udp_memory_allocated,
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
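Like the UDP change earlier in the series, udplite now resolves .memory_allocated through net_aligned_data (declared in <net/aligned_data.h>, the header added at the top of this section), which appears to exist so that hot, frequently written counters live together in one deliberately aligned structure. A generic sketch of that grouping technique; the struct and field names here are invented and do not match the kernel's actual layout:

#include <linux/atomic.h>
#include <linux/cache.h>

/* Group frequently written global counters so each can be given its own
 * cacheline, rather than letting the linker scatter them and create
 * accidental false sharing with unrelated read-mostly data.
 */
struct example_aligned_data {
	atomic_long_t	proto_a_memory_allocated ____cacheline_aligned_in_smp;
	atomic_long_t	proto_b_memory_allocated ____cacheline_aligned_in_smp;
};

static struct example_aligned_data example_aligned_data;

/* A protocol then points at its slot, e.g.:
 *	.memory_allocated = &example_aligned_data.proto_a_memory_allocated,
 */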
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 0d31a8c108d4..f28cfd88eaf5 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb..0ae67d537499 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -31,7 +31,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, skb->dev, skb_dst(skb)->dev,
+ net, sk, skb, skb->dev, skb_dst_dev(skb),
__xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
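This is one of many mechanical conversions in the series from raw dst->dev and skb_dst(skb)->dev dereferences to the dst_dev() and skb_dst_dev() accessors. The sketch below shows what such accessors typically look like; it illustrates the idea and is not the in-tree definition, which may carry different annotations:

#include <net/dst.h>
#include <linux/skbuff.h>

/* Read the device through one accessor so every caller picks up the same
 * annotation instead of open-coding dst->dev.
 */
static inline struct net_device *example_dst_dev(const struct dst_entry *dst)
{
	return READ_ONCE(dst->dev);
}

static inline struct net_device *example_skb_dst_dev(const struct sk_buff *skb)
{
	return example_dst_dev(skb_dst(skb));
}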
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ba2ec7c870cc..f17a5dd4789f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -239,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
.ra_honor_pio_pflag = 0,
+ .force_forwarding = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -303,6 +304,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
.ra_honor_pio_pflag = 0,
+ .force_forwarding = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -857,6 +859,9 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
+ /* Disabling all.forwarding also clears force_forwarding on all interfaces */
+ if (newf == 0)
+ WRITE_ONCE(idev->cnf.force_forwarding, 0);
WRITE_ONCE(idev->cnf.forwarding, newf);
if (changed)
@@ -2229,32 +2234,29 @@ errdad:
in6_ifa_put(ifp);
}
-/* Join to solicited addr multicast group.
- * caller must hold RTNL */
+/* Join to solicited addr multicast group. */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP))
return;
addrconf_addr_solict_mult(addr, &maddr);
ipv6_dev_mc_inc(dev, &maddr);
}
-/* caller must hold RTNL */
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;
- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
+ if (READ_ONCE(idev->dev->flags) & (IFF_LOOPBACK | IFF_NOARP))
return;
addrconf_addr_solict_mult(addr, &maddr);
__ipv6_dev_mc_dec(idev, &maddr);
}
-/* caller must hold RTNL */
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
@@ -2267,7 +2269,6 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
__ipv6_dev_ac_inc(ifp->idev, &addr);
}
-/* caller must hold RTNL */
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
@@ -3208,7 +3209,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE)
static void add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
@@ -3367,7 +3368,7 @@ static int ipv6_generate_stable_address(struct in6_addr *address,
retry:
spin_lock_bh(&lock);
- sha1_init(digest);
+ sha1_init_raw(digest);
memset(&data, 0, sizeof(data));
memset(workspace, 0, sizeof(workspace));
memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len);
@@ -3463,6 +3464,7 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
(dev->type != ARPHRD_6LOWPAN) &&
+ (dev->type != ARPHRD_IP6GRE) &&
(dev->type != ARPHRD_TUNNEL) &&
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
@@ -3518,34 +3520,29 @@ static void addrconf_sit_config(struct net_device *dev)
}
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
ASSERT_RTNL();
- idev = ipv6_find_idev(dev);
- if (IS_ERR(idev)) {
- pr_debug("%s: add_dev failed\n", __func__);
+ idev = addrconf_add_dev(dev);
+ if (IS_ERR(idev))
return;
- }
/* Generate the IPv6 link-local address using addrconf_addr_gen(),
* unless we have an IPv4 GRE device not bound to an IP address and
* which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this
* case). Such devices fall back to add_v4_addrs() instead.
*/
- if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 &&
+ if (!(*(__be32 *)dev->dev_addr == 0 &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) {
addrconf_addr_gen(idev, true);
return;
}
add_v4_addrs(idev);
-
- if (dev->flags & IFF_POINTOPOINT)
- addrconf_add_mroute(dev);
}
#endif
@@ -3557,8 +3554,7 @@ static void addrconf_init_auto_addrs(struct net_device *dev)
addrconf_sit_config(dev);
break;
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
+#if IS_ENABLED(CONFIG_NET_IPGRE)
case ARPHRD_IPGRE:
addrconf_gre_config(dev);
break;
@@ -3865,7 +3861,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
* Do not dev_put!
*/
if (unregister) {
- idev->dead = 1;
+ WRITE_ONCE(idev->dead, 1);
/* protected by rtnl_lock */
RCU_INIT_POINTER(dev->ip6_ptr, NULL);
@@ -5719,6 +5715,7 @@ static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_UNTRACKED_NA] =
READ_ONCE(cnf->accept_untracked_na);
array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
+ array[DEVCONF_FORCE_FORWARDING] = READ_ONCE(cnf->force_forwarding);
}
static inline size_t inet6_ifla6_size(void)
@@ -6081,7 +6078,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_type = dev->type;
ifindex = READ_ONCE(dev->ifindex);
hdr->ifi_index = ifindex;
- hdr->ifi_flags = dev_get_flags(dev);
+ hdr->ifi_flags = netif_get_flags(dev);
hdr->ifi_change = 0;
iflink = dev_get_iflink(dev);
@@ -6747,6 +6744,75 @@ static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write
return ret;
}
+static void addrconf_force_forward_change(struct net *net, __s32 newf)
+{
+ struct net_device *dev;
+ struct inet6_dev *idev;
+
+ for_each_netdev(net, dev) {
+ idev = __in6_dev_get_rtnl_net(dev);
+ if (idev) {
+ int changed = (!idev->cnf.force_forwarding) ^ (!newf);
+
+ WRITE_ONCE(idev->cnf.force_forwarding, newf);
+ if (changed)
+ inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
+ NETCONFA_FORCE_FORWARDING,
+ dev->ifindex, &idev->cnf);
+ }
+ }
+}
+
+static int addrconf_sysctl_force_forwarding(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct inet6_dev *idev = ctl->extra1;
+ struct ctl_table tmp_ctl = *ctl;
+ struct net *net = ctl->extra2;
+ int *valp = ctl->data;
+ int new_val = *valp;
+ int old_val = *valp;
+ loff_t pos = *ppos;
+ int ret;
+
+ tmp_ctl.extra1 = SYSCTL_ZERO;
+ tmp_ctl.extra2 = SYSCTL_ONE;
+ tmp_ctl.data = &new_val;
+
+ ret = proc_douintvec_minmax(&tmp_ctl, write, buffer, lenp, ppos);
+
+ if (write && old_val != new_val) {
+ if (!rtnl_net_trylock(net))
+ return restart_syscall();
+
+ WRITE_ONCE(*valp, new_val);
+
+ if (valp == &net->ipv6.devconf_dflt->force_forwarding) {
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORCE_FORWARDING,
+ NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt);
+ } else if (valp == &net->ipv6.devconf_all->force_forwarding) {
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORCE_FORWARDING,
+ NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all);
+
+ addrconf_force_forward_change(net, new_val);
+ } else {
+ inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
+ NETCONFA_FORCE_FORWARDING,
+ idev->dev->ifindex,
+ &idev->cnf);
+ }
+ rtnl_net_unlock(net);
+ }
+
+ if (ret)
+ *ppos = pos;
+ return ret;
+}
+
static int minus_one = -1;
static const int two_five_five = 255;
static u32 ioam6_if_id_max = U16_MAX;
@@ -7217,6 +7283,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
+ {
+ .procname = "force_forwarding",
+ .data = &ipv6_devconf.force_forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_force_forwarding,
+ },
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
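The new addrconf_sysctl_force_forwarding() handler follows the established addrconf pattern: parse the write into a private copy clamped to 0..1, then take the per-netns RTNL with a trylock and restart the syscall on contention, so the handler never blocks on RTNL from inside the sysctl machinery. A stripped-down sketch of that control flow, with the netconf notifications elided and assuming the headers already pulled in by addrconf.c:

/* Sketch only; mirrors the pattern, not the exact handler above. */
static int example_sysctl_flag(const struct ctl_table *ctl, int write,
			       void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = ctl->extra2;
	struct ctl_table tmp = *ctl;
	int *valp = ctl->data;
	int new_val = *valp;
	loff_t pos = *ppos;
	int ret;

	/* Parse into a private copy, clamped to 0..1. */
	tmp.data = &new_val;
	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = SYSCTL_ONE;
	ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && *valp != new_val) {
		/* Avoid deadlocking against code that takes RTNL first:
		 * back off and let the syscall restart.
		 */
		if (!rtnl_net_trylock(net))
			return restart_syscall();

		WRITE_ONCE(*valp, new_val);
		/* propagate to per-device conf, send netconf notifications */
		rtnl_net_unlock(net);
	}

	if (ret)
		*ppos = pos;	/* undo the offset advance on error */
	return ret;
}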
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index fb63ffbcfc64..567efd626ab4 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -20,12 +20,6 @@
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#if 0
-#define ADDRLABEL(x...) printk(x)
-#else
-#define ADDRLABEL(x...) do { ; } while (0)
-#endif
-
/*
* Policy Table
*/
@@ -150,8 +144,8 @@ u32 ipv6_addr_label(struct net *net,
label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
rcu_read_unlock();
- ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
- __func__, addr, type, ifindex, label);
+ net_dbg_ratelimited("%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type,
+ ifindex, label);
return label;
}
@@ -164,8 +158,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
struct ip6addrlbl_entry *newp;
int addrtype;
- ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
- __func__, prefix, prefixlen, ifindex, (unsigned int)label);
+ net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__,
+ prefix, prefixlen, ifindex, (unsigned int)label);
addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
@@ -207,8 +201,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
struct hlist_node *n;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
- replace);
+ net_dbg_ratelimited("%s(newp=%p, replace=%d)\n", __func__, newp, replace);
hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
@@ -247,9 +240,8 @@ static int ip6addrlbl_add(struct net *net,
struct ip6addrlbl_entry *newp;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
- __func__, prefix, prefixlen, ifindex, (unsigned int)label,
- replace);
+ net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
+ __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace);
newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
if (IS_ERR(newp))
@@ -271,8 +263,8 @@ static int __ip6addrlbl_del(struct net *net,
struct hlist_node *n;
int ret = -ESRCH;
- ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
- __func__, prefix, prefixlen, ifindex);
+ net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix,
+ prefixlen, ifindex);
hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
@@ -294,8 +286,8 @@ static int ip6addrlbl_del(struct net *net,
struct in6_addr prefix_buf;
int ret;
- ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
- __func__, prefix, prefixlen, ifindex);
+ net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix,
+ prefixlen, ifindex);
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
spin_lock(&net->ipv6.ip6addrlbl_table.lock);
@@ -312,8 +304,6 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
int err;
int i;
- ADDRLABEL(KERN_DEBUG "%s\n", __func__);
-
spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index acaff1296783..1992621e3f3f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -842,7 +842,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = inet->inet_dport;
fl6.fl6_sport = inet->inet_sport;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
rcu_read_lock();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 21e01695b48c..f8a8e46286b8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -47,6 +47,9 @@
static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(acaddr_hash_lock);
+#define ac_dereference(a, idev) \
+ rcu_dereference_protected(a, lockdep_is_held(&(idev)->lock))
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
static u32 inet6_acaddr_hash(const struct net *net,
@@ -64,14 +67,12 @@ static u32 inet6_acaddr_hash(const struct net *net,
int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_ac_socklist *pac = NULL;
+ struct net *net = sock_net(sk);
+ netdevice_tracker dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev;
- struct ipv6_ac_socklist *pac;
- struct net *net = sock_net(sk);
- int ishost = !net->ipv6.devconf_all->forwarding;
- int err = 0;
-
- ASSERT_RTNL();
+ int err = 0, ishost;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -79,32 +80,43 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EINVAL;
if (ifindex)
- dev = __dev_get_by_index(net, ifindex);
+ dev = netdev_get_by_index(net, ifindex, &dev_tracker, GFP_KERNEL);
- if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
- return -EINVAL;
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) {
+ err = -EINVAL;
+ goto error;
+ }
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
- if (!pac)
- return -ENOMEM;
+ if (!pac) {
+ err = -ENOMEM;
+ goto error;
+ }
+
pac->acl_next = NULL;
pac->acl_addr = *addr;
+ ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding);
+
if (ifindex == 0) {
struct rt6_info *rt;
+ rcu_read_lock();
rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
- dev = rt->dst.dev;
+ dev = dst_dev(&rt->dst);
+ netdev_hold(dev, &dev_tracker, GFP_ATOMIC);
ip6_rt_put(rt);
} else if (ishost) {
+ rcu_read_unlock();
err = -EADDRNOTAVAIL;
goto error;
} else {
/* router, no matching interface: just pick one */
- dev = __dev_get_by_flags(net, IFF_UP,
- IFF_UP | IFF_LOOPBACK);
+ dev = netdev_get_by_flags_rcu(net, &dev_tracker, IFF_UP,
+ IFF_UP | IFF_LOOPBACK);
}
+ rcu_read_unlock();
}
if (!dev) {
@@ -112,7 +124,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
goto error;
}
- idev = __in6_dev_get(dev);
+ idev = in6_dev_get(dev);
if (!idev) {
if (ifindex)
err = -ENODEV;
@@ -120,8 +132,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
err = -EADDRNOTAVAIL;
goto error;
}
+
/* reset ishost, now that we have a specific device */
- ishost = !idev->cnf.forwarding;
+ ishost = !READ_ONCE(idev->cnf.forwarding);
pac->acl_ifindex = dev->ifindex;
@@ -134,7 +147,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ishost)
err = -EADDRNOTAVAIL;
if (err)
- goto error;
+ goto error_idev;
}
err = __ipv6_dev_ac_inc(idev, addr);
@@ -144,7 +157,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
pac = NULL;
}
+error_idev:
+ in6_dev_put(idev);
error:
+ netdev_put(dev, &dev_tracker);
+
if (pac)
sock_kfree_s(sk, pac, sizeof(*pac));
return err;
@@ -155,12 +172,10 @@ error:
*/
int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net_device *dev;
struct ipv6_ac_socklist *pac, *prev_pac;
+ struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
-
- ASSERT_RTNL();
+ struct net_device *dev;
prev_pac = NULL;
for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
@@ -176,9 +191,11 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
else
np->ipv6_ac_list = pac->acl_next;
- dev = __dev_get_by_index(net, pac->acl_ifindex);
- if (dev)
+ dev = dev_get_by_index(net, pac->acl_ifindex);
+ if (dev) {
ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ dev_put(dev);
+ }
sock_kfree_s(sk, pac, sizeof(*pac));
return 0;
@@ -187,21 +204,20 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
void __ipv6_sock_ac_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
struct net_device *dev = NULL;
struct ipv6_ac_socklist *pac;
- struct net *net = sock_net(sk);
- int prev_index;
+ int prev_index = 0;
- ASSERT_RTNL();
pac = np->ipv6_ac_list;
np->ipv6_ac_list = NULL;
- prev_index = 0;
while (pac) {
struct ipv6_ac_socklist *next = pac->acl_next;
if (pac->acl_ifindex != prev_index) {
- dev = __dev_get_by_index(net, pac->acl_ifindex);
+ dev_put(dev);
+ dev = dev_get_by_index(net, pac->acl_ifindex);
prev_index = pac->acl_ifindex;
}
if (dev)
@@ -209,6 +225,8 @@ void __ipv6_sock_ac_close(struct sock *sk)
sock_kfree_s(sk, pac, sizeof(*pac));
pac = next;
}
+
+ dev_put(dev);
}
void ipv6_sock_ac_close(struct sock *sk)
@@ -217,9 +235,8 @@ void ipv6_sock_ac_close(struct sock *sk)
if (!np->ipv6_ac_list)
return;
- rtnl_lock();
+
__ipv6_sock_ac_close(sk);
- rtnl_unlock();
}
static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
@@ -319,16 +336,14 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
struct net *net;
int err;
- ASSERT_RTNL();
-
write_lock_bh(&idev->lock);
if (idev->dead) {
err = -ENODEV;
goto out;
}
- for (aca = rtnl_dereference(idev->ac_list); aca;
- aca = rtnl_dereference(aca->aca_next)) {
+ for (aca = ac_dereference(idev->ac_list, idev); aca;
+ aca = ac_dereference(aca->aca_next, idev)) {
if (ipv6_addr_equal(&aca->aca_addr, addr)) {
aca->aca_users++;
err = 0;
@@ -380,12 +395,10 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifacaddr6 *aca, *prev_aca;
- ASSERT_RTNL();
-
write_lock_bh(&idev->lock);
prev_aca = NULL;
- for (aca = rtnl_dereference(idev->ac_list); aca;
- aca = rtnl_dereference(aca->aca_next)) {
+ for (aca = ac_dereference(idev->ac_list, idev); aca;
+ aca = ac_dereference(aca->aca_next, idev)) {
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
prev_aca = aca;
@@ -414,14 +427,18 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
-/* called with rtnl_lock() */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
- struct inet6_dev *idev = __in6_dev_get(dev);
+ struct inet6_dev *idev = in6_dev_get(dev);
+ int err;
if (!idev)
return -ENODEV;
- return __ipv6_dev_ac_dec(idev, addr);
+
+ err = __ipv6_dev_ac_dec(idev, addr);
+ in6_dev_put(idev);
+
+ return err;
}
void ipv6_ac_destroy_dev(struct inet6_dev *idev)
@@ -429,7 +446,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
struct ifacaddr6 *aca;
write_lock_bh(&idev->lock);
- while ((aca = rtnl_dereference(idev->ac_list)) != NULL) {
+ while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) {
rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
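Because the anycast paths no longer assume RTNL, device lookups move from __dev_get_by_index() (valid only while RTNL pins the device) to reference-taking lookups paired with a netdevice tracker. The sketch below shows that tracked-reference idiom; example_with_device() and its body are invented:

#include <linux/netdevice.h>

/* Hold a tracked reference across a sleepable section instead of relying
 * on RTNL to keep the device alive.
 */
static int example_with_device(struct net *net, int ifindex)
{
	netdevice_tracker tracker;
	struct net_device *dev;

	dev = netdev_get_by_index(net, ifindex, &tracker, GFP_KERNEL);
	if (!dev)
		return -ENODEV;

	/* ... use dev without holding RTNL ... */

	netdev_put(dev, &tracker);	/* pairs with netdev_get_by_index() */
	return 0;
}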
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 62618a058b8f..df1986973430 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -32,7 +32,7 @@
#include <linux/unaligned.h>
#include <linux/crc-ccitt.h>
-/* Maximium size of the calipso option including
+/* Maximum size of the calipso option including
* the two-byte TLV header.
*/
#define CALIPSO_OPT_LEN_MAX (2 + 252)
@@ -42,13 +42,13 @@
*/
#define CALIPSO_HDR_LEN (2 + 8)
-/* Maximium size of the calipso option including
+/* Maximum size of the calipso option including
* the two-byte TLV header and upto 3 bytes of
* leading pad and 7 bytes of trailing pad.
*/
#define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7)
- /* Maximium size of u32 aligned buffer required to hold calipso
+ /* Maximum size of u32 aligned buffer required to hold calipso
* option. Max of 3 initial pad bytes starting from buffer + 3.
* i.e. the worst case is when the previous tlv finishes on 4n + 3.
*/
@@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req,
struct ipv6_opt_hdr *old, *new;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return -ENOMEM;
+
if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt)
old = req_inet->ipv6_opt->hopopt;
else
@@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req)
struct ipv6_txoptions *txopts;
struct sock *sk = sk_to_full_sk(req_to_sk(req));
+ /* sk is NULL for SYN+ACK w/ SYN Cookie */
+ if (!sk)
+ return;
+
if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt)
return;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index fff78496803d..972bf0426d59 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -53,7 +53,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
- fl6->flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk_uid(sk);
if (!oif)
oif = np->sticky_pktinfo.ipi6_ifindex;
@@ -127,7 +127,7 @@ void ip6_datagram_release_cb(struct sock *sk)
rcu_read_lock();
dst = __sk_dst_get(sk);
- if (!dst || !dst->obsolete ||
+ if (!dst || !READ_ONCE(dst->obsolete) ||
dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) {
rcu_read_unlock();
return;
@@ -1064,7 +1064,7 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
sk_wmem_alloc_get(sp),
rqueue,
0, 0L, 0,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
0,
sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 457de0745a33..d1ef9644f826 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -306,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
- __IP6_INC_STATS(dev_net(dst->dev), idev,
+ __IP6_INC_STATS(dev_net(dst_dev(dst)), idev,
IPSTATS_MIB_INHDRERRORS);
fail_and_free:
kfree_skb(skb);
@@ -460,7 +460,7 @@ looped_back:
return -1;
}
- if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+ if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
@@ -621,7 +621,7 @@ looped_back:
return -1;
}
- if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+ if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
@@ -783,7 +783,7 @@ looped_back:
kfree_skb(skb);
return -1;
}
- if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) {
+ if (!ipv6_chk_home_addr(skb_dst_dev_net(skb), addr)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
return -1;
@@ -809,7 +809,7 @@ looped_back:
return -1;
}
- if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) {
+ if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) {
if (ipv6_hdr(skb)->hop_limit <= 1) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 3fd19a84b358..44550957fd4e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -196,6 +196,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
+ struct net_device *dev;
struct dst_entry *dst;
bool res = false;
@@ -208,10 +209,11 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
* this lookup should be more aggressive (not longer than timeout).
*/
dst = ip6_route_output(net, sk, fl6);
+ dev = dst_dev(dst);
if (dst->error) {
IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTNOROUTES);
- } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
+ } else if (dev && (dev->flags & IFF_LOOPBACK)) {
res = true;
} else {
struct rt6_info *rt = dst_rt6_info(dst);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 7d574f5132e2..7bb9edc5c28c 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = orig_dst->dev->ifindex;
+ fl6.flowi6_oif = dst_dev(orig_dst)->ifindex;
fl6.flowi6_iif = LOOPBACK_IFINDEX;
fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
&ip6h->daddr);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 8f500eaf33cf..333e43434dd7 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -45,7 +45,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
fl6->flowi6_mark = ireq->ir_mark;
fl6->fl6_dport = ireq->ir_rmt_port;
fl6->fl6_sport = htons(ireq->ir_num);
- fl6->flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk_uid(sk);
security_req_classify_flow(req, flowi6_to_flowi_common(fl6));
dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
@@ -79,7 +79,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_sport = inet->inet_sport;
fl6->fl6_dport = inet->inet_dport;
- fl6->flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk_uid(sk);
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
rcu_read_lock();
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index a84d332f952f..9553a3200081 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -696,6 +696,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_schema *sc,
u8 sclen, bool is_input)
{
+ struct net_device *dev = skb_dst_dev(skb);
struct timespec64 ts;
ktime_t tstamp;
u64 raw64;
@@ -712,7 +713,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (is_input)
byte--;
- raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+ raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id;
*(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
data += sizeof(__be32);
@@ -728,10 +729,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
- if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ if (dev->flags & IFF_LOOPBACK)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id);
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -783,10 +784,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
struct Qdisc *qdisc;
__u32 qlen, backlog;
- if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+ if (dev->flags & IFF_LOOPBACK) {
*(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
} else {
- queue = skb_get_tx_queue(skb_dst(skb)->dev, skb);
+ queue = skb_get_tx_queue(dev, skb);
qdisc = rcu_dereference(queue->qdisc);
qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog);
@@ -807,7 +808,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (is_input)
byte--;
- raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+ raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide;
*(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
data += sizeof(__be64);
@@ -823,10 +824,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
- if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ if (dev->flags & IFF_LOOPBACK)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide);
+ raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 40df8bdfaacd..1fe7894f14dd 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -335,7 +335,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
if (has_tunsrc)
memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
else
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ ipv6_dev_get_saddr(net, dst_dev(dst), &hdr->daddr,
IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -442,7 +442,7 @@ do_encap:
dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
local_bh_enable();
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
if (unlikely(err))
goto drop;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 93578b2ec35f..02c16909f618 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -445,15 +445,17 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ unsigned int nsiblings;
int err;
if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- if (rt->fib6_nsiblings)
+ nsiblings = READ_ONCE(rt->fib6_nsiblings);
+ if (nsiblings)
err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
rt,
- rt->fib6_nsiblings,
+ nsiblings,
arg->extack);
else
err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
@@ -963,8 +965,7 @@ insert_above:
}
static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
- const struct fib6_info *match,
- const struct fib6_table *table)
+ const struct fib6_info *match)
{
int cpu;
@@ -999,21 +1000,15 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
rcu_read_unlock();
}
-struct fib6_nh_pcpu_arg {
- struct fib6_info *from;
- const struct fib6_table *table;
-};
-
static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
{
- struct fib6_nh_pcpu_arg *arg = _arg;
+ struct fib6_info *arg = _arg;
- __fib6_drop_pcpu_from(nh, arg->from, arg->table);
+ __fib6_drop_pcpu_from(nh, arg);
return 0;
}
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
- const struct fib6_table *table)
+static void fib6_drop_pcpu_from(struct fib6_info *f6i)
{
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
@@ -1022,19 +1017,14 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
if (f6i->nh) {
- struct fib6_nh_pcpu_arg arg = {
- .from = f6i,
- .table = table
- };
-
rcu_read_lock();
- nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg);
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, f6i);
rcu_read_unlock();
} else {
struct fib6_nh *fib6_nh;
fib6_nh = f6i->fib6_nh;
- __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ __fib6_drop_pcpu_from(fib6_nh, f6i);
}
}
@@ -1045,7 +1035,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
/* Flush all cached dst in exception table */
rt6_flush_exceptions(rt);
- fib6_drop_pcpu_from(rt, table);
+ fib6_drop_pcpu_from(rt);
if (rt->nh) {
spin_lock(&rt->nh->lock);
@@ -1138,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (rt6_duplicate_nexthop(iter, rt)) {
if (rt->fib6_nsiblings)
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) {
@@ -1167,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
- rt->fib6_nsiblings++;
+ WRITE_ONCE(rt->fib6_nsiblings,
+ rt->fib6_nsiblings + 1);
}
if (iter->fib6_metric > rt->fib6_metric)
@@ -1217,7 +1208,8 @@ next_iter:
fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
&rt->fib6_siblings, fib6_siblings) {
- sibling->fib6_nsiblings++;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings + 1);
BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
fib6_nsiblings++;
}
@@ -1264,8 +1256,9 @@ add:
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rcu_read_lock();
rt6_multipath_rebalance(next_sibling);
@@ -2014,8 +2007,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
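fib6_nsiblings is now read without the table lock (see the fib6_rt_dump() hunk above), so every writer is annotated with WRITE_ONCE() and the lockless reader pairs it with READ_ONCE(). The same pattern on an invented structure:

#include <linux/compiler.h>
#include <linux/spinlock.h>

struct example_route {
	spinlock_t lock;	/* serializes writers */
	unsigned int nsiblings;	/* written under lock, read locklessly */
};

/* Writer: still under the lock, but annotated so a concurrent lockless
 * reader cannot observe torn or re-fetched values.
 */
static void example_add_sibling(struct example_route *rt)
{
	spin_lock(&rt->lock);
	WRITE_ONCE(rt->nsiblings, rt->nsiblings + 1);
	spin_unlock(&rt->lock);
}

/* Lockless reader: snapshot once and work with the local copy. */
static unsigned int example_read_siblings(const struct example_route *rt)
{
	return READ_ONCE(rt->nsiblings);
}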
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 2dc9dcffe2ca..74d49dd6124d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -111,8 +111,32 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
#define tunnels_l tunnels[1]
#define tunnels_wc tunnels[0]
-/* Given src, dst and key, find appropriate for input tunnel. */
+static bool ip6gre_tunnel_match(struct ip6_tnl *t, int dev_type, int link,
+ int *cand_score, struct ip6_tnl **ret)
+{
+ int score = 0;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ return false;
+
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0) {
+ *ret = t;
+ return true;
+ }
+
+ if (score < *cand_score) {
+ *ret = t;
+ *cand_score = score;
+ }
+ return false;
+}
+/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
const struct in6_addr *remote, const struct in6_addr *local,
__be32 key, __be16 gre_proto)
@@ -127,8 +151,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
gre_proto == htons(ETH_P_ERSPAN) ||
gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
- int score, cand_score = 4;
struct net_device *ndev;
+ int cand_score = 4;
for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
@@ -137,22 +161,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
!(t->dev->flags & IFF_UP))
continue;
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
+ if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand))
+ return cand;
}
for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) {
@@ -161,22 +171,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
!(t->dev->flags & IFF_UP))
continue;
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
+ if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand))
+ return cand;
}
for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) {
@@ -187,22 +183,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
!(t->dev->flags & IFF_UP))
continue;
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
+ if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand))
+ return cand;
}
for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) {
@@ -210,22 +192,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
!(t->dev->flags & IFF_UP))
continue;
- if (t->dev->type != ARPHRD_IP6GRE &&
- t->dev->type != dev_type)
- continue;
-
- score = 0;
- if (t->parms.link != link)
- score |= 1;
- if (t->dev->type != dev_type)
- score |= 2;
- if (score == 0)
- return t;
-
- if (score < cand_score) {
- cand = t;
- cand_score = score;
- }
+ if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand))
+ return cand;
}
if (cand)
@@ -1085,9 +1053,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
- if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
-
+ if (!t->parms.collect_md && dst) {
+ mtu = READ_ONCE(dst_dev(dst)->mtu);
+ if (dst_mtu(dst) > mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
+ }
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
NEXTHDR_GRE);
if (err != 0) {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 39da6a7ce5f1..168ec07e31cc 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -187,7 +187,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
* arrived via the sending interface (ethX), because of the
* nature of scoping architecture. --yoshfuji
*/
- IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
+ IP6CB(skb)->iif = skb_valid_dst(skb) ?
+ ip6_dst_idev(skb_dst(skb))->dev->ifindex :
+ dev->ifindex;
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
@@ -476,6 +478,13 @@ discard:
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_INDISCARDS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
+ return 0;
+ }
+
skb_clear_delivery_time(skb);
ip6_protocol_deliver_rcu(net, skb, 0, false);
@@ -499,38 +508,32 @@ EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
+ struct net_device *dev = skb->dev;
int sdif = inet6_sdif(skb);
const struct ipv6hdr *hdr;
- struct net_device *dev;
bool deliver;
- __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
- __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
- skb->len);
+ __IP6_UPD_PO_STATS(skb_dst_dev_net_rcu(skb),
+ __in6_dev_get_safely(dev), IPSTATS_MIB_INMCAST,
+ skb->len);
/* skb->dev passed may be master dev for vrfs. */
if (sdif) {
- rcu_read_lock();
- dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif);
+ dev = dev_get_by_index_rcu(dev_net_rcu(dev), sdif);
if (!dev) {
- rcu_read_unlock();
kfree_skb(skb);
return -ENODEV;
}
- } else {
- dev = skb->dev;
}
hdr = ipv6_hdr(skb);
deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL);
- if (sdif)
- rcu_read_unlock();
#ifdef CONFIG_IPV6_MROUTE
/*
* IPv6 multicast router mode is now supported ;)
*/
- if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
+ if (atomic_read(&dev_net_rcu(skb->dev)->ipv6.devconf_all->mc_forwarding) &&
!(ipv6_addr_type(&hdr->daddr) &
(IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) &&
likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
@@ -571,22 +574,21 @@ int ip6_mc_input(struct sk_buff *skb)
/* unknown RA - process it normally */
}
- if (deliver)
+ if (deliver) {
skb2 = skb_clone(skb, GFP_ATOMIC);
- else {
+ } else {
skb2 = skb;
skb = NULL;
}
- if (skb2) {
+ if (skb2)
ip6_mr_input(skb2);
- }
}
out:
#endif
- if (likely(deliver))
+ if (likely(deliver)) {
ip6_input(skb);
- else {
+ } else {
/* discard */
kfree_skb(skb);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7bd29a9ff0db..1e1410237b6e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,7 +60,7 @@
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
const struct in6_addr *daddr, *nexthop;
@@ -232,8 +232,9 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst_dev(dst), *indev = skb->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -271,7 +272,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
struct inet6_dev *idev = ip6_dst_idev(dst);
struct hop_jumbo_hdr *hop_jumbo;
int hoplen = sizeof(*hop_jumbo);
@@ -503,13 +504,15 @@ int ip6_forward(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
- struct net *net = dev_net(dst->dev);
+ struct net *net = dev_net(dst_dev(dst));
+ struct net_device *dev;
struct inet6_dev *idev;
SKB_DR(reason);
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
- if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
+ if (!READ_ONCE(net->ipv6.devconf_all->forwarding) &&
+ (!idev || !READ_ONCE(idev->cnf.force_forwarding)))
goto error;
if (skb->pkt_type != PACKET_HOST)
@@ -561,7 +564,7 @@ int ip6_forward(struct sk_buff *skb)
/* XXX: idev->cnf.proxy_ndp? */
if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
- pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
+ pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
/* It's tempting to decrease the hop limit
@@ -591,12 +594,12 @@ int ip6_forward(struct sk_buff *skb)
goto drop;
}
dst = skb_dst(skb);
-
+ dev = dst_dev(dst);
/* IPv6 specs say nothing about it, but it is clear that we cannot
send redirects to source routed frames.
We don't send redirects to frames decapsulated from IPsec.
*/
- if (IP6CB(skb)->iif == dst->dev->ifindex &&
+ if (IP6CB(skb)->iif == dev->ifindex &&
opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
struct inet_peer *peer;
@@ -644,7 +647,7 @@ int ip6_forward(struct sk_buff *skb)
if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
- skb->dev = dst->dev;
+ skb->dev = dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
__IP6_INC_STATS(net, ip6_dst_idev(dst),
@@ -653,7 +656,7 @@ int ip6_forward(struct sk_buff *skb)
return -EMSGSIZE;
}
- if (skb_cow(skb, dst->dev->hard_header_len)) {
+ if (skb_cow(skb, dev->hard_header_len)) {
__IP6_INC_STATS(net, ip6_dst_idev(dst),
IPSTATS_MIB_OUTDISCARDS);
goto drop;
@@ -666,7 +669,7 @@ int ip6_forward(struct sk_buff *skb)
hdr->hop_limit--;
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, dst->dev,
+ net, NULL, skb, skb->dev, dev,
ip6_forward_finish);
error:
@@ -1093,7 +1096,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) {
dst_release(dst);
dst = NULL;
}
@@ -1760,8 +1763,7 @@ alloc_new_skb:
if (WARN_ON_ONCE(copy > msg->msg_iter.count))
goto error;
- err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
if (err < 0)
goto error;
copy = err;
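With force_forwarding wired into ip6_forward(), a packet is dropped only when both the netns-wide forwarding toggle and the per-interface override are off. Condensed into a hypothetical helper (the structure and field names are the kernel's, force_forwarding being the field added by this series):

#include <linux/ipv6.h>
#include <net/if_inet6.h>

/* Forward when the global knob is on, or when the ingress interface
 * explicitly forces forwarding.
 */
static bool example_may_forward(const struct ipv6_devconf *all,
				const struct inet6_dev *idev)
{
	if (READ_ONCE(all->forwarding))
		return true;
	return idev && READ_ONCE(idev->cnf.force_forwarding);
}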
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 894d3158a6f0..3262e81223df 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -632,7 +632,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
} else {
if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
ip4h_dscp(eiph), skb2->dev) ||
- skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
+ skb_dst_dev(skb2)->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -1179,7 +1179,7 @@ route_lookup:
ndst = dst;
}
- tdev = dst->dev;
+ tdev = dst_dev(dst);
if (tdev == dev) {
DEV_STATS_INC(dev, collisions);
@@ -1255,7 +1255,7 @@ route_lookup:
/* Calculate max headroom for all the headers and adjust
* needed_headroom if necessary.
*/
- max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr)
+ dst->header_len + t->hlen;
if (max_headroom > READ_ONCE(dev->needed_headroom))
WRITE_ONCE(dev->needed_headroom, max_headroom);
@@ -1278,7 +1278,7 @@ route_lookup:
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
ipv6h->daddr = fl6->daddr;
- ip6tunnel_xmit(NULL, skb, dev);
+ ip6tunnel_xmit(NULL, skb, dev, 0);
return 0;
tx_err_link_failure:
DEV_STATS_INC(dev, tx_carrier_errors);
@@ -1562,11 +1562,22 @@ static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
netdev_state_change(t->dev);
}
-static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p,
+ bool strict)
{
- /* for default tnl0 device allow to change only the proto */
+ /* For the default ip6tnl0 device, allow changing only the protocol
+ * (the IP6_TNL_F_CAP_PER_PACKET flag is set on ip6tnl0, and all other
+ * parameters are 0).
+ */
+ if (strict &&
+ (!ipv6_addr_any(&p->laddr) || !ipv6_addr_any(&p->raddr) ||
+ p->flags != t->parms.flags || p->hop_limit || p->encap_limit ||
+ p->flowinfo || p->link || p->fwmark || p->collect_md))
+ return -EINVAL;
+
t->parms.proto = p->proto;
netdev_state_change(t->dev);
+ return 0;
}
static void
@@ -1680,7 +1691,7 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
} else
t = netdev_priv(dev);
if (dev == ip6n->fb_tnl_dev)
- ip6_tnl0_update(t, &p1);
+ ip6_tnl0_update(t, &p1, false);
else
ip6_tnl_update(t, &p1);
}
@@ -2053,8 +2064,28 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct ip_tunnel_encap ipencap;
- if (dev == ip6n->fb_tnl_dev)
- return -EINVAL;
+ if (dev == ip6n->fb_tnl_dev) {
+ if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
+ /* iproute2 always sets TUNNEL_ENCAP_FLAG_CSUM6, so
+ * let's ignore this flag.
+ */
+ ipencap.flags &= ~TUNNEL_ENCAP_FLAG_CSUM6;
+ if (memchr_inv(&ipencap, 0, sizeof(ipencap))) {
+ NL_SET_ERR_MSG(extack,
+ "Only protocol can be changed for fallback tunnel, not encap params");
+ return -EINVAL;
+ }
+ }
+
+ ip6_tnl_netlink_parms(data, &p);
+ if (ip6_tnl0_update(t, &p, true) < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Only protocol can be changed for fallback tunnel");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
int err = ip6_tnl_encap_setup(t, &ipencap);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index c99053189ea8..0ff547a4bff7 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -74,13 +74,14 @@ error:
}
EXPORT_SYMBOL_GPL(udp_sock_create6);
-int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be32 label,
- __be16 src_port, __be16 dst_port, bool nocheck)
+void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u8 prio, __u8 ttl, __be32 label,
+ __be16 src_port, __be16 dst_port, bool nocheck,
+ u16 ip6cb_flags)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
@@ -108,8 +109,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
- ip6tunnel_xmit(sk, skb, dev);
- return 0;
+ ip6tunnel_xmit(sk, skb, dev, ip6cb_flags);
}
EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
@@ -168,7 +168,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
return ERR_PTR(-ENETUNREACH);
}
- if (dst->dev == dev) { /* is this necessary? */
+ if (dst_dev(dst) == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
dst_release(dst);
return ERR_PTR(-ELOOP);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 40464a88bca6..ad5290be4dd6 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -497,7 +497,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
(const struct in6_addr *)&x->id.daddr))
goto tx_err_link_failure;
- tdev = dst->dev;
+ tdev = dst_dev(dst);
if (tdev == dev) {
DEV_STATS_INC(dev, collisions);
@@ -529,7 +529,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
xmit:
skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
skb_dst_set(skb, dst);
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = dst_dev(dst);
err = dst_output(t->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9db31e5b998c..e047a4680ab0 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2035,8 +2035,8 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, int vifi)
+static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, int vifi)
{
struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *vif_dev;
@@ -2046,7 +2046,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
vif_dev = vif_dev_read(vif);
if (!vif_dev)
- goto out_free;
+ return -1;
#ifdef CONFIG_IPV6_PIMSM_V2
if (vif->flags & MIFF_REGISTER) {
@@ -2055,7 +2055,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
DEV_STATS_INC(vif_dev, tx_packets);
ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
- goto out_free;
+ return -1;
}
#endif
@@ -2069,7 +2069,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
dst_release(dst);
- goto out_free;
+ return -1;
}
skb_dst_drop(skb);
@@ -2093,20 +2093,43 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
/* We are about to write */
/* XXX: extension headers? */
if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
- goto out_free;
+ return -1;
ipv6h = ipv6_hdr(skb);
ipv6h->hop_limit--;
+ return 0;
+}
+
+static void ip6mr_forward2(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, int vifi)
+{
+ struct net_device *indev = skb->dev;
+
+ if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
+ goto out_free;
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
- net, NULL, skb, skb->dev, vif_dev,
- ip6mr_forward2_finish);
+ NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, indev, skb->dev,
+ ip6mr_forward2_finish);
+ return;
+
+out_free:
+ kfree_skb(skb);
+}
+
+static void ip6mr_output2(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, int vifi)
+{
+ if (ip6mr_prepare_xmit(net, mrt, skb, vifi))
+ goto out_free;
+
+ ip6_output(net, NULL, skb);
+ return;
out_free:
kfree_skb(skb);
- return 0;
}
/* Called with rcu_read_lock() */
@@ -2221,6 +2244,56 @@ dont_forward:
kfree_skb(skb);
}
+/* Called under rcu_read_lock() */
+static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt,
+ struct net_device *dev, struct sk_buff *skb,
+ struct mfc6_cache *c)
+{
+ int psend = -1;
+ int ct;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
+
+ /* Forward the frame */
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
+ if (ipv6_hdr(skb)->hop_limit >
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
+ /* It's an (*,*) entry and the packet is not coming from
+ * the upstream: forward the packet to the upstream
+ * only.
+ */
+ psend = c->_c.mfc_parent;
+ goto last_forward;
+ }
+ goto dont_forward;
+ }
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
+ if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
+ if (psend != -1) {
+ struct sk_buff *skb2;
+
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2)
+ ip6mr_output2(net, mrt, skb2, psend);
+ }
+ psend = ct;
+ }
+ }
+last_forward:
+ if (psend != -1) {
+ ip6mr_output2(net, mrt, skb, psend);
+ return;
+ }
+
+dont_forward:
+ kfree_skb(skb);
+}
/*
* Multicast packets for forwarding arrive here
@@ -2228,21 +2301,20 @@ dont_forward:
int ip6_mr_input(struct sk_buff *skb)
{
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net_rcu(dev);
struct mfc6_cache *cache;
- struct net *net = dev_net(skb->dev);
struct mr_table *mrt;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = dev->ifindex,
.flowi6_mark = skb->mark,
};
int err;
- struct net_device *dev;
/* skb->dev passed in is the master dev for vrfs.
* Get the proper interface that does have a vif associated with it.
*/
- dev = skb->dev;
- if (netif_is_l3_master(skb->dev)) {
+ if (netif_is_l3_master(dev)) {
dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
if (!dev) {
kfree_skb(skb);
@@ -2288,6 +2360,61 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
+int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct flowi6 fl6 = (struct flowi6) {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_mark = skb->mark,
+ };
+ struct mfc6_cache *cache;
+ struct mr_table *mrt;
+ int err;
+ int vif;
+
+ guard(rcu)();
+
+ if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
+ goto ip6_output;
+ if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE))
+ goto ip6_output;
+
+ err = ip6mr_fib_lookup(net, &fl6, &mrt);
+ if (err < 0) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ cache = ip6mr_cache_find(mrt,
+ &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
+ if (!cache) {
+ vif = ip6mr_find_vif(mrt, dev);
+ if (vif >= 0)
+ cache = ip6mr_cache_find_any(mrt,
+ &ipv6_hdr(skb)->daddr,
+ vif);
+ }
+
+ /* No usable cache entry */
+ if (!cache) {
+ vif = ip6mr_find_vif(mrt, dev);
+ if (vif >= 0)
+ return ip6mr_cache_unresolved(mrt, vif, skb, dev);
+ goto ip6_output;
+ }
+
+ /* Wrong interface */
+ vif = cache->_c.mfc_parent;
+ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
+ goto ip6_output;
+
+ ip6_mr_output_finish(net, mrt, dev, skb, cache);
+ return 0;
+
+ip6_output:
+ return ip6_output(net, sk, skb);
+}
+
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
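[ Note: the ip6mr.c changes split the old ip6mr_forward2() into a shared ip6mr_prepare_xmit() helper plus two thin wrappers, and add ip6_mr_output() as a dst.output entry point for multicast routes (wired up by the route.c hunk further down that sets rt->dst.output = ip6_mr_output). ip6_mr_output() also uses guard(rcu)(), the scope-based helper that takes rcu_read_lock() and drops it automatically when the function returns. A compressed sketch of the resulting per-VIF transmit flow, with statistics, PIM register handling and the (*,*) logic elided: ]

static void example_xmit_one(struct net *net, struct mr_table *mrt,
			     struct sk_buff *skb, int vifi, bool forwarding)
{
	struct net_device *indev = skb->dev;	/* prepare_xmit retargets skb->dev */

	if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) {
		kfree_skb(skb);
		return;
	}

	if (forwarding) {
		/* forwarded packets still traverse the netfilter FORWARD hook */
		IP6CB(skb)->flags |= IP6SKB_FORWARDED;
		NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb,
			indev, skb->dev, ip6mr_forward2_finish);
	} else {
		/* locally generated multicast goes straight to ip6_output() */
		ip6_output(net, NULL, skb);
	}
}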
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 72d4858dec18..8607569de34f 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+static struct lock_class_key xfrm_state_lock_key;
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
t = xfrm_state_alloc(net);
if (!t)
goto out;
+ lockdep_set_class(&t->lock, &xfrm_state_lock_key);
t->id.proto = IPPROTO_IPV6;
t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr);
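[ Note: the ipcomp6.c hunk gives the child tunnel state's lock its own lockdep class, so that taking the parent state's lock and then the freshly created child's lock is not flagged as recursive locking. The idiom is generic; a minimal sketch on an invented structure: ]

#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static struct lock_class_key child_lock_key;	/* one key per lock "role" */

struct child_obj {
	spinlock_t lock;
};

static struct child_obj *child_obj_alloc(void)
{
	struct child_obj *child = kzalloc(sizeof(*child), GFP_KERNEL);

	if (!child)
		return NULL;

	spin_lock_init(&child->lock);
	/* Move the lock into its own class so parent->lock and child->lock
	 * may nest without a lockdep splat.
	 */
	lockdep_set_class(&child->lock, &child_lock_key);
	return child;
}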
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1e225e6489ea..e66ec623972e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -117,26 +117,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
return opt;
}
-static bool setsockopt_needs_rtnl(int optname)
-{
- switch (optname) {
- case IPV6_ADDRFORM:
- case IPV6_ADD_MEMBERSHIP:
- case IPV6_DROP_MEMBERSHIP:
- case IPV6_JOIN_ANYCAST:
- case IPV6_LEAVE_ANYCAST:
- case MCAST_JOIN_GROUP:
- case MCAST_LEAVE_GROUP:
- case MCAST_JOIN_SOURCE_GROUP:
- case MCAST_LEAVE_SOURCE_GROUP:
- case MCAST_BLOCK_SOURCE:
- case MCAST_UNBLOCK_SOURCE:
- case MCAST_MSFILTER:
- return true;
- }
- return false;
-}
-
static int copy_group_source_from_sockptr(struct group_source_req *greqs,
sockptr_t optval, int optlen)
{
@@ -395,9 +375,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
- int val, valbool;
int retv = -ENOPROTOOPT;
- bool needs_rtnl = setsockopt_needs_rtnl(optname);
+ int val, valbool;
if (sockptr_is_null(optval))
val = 0;
@@ -562,8 +541,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
return 0;
}
}
- if (needs_rtnl)
- rtnl_lock();
+
sockopt_lock_sock(sk);
/* Another thread has converted the socket into IPv4 with
@@ -969,8 +947,6 @@ done:
unlock:
sockopt_release_sock(sk);
- if (needs_rtnl)
- rtnl_unlock();
return retv;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 65831b4fee1f..36ca27496b3c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -108,9 +108,9 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
-/*
- * socket join on multicast group
- */
+#define mc_assert_locked(idev) \
+ lockdep_assert_held(&(idev)->mc_lock)
+
#define mc_dereference(e, idev) \
rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock))
@@ -169,17 +169,18 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
+/*
+ * socket join on multicast group
+ */
static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
const struct in6_addr *addr, unsigned int mode)
{
- struct net_device *dev = NULL;
- struct ipv6_mc_socklist *mc_lst;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc_lst;
struct net *net = sock_net(sk);
+ struct net_device *dev = NULL;
int err;
- ASSERT_RTNL();
-
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -199,13 +200,18 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
if (ifindex == 0) {
struct rt6_info *rt;
+
+ rcu_read_lock();
rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
- dev = rt->dst.dev;
+ dev = dst_dev(&rt->dst);
+ dev_hold(dev);
ip6_rt_put(rt);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ rcu_read_unlock();
+ } else {
+ dev = dev_get_by_index(net, ifindex);
+ }
if (!dev) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -216,12 +222,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
mc_lst->sfmode = mode;
RCU_INIT_POINTER(mc_lst->sflist, NULL);
- /*
- * now add/increase the group membership on the device
- */
-
+ /* now add/increase the group membership on the device */
err = __ipv6_dev_mc_inc(dev, addr, mode);
+ dev_put(dev);
+
if (err) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
return err;
@@ -248,14 +253,36 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
/*
* socket leave on multicast group
*/
+static void __ipv6_sock_mc_drop(struct sock *sk, struct ipv6_mc_socklist *mc_lst)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, mc_lst->ifindex);
+ if (dev) {
+ struct inet6_dev *idev = in6_dev_get(dev);
+
+ ip6_mc_leave_src(sk, mc_lst, idev);
+
+ if (idev) {
+ __ipv6_dev_mc_dec(idev, &mc_lst->addr);
+ in6_dev_put(idev);
+ }
+
+ dev_put(dev);
+ } else {
+ ip6_mc_leave_src(sk, mc_lst, NULL);
+ }
+
+ atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
+ kfree_rcu(mc_lst, rcu);
+}
+
int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6_mc_socklist *mc_lst;
struct ipv6_mc_socklist __rcu **lnk;
- struct net *net = sock_net(sk);
-
- ASSERT_RTNL();
+ struct ipv6_mc_socklist *mc_lst;
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
@@ -265,23 +292,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
lnk = &mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
- struct net_device *dev;
-
*lnk = mc_lst->next;
-
- dev = __dev_get_by_index(net, mc_lst->ifindex);
- if (dev) {
- struct inet6_dev *idev = __in6_dev_get(dev);
-
- ip6_mc_leave_src(sk, mc_lst, idev);
- if (idev)
- __ipv6_dev_mc_dec(idev, &mc_lst->addr);
- } else {
- ip6_mc_leave_src(sk, mc_lst, NULL);
- }
-
- atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
- kfree_rcu(mc_lst, rcu);
+ __ipv6_sock_mc_drop(sk, mc_lst);
return 0;
}
}
@@ -290,31 +302,33 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
}
EXPORT_SYMBOL(ipv6_sock_mc_drop);
-static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net,
- const struct in6_addr *group,
- int ifindex)
+static struct inet6_dev *ip6_mc_find_dev(struct net *net,
+ const struct in6_addr *group,
+ int ifindex)
{
struct net_device *dev = NULL;
- struct inet6_dev *idev = NULL;
+ struct inet6_dev *idev;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
+ struct rt6_info *rt;
+ rcu_read_lock();
+ rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
- dev = rt->dst.dev;
+ dev = dst_dev(&rt->dst);
+ dev_hold(dev);
ip6_rt_put(rt);
}
+ rcu_read_unlock();
} else {
- dev = __dev_get_by_index(net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
}
-
if (!dev)
return NULL;
- idev = __in6_dev_get(dev);
- if (!idev)
- return NULL;
- if (idev->dead)
- return NULL;
+
+ idev = in6_dev_get(dev);
+ dev_put(dev);
+
return idev;
}
@@ -322,28 +336,10 @@ void __ipv6_sock_mc_close(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct ipv6_mc_socklist *mc_lst;
- struct net *net = sock_net(sk);
-
- ASSERT_RTNL();
while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) {
- struct net_device *dev;
-
np->ipv6_mc_list = mc_lst->next;
-
- dev = __dev_get_by_index(net, mc_lst->ifindex);
- if (dev) {
- struct inet6_dev *idev = __in6_dev_get(dev);
-
- ip6_mc_leave_src(sk, mc_lst, idev);
- if (idev)
- __ipv6_dev_mc_dec(idev, &mc_lst->addr);
- } else {
- ip6_mc_leave_src(sk, mc_lst, NULL);
- }
-
- atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
- kfree_rcu(mc_lst, rcu);
+ __ipv6_sock_mc_drop(sk, mc_lst);
}
}
@@ -354,24 +350,22 @@ void ipv6_sock_mc_close(struct sock *sk)
if (!rcu_access_pointer(np->ipv6_mc_list))
return;
- rtnl_lock();
lock_sock(sk);
__ipv6_sock_mc_close(sk);
release_sock(sk);
- rtnl_unlock();
}
int ip6_mc_source(int add, int omode, struct sock *sk,
- struct group_source_req *pgsr)
+ struct group_source_req *pgsr)
{
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct in6_addr *source, *group;
+ struct net *net = sock_net(sk);
struct ipv6_mc_socklist *pmc;
- struct inet6_dev *idev;
- struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
- struct net *net = sock_net(sk);
- int i, j, rv;
+ struct inet6_dev *idev;
int leavegroup = 0;
+ int i, j, rv;
int err;
source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
@@ -380,13 +374,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (!ipv6_addr_is_multicast(group))
return -EINVAL;
- idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface);
+ idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface);
if (!idev)
return -ENODEV;
+ mutex_lock(&idev->mc_lock);
+
+ if (idev->dead) {
+ err = -ENODEV;
+ goto done;
+ }
+
err = -EADDRNOTAVAIL;
- mutex_lock(&idev->mc_lock);
for_each_pmc_socklock(inet6, sk, pmc) {
if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
continue;
@@ -483,6 +483,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
ip6_mc_add_src(idev, group, omode, 1, source, 1);
done:
mutex_unlock(&idev->mc_lock);
+ in6_dev_put(idev);
if (leavegroup)
err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
return err;
@@ -491,12 +492,12 @@ done:
int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
struct sockaddr_storage *list)
{
- const struct in6_addr *group;
- struct ipv6_mc_socklist *pmc;
- struct inet6_dev *idev;
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *newpsl, *psl;
struct net *net = sock_net(sk);
+ const struct in6_addr *group;
+ struct ipv6_mc_socklist *pmc;
+ struct inet6_dev *idev;
int leavegroup = 0;
int i, err;
@@ -508,10 +509,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
gsf->gf_fmode != MCAST_EXCLUDE)
return -EINVAL;
- idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface);
+ idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
if (!idev)
return -ENODEV;
+ mutex_lock(&idev->mc_lock);
+
+ if (idev->dead) {
+ err = -ENODEV;
+ goto done;
+ }
+
err = 0;
if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
@@ -544,24 +552,19 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
psin6 = (struct sockaddr_in6 *)list;
newpsl->sl_addr[i] = psin6->sin6_addr;
}
- mutex_lock(&idev->mc_lock);
+
err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
- mutex_unlock(&idev->mc_lock);
sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
newpsl->sl_max));
goto done;
}
- mutex_unlock(&idev->mc_lock);
} else {
newpsl = NULL;
- mutex_lock(&idev->mc_lock);
ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
- mutex_unlock(&idev->mc_lock);
}
- mutex_lock(&idev->mc_lock);
psl = sock_dereference(pmc->sflist, sk);
if (psl) {
ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -571,12 +574,14 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
}
+
rcu_assign_pointer(pmc->sflist, newpsl);
- mutex_unlock(&idev->mc_lock);
kfree_rcu(psl, rcu);
pmc->sfmode = gsf->gf_fmode;
err = 0;
done:
+ mutex_unlock(&idev->mc_lock);
+ in6_dev_put(idev);
if (leavegroup)
err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
return err;
@@ -597,10 +602,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
if (!ipv6_addr_is_multicast(group))
return -EINVAL;
- /* changes to the ipv6_mc_list require the socket lock and
- * rtnl lock. We have the socket lock, so reading the list is safe.
- */
-
for_each_pmc_socklock(inet6, sk, pmc) {
if (pmc->ifindex != gsf->gf_interface)
continue;
@@ -668,12 +669,13 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-/* called with mc_lock */
static void igmp6_group_added(struct ifmcaddr6 *mc)
{
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ mc_assert_locked(mc->idev);
+
if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
IPV6_ADDR_SCOPE_LINKLOCAL)
return;
@@ -703,12 +705,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
mld_ifc_event(mc->idev);
}
-/* called with mc_lock */
static void igmp6_group_dropped(struct ifmcaddr6 *mc)
{
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
+ mc_assert_locked(mc->idev);
+
if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) <
IPV6_ADDR_SCOPE_LINKLOCAL)
return;
@@ -729,14 +732,13 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
refcount_dec(&mc->mca_refcnt);
}
-/*
- * deleted ifmcaddr6 manipulation
- * called with mc_lock
- */
+/* deleted ifmcaddr6 manipulation */
static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
{
struct ifmcaddr6 *pmc;
+ mc_assert_locked(idev);
+
/* this is an "ifmcaddr6" for convenience; only the fields below
* are actually used. In particular, the refcnt and users are not
* used for management of the delete list. Using the same structure
@@ -770,54 +772,54 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
rcu_assign_pointer(idev->mc_tomb, pmc);
}
-/* called with mc_lock */
static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
{
struct ip6_sf_list *psf, *sources, *tomb;
struct in6_addr *pmca = &im->mca_addr;
struct ifmcaddr6 *pmc, *pmc_prev;
+ mc_assert_locked(idev);
+
pmc_prev = NULL;
for_each_mc_tomb(idev, pmc) {
if (ipv6_addr_equal(&pmc->mca_addr, pmca))
break;
pmc_prev = pmc;
}
- if (pmc) {
- if (pmc_prev)
- rcu_assign_pointer(pmc_prev->next, pmc->next);
- else
- rcu_assign_pointer(idev->mc_tomb, pmc->next);
- }
-
- if (pmc) {
- im->idev = pmc->idev;
- if (im->mca_sfmode == MCAST_INCLUDE) {
- tomb = rcu_replace_pointer(im->mca_tomb,
- mc_dereference(pmc->mca_tomb, pmc->idev),
- lockdep_is_held(&im->idev->mc_lock));
- rcu_assign_pointer(pmc->mca_tomb, tomb);
-
- sources = rcu_replace_pointer(im->mca_sources,
- mc_dereference(pmc->mca_sources, pmc->idev),
- lockdep_is_held(&im->idev->mc_lock));
- rcu_assign_pointer(pmc->mca_sources, sources);
- for_each_psf_mclock(im, psf)
- psf->sf_crcount = idev->mc_qrv;
- } else {
- im->mca_crcount = idev->mc_qrv;
- }
- in6_dev_put(pmc->idev);
- ip6_mc_clear_src(pmc);
- kfree_rcu(pmc, rcu);
+ if (!pmc)
+ return;
+ if (pmc_prev)
+ rcu_assign_pointer(pmc_prev->next, pmc->next);
+ else
+ rcu_assign_pointer(idev->mc_tomb, pmc->next);
+
+ im->idev = pmc->idev;
+ if (im->mca_sfmode == MCAST_INCLUDE) {
+ tomb = rcu_replace_pointer(im->mca_tomb,
+ mc_dereference(pmc->mca_tomb, pmc->idev),
+ lockdep_is_held(&im->idev->mc_lock));
+ rcu_assign_pointer(pmc->mca_tomb, tomb);
+
+ sources = rcu_replace_pointer(im->mca_sources,
+ mc_dereference(pmc->mca_sources, pmc->idev),
+ lockdep_is_held(&im->idev->mc_lock));
+ rcu_assign_pointer(pmc->mca_sources, sources);
+ for_each_psf_mclock(im, psf)
+ psf->sf_crcount = idev->mc_qrv;
+ } else {
+ im->mca_crcount = idev->mc_qrv;
}
+ ip6_mc_clear_src(pmc);
+ in6_dev_put(pmc->idev);
+ kfree_rcu(pmc, rcu);
}
-/* called with mc_lock */
static void mld_clear_delrec(struct inet6_dev *idev)
{
struct ifmcaddr6 *pmc, *nextpmc;
+ mc_assert_locked(idev);
+
pmc = mc_dereference(idev->mc_tomb, idev);
RCU_INIT_POINTER(idev->mc_tomb, NULL);
@@ -843,29 +845,18 @@ static void mld_clear_delrec(struct inet6_dev *idev)
static void mld_clear_query(struct inet6_dev *idev)
{
- struct sk_buff *skb;
-
spin_lock_bh(&idev->mc_query_lock);
- while ((skb = __skb_dequeue(&idev->mc_query_queue)))
- kfree_skb(skb);
+ __skb_queue_purge(&idev->mc_query_queue);
spin_unlock_bh(&idev->mc_query_lock);
}
static void mld_clear_report(struct inet6_dev *idev)
{
- struct sk_buff *skb;
-
spin_lock_bh(&idev->mc_report_lock);
- while ((skb = __skb_dequeue(&idev->mc_report_queue)))
- kfree_skb(skb);
+ __skb_queue_purge(&idev->mc_report_queue);
spin_unlock_bh(&idev->mc_report_lock);
}
-static void mca_get(struct ifmcaddr6 *mc)
-{
- refcount_inc(&mc->mca_refcnt);
-}
-
static void ma_put(struct ifmcaddr6 *mc)
{
if (refcount_dec_and_test(&mc->mca_refcnt)) {
@@ -874,13 +865,14 @@ static void ma_put(struct ifmcaddr6 *mc)
}
}
-/* called with mc_lock */
static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
unsigned int mode)
{
struct ifmcaddr6 *mc;
+ mc_assert_locked(idev);
+
mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return NULL;
@@ -945,23 +937,22 @@ error:
static int __ipv6_dev_mc_inc(struct net_device *dev,
const struct in6_addr *addr, unsigned int mode)
{
- struct ifmcaddr6 *mc;
struct inet6_dev *idev;
-
- ASSERT_RTNL();
+ struct ifmcaddr6 *mc;
/* we need to take a reference on idev */
idev = in6_dev_get(dev);
-
if (!idev)
return -EINVAL;
- if (idev->dead) {
+ mutex_lock(&idev->mc_lock);
+
+ if (READ_ONCE(idev->dead)) {
+ mutex_unlock(&idev->mc_lock);
in6_dev_put(idev);
return -ENODEV;
}
- mutex_lock(&idev->mc_lock);
for_each_mc_mclock(idev, mc) {
if (ipv6_addr_equal(&mc->mca_addr, addr)) {
mc->mca_users++;
@@ -982,13 +973,11 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
rcu_assign_pointer(mc->next, idev->mc_list);
rcu_assign_pointer(idev->mc_list, mc);
- mca_get(mc);
-
mld_del_delrec(idev, mc);
igmp6_group_added(mc);
inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST);
mutex_unlock(&idev->mc_lock);
- ma_put(mc);
+
return 0;
}
@@ -1005,9 +994,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct ifmcaddr6 *ma, __rcu **map;
- ASSERT_RTNL();
-
mutex_lock(&idev->mc_lock);
+
for (map = &idev->mc_list;
(ma = mc_dereference(*map, idev));
map = &ma->next) {
@@ -1038,13 +1026,12 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
struct inet6_dev *idev;
int err;
- ASSERT_RTNL();
-
- idev = __in6_dev_get(dev);
+ idev = in6_dev_get(dev);
if (!idev)
- err = -ENODEV;
- else
- err = __ipv6_dev_mc_dec(idev, addr);
+ return -ENODEV;
+
+ err = __ipv6_dev_mc_dec(idev, addr);
+ in6_dev_put(idev);
return err;
}
@@ -1091,46 +1078,51 @@ unlock:
return rv;
}
-/* called with mc_lock */
static void mld_gq_start_work(struct inet6_dev *idev)
{
unsigned long tv = get_random_u32_below(idev->mc_maxdelay);
+ mc_assert_locked(idev);
+
idev->mc_gq_running = 1;
if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2))
in6_dev_hold(idev);
}
-/* called with mc_lock */
static void mld_gq_stop_work(struct inet6_dev *idev)
{
+ mc_assert_locked(idev);
+
idev->mc_gq_running = 0;
if (cancel_delayed_work(&idev->mc_gq_work))
__in6_dev_put(idev);
}
-/* called with mc_lock */
static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
{
unsigned long tv = get_random_u32_below(delay);
+ mc_assert_locked(idev);
+
if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2))
in6_dev_hold(idev);
}
-/* called with mc_lock */
static void mld_ifc_stop_work(struct inet6_dev *idev)
{
+ mc_assert_locked(idev);
+
idev->mc_ifc_count = 0;
if (cancel_delayed_work(&idev->mc_ifc_work))
__in6_dev_put(idev);
}
-/* called with mc_lock */
static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay)
{
unsigned long tv = get_random_u32_below(delay);
+ mc_assert_locked(idev);
+
if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2))
in6_dev_hold(idev);
}
@@ -1155,14 +1147,13 @@ static void mld_report_stop_work(struct inet6_dev *idev)
__in6_dev_put(idev);
}
-/*
- * IGMP handling (alias multicast ICMPv6 messages)
- * called with mc_lock
- */
+/* IGMP handling (alias multicast ICMPv6 messages) */
static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
{
unsigned long delay = resptime;
+ mc_assert_locked(ma->idev);
+
/* Do not start work for these addresses */
if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -1181,15 +1172,15 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
ma->mca_flags |= MAF_TIMER_RUNNING;
}
-/* mark EXCLUDE-mode sources
- * called with mc_lock
- */
+/* mark EXCLUDE-mode sources */
static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
const struct in6_addr *srcs)
{
struct ip6_sf_list *psf;
int i, scount;
+ mc_assert_locked(pmc->idev);
+
scount = 0;
for_each_psf_mclock(pmc, psf) {
if (scount == nsrcs)
@@ -1212,13 +1203,14 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
return true;
}
-/* called with mc_lock */
static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
const struct in6_addr *srcs)
{
struct ip6_sf_list *psf;
int i, scount;
+ mc_assert_locked(pmc->idev);
+
if (pmc->mca_sfmode == MCAST_EXCLUDE)
return mld_xmarksources(pmc, nsrcs, srcs);
@@ -1913,7 +1905,6 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0)
-/* called with mc_lock */
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
int type, int gdeleted, int sdeleted,
int crsend)
@@ -1927,6 +1918,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_report *pmr;
unsigned int mtu;
+ mc_assert_locked(idev);
+
if (pmc->mca_flags & MAF_NOREPORT)
return skb;
@@ -2045,12 +2038,13 @@ empty_source:
return skb;
}
-/* called with mc_lock */
static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
{
struct sk_buff *skb = NULL;
int type;
+ mc_assert_locked(idev);
+
if (!pmc) {
for_each_mc_mclock(idev, pmc) {
if (pmc->mca_flags & MAF_NOREPORT)
@@ -2072,10 +2066,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
mld_sendpack(skb);
}
-/*
- * remove zero-count source records from a source filter list
- * called with mc_lock
- */
+/* remove zero-count source records from a source filter list */
static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev)
{
struct ip6_sf_list *psf_prev, *psf_next, *psf;
@@ -2099,7 +2090,6 @@ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *i
}
}
-/* called with mc_lock */
static void mld_send_cr(struct inet6_dev *idev)
{
struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
@@ -2263,13 +2253,14 @@ err_out:
goto out;
}
-/* called with mc_lock */
static void mld_send_initial_cr(struct inet6_dev *idev)
{
- struct sk_buff *skb;
struct ifmcaddr6 *pmc;
+ struct sk_buff *skb;
int type;
+ mc_assert_locked(idev);
+
if (mld_in_v1_mode(idev))
return;
@@ -2316,13 +2307,14 @@ static void mld_dad_work(struct work_struct *work)
in6_dev_put(idev);
}
-/* called with mc_lock */
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
- const struct in6_addr *psfsrc)
+ const struct in6_addr *psfsrc)
{
struct ip6_sf_list *psf, *psf_prev;
int rv = 0;
+ mc_assert_locked(pmc->idev);
+
psf_prev = NULL;
for_each_psf_mclock(pmc, psf) {
if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
@@ -2359,7 +2351,6 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
return rv;
}
-/* called with mc_lock */
static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta)
@@ -2371,6 +2362,8 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!idev)
return -ENODEV;
+ mc_assert_locked(idev);
+
for_each_mc_mclock(idev, pmc) {
if (ipv6_addr_equal(pmca, &pmc->mca_addr))
break;
@@ -2412,15 +2405,14 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
return err;
}
-/*
- * Add multicast single-source filter to the interface list
- * called with mc_lock
- */
+/* Add multicast single-source filter to the interface list */
static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
- const struct in6_addr *psfsrc)
+ const struct in6_addr *psfsrc)
{
struct ip6_sf_list *psf, *psf_prev;
+ mc_assert_locked(pmc->idev);
+
psf_prev = NULL;
for_each_psf_mclock(pmc, psf) {
if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
@@ -2443,11 +2435,12 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
return 0;
}
-/* called with mc_lock */
static void sf_markstate(struct ifmcaddr6 *pmc)
{
- struct ip6_sf_list *psf;
int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+ struct ip6_sf_list *psf;
+
+ mc_assert_locked(pmc->idev);
for_each_psf_mclock(pmc, psf) {
if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
@@ -2460,14 +2453,15 @@ static void sf_markstate(struct ifmcaddr6 *pmc)
}
}
-/* called with mc_lock */
static int sf_setstate(struct ifmcaddr6 *pmc)
{
- struct ip6_sf_list *psf, *dpsf;
int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+ struct ip6_sf_list *psf, *dpsf;
int qrv = pmc->idev->mc_qrv;
int new_in, rv;
+ mc_assert_locked(pmc->idev);
+
rv = 0;
for_each_psf_mclock(pmc, psf) {
if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
@@ -2526,10 +2520,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
return rv;
}
-/*
- * Add multicast source filter list to the interface list
- * called with mc_lock
- */
+/* Add multicast source filter list to the interface list */
static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int sfmode, int sfcount, const struct in6_addr *psfsrc,
int delta)
@@ -2541,6 +2532,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
if (!idev)
return -ENODEV;
+ mc_assert_locked(idev);
+
for_each_mc_mclock(idev, pmc) {
if (ipv6_addr_equal(pmca, &pmc->mca_addr))
break;
@@ -2588,11 +2581,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
return err;
}
-/* called with mc_lock */
static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
{
struct ip6_sf_list *psf, *nextpsf;
+ mc_assert_locked(pmc->idev);
+
for (psf = mc_dereference(pmc->mca_tomb, pmc->idev);
psf;
psf = nextpsf) {
@@ -2613,11 +2607,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1);
}
-/* called with mc_lock */
static void igmp6_join_group(struct ifmcaddr6 *ma)
{
unsigned long delay;
+ mc_assert_locked(ma->idev);
+
if (ma->mca_flags & MAF_NOREPORT)
return;
@@ -2664,9 +2659,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
return err;
}
-/* called with mc_lock */
static void igmp6_leave_group(struct ifmcaddr6 *ma)
{
+ mc_assert_locked(ma->idev);
+
if (mld_in_v1_mode(ma->idev)) {
if (ma->mca_flags & MAF_LAST_REPORTER) {
igmp6_send(&ma->mca_addr, ma->idev->dev,
@@ -2711,9 +2707,10 @@ static void mld_ifc_work(struct work_struct *work)
in6_dev_put(idev);
}
-/* called with mc_lock */
static void mld_ifc_event(struct inet6_dev *idev)
{
+ mc_assert_locked(idev);
+
if (mld_in_v1_mode(idev))
return;
@@ -2868,8 +2865,6 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
{
struct ifmcaddr6 *pmc;
- ASSERT_RTNL();
-
mutex_lock(&idev->mc_lock);
if (mld_in_v1_mode(idev)) {
for_each_mc_mclock(idev, pmc)
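[ Note: the mcast.c rework removes the RTNL requirement from the socket-level multicast paths: the ASSERT_RTNL() calls go away, device lookups switch from the RTNL-only __dev_get_by_index()/__in6_dev_get() to the reference-taking dev_get_by_index()/in6_dev_get(), and the old "called with mc_lock" comments become an enforced mc_assert_locked() check. A sketch of the two idioms; apart from the macro itself, which is quoted from the hunks above, the names are illustrative: ]

#define mc_assert_locked(idev) \
	lockdep_assert_held(&(idev)->mc_lock)	/* replaces the comment-only contract */

static struct inet6_dev *example_find_idev(struct net *net, int ifindex)
{
	struct net_device *dev = dev_get_by_index(net, ifindex);	/* takes a ref */
	struct inet6_dev *idev;

	if (!dev)
		return NULL;

	idev = in6_dev_get(dev);	/* refcounted; caller must in6_dev_put() */
	dev_put(dev);
	return idev;
}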
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ecb5c4b8518f..7d5abb3158ec 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -243,9 +243,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
case ND_OPT_NONCE:
case ND_OPT_REDIRECT_HDR:
if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
- ND_PRINTK(2, warn,
- "%s: duplicated ND6 option found: type=%d\n",
- __func__, nd_opt->nd_opt_type);
+ net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n",
+ __func__, nd_opt->nd_opt_type);
} else {
ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
}
@@ -275,11 +274,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
* to accommodate future extension to the
* protocol.
*/
- ND_PRINTK(2, notice,
- "%s: ignored unsupported option; type=%d, len=%d\n",
- __func__,
- nd_opt->nd_opt_type,
- nd_opt->nd_opt_len);
+ net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n",
+ __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len);
}
next_opt:
opt_len -= l;
@@ -377,24 +373,25 @@ static int ndisc_constructor(struct neighbour *neigh)
static int pndisc_constructor(struct pneigh_entry *n)
{
struct in6_addr *addr = (struct in6_addr *)&n->key;
- struct in6_addr maddr;
struct net_device *dev = n->dev;
+ struct in6_addr maddr;
- if (!dev || !__in6_dev_get(dev))
+ if (!dev)
return -EINVAL;
+
addrconf_addr_solict_mult(addr, &maddr);
- ipv6_dev_mc_inc(dev, &maddr);
- return 0;
+ return ipv6_dev_mc_inc(dev, &maddr);
}
static void pndisc_destructor(struct pneigh_entry *n)
{
struct in6_addr *addr = (struct in6_addr *)&n->key;
- struct in6_addr maddr;
struct net_device *dev = n->dev;
+ struct in6_addr maddr;
- if (!dev || !__in6_dev_get(dev))
+ if (!dev)
return;
+
addrconf_addr_solict_mult(addr, &maddr);
ipv6_dev_mc_dec(dev, &maddr);
}
@@ -473,6 +470,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
{
struct icmp6hdr *icmp6h = icmp6_hdr(skb);
struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev;
struct inet6_dev *idev;
struct net *net;
struct sock *sk;
@@ -507,11 +505,12 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- idev = __in6_dev_get(dst->dev);
+ dev = dst_dev(dst);
+ idev = __in6_dev_get(dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, sk, skb, NULL, dst->dev,
+ net, sk, skb, NULL, dev,
dst_output);
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
@@ -751,9 +750,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
- ND_PRINTK(1, dbg,
- "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
- __func__, target);
+ net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n",
+ __func__, target);
}
ndisc_send_ns(dev, target, target, saddr, 0);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
@@ -770,11 +768,9 @@ static int pndisc_is_router(const void *pkey,
struct pneigh_entry *n;
int ret = -1;
- read_lock_bh(&nd_tbl.lock);
- n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
+ n = pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
if (n)
- ret = !!(n->flags & NTF_ROUTER);
- read_unlock_bh(&nd_tbl.lock);
+ ret = !!(READ_ONCE(n->flags) & NTF_ROUTER);
return ret;
}
@@ -811,7 +807,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
return SKB_DROP_REASON_PKT_TOO_SMALL;
if (ipv6_addr_is_multicast(&msg->target)) {
- ND_PRINTK(2, warn, "NS: multicast target address\n");
+ net_dbg_ratelimited("NS: multicast target address\n");
return reason;
}
@@ -820,7 +816,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
* DAD has to be destined for solicited node multicast address.
*/
if (dad && !ipv6_addr_is_solict_mult(daddr)) {
- ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
+ net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n");
return reason;
}
@@ -830,8 +826,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
if (!lladdr) {
- ND_PRINTK(2, warn,
- "NS: invalid link-layer address length\n");
+ net_dbg_ratelimited("NS: invalid link-layer address length\n");
return reason;
}
@@ -841,8 +836,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
* in the message.
*/
if (dad) {
- ND_PRINTK(2, warn,
- "NS: bad DAD packet (link-layer address option)\n");
+ net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n");
return reason;
}
}
@@ -859,10 +853,8 @@ have_ifp:
if (nonce != 0 && ifp->dad_nonce == nonce) {
u8 *np = (u8 *)&nonce;
/* Matching nonce if looped back */
- ND_PRINTK(2, notice,
- "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
- ifp->idev->dev->name,
- &ifp->addr, np);
+ net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n",
+ ifp->idev->dev->name, &ifp->addr, np);
goto out;
}
/*
@@ -1013,13 +1005,13 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
return SKB_DROP_REASON_PKT_TOO_SMALL;
if (ipv6_addr_is_multicast(&msg->target)) {
- ND_PRINTK(2, warn, "NA: target address is multicast\n");
+ net_dbg_ratelimited("NA: target address is multicast\n");
return reason;
}
if (ipv6_addr_is_multicast(daddr) &&
msg->icmph.icmp6_solicited) {
- ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
+ net_dbg_ratelimited("NA: solicited NA is multicasted\n");
return reason;
}
@@ -1038,8 +1030,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
if (!lladdr) {
- ND_PRINTK(2, warn,
- "NA: invalid link-layer address length\n");
+ net_dbg_ratelimited("NA: invalid link-layer address length\n");
return reason;
}
}
@@ -1060,9 +1051,9 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
unsolicited advertisement.
*/
if (skb->pkt_type != PACKET_LOOPBACK)
- ND_PRINTK(1, warn,
- "NA: %pM advertised our address %pI6c on %s!\n",
- eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
+ net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n",
+ eth_hdr(skb)->h_source, &ifp->addr,
+ ifp->idev->dev->name);
in6_ifa_put(ifp);
return reason;
}
@@ -1107,7 +1098,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
READ_ONCE(net->ipv6.devconf_all->forwarding) &&
READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
- pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
+ pneigh_lookup(&nd_tbl, net, &msg->target, dev)) {
/* XXX: idev->cnf.proxy_ndp */
goto out;
}
@@ -1149,7 +1140,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
if (!idev) {
- ND_PRINTK(1, err, "RS: can't find in6 device\n");
+ net_err_ratelimited("RS: can't find in6 device\n");
return reason;
}
@@ -1257,11 +1248,9 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) -
sizeof(struct ra_msg);
- ND_PRINTK(2, info,
- "RA: %s, dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK(2, warn, "RA: source address is not link-local\n");
+ net_dbg_ratelimited("RA: source address is not link-local\n");
return reason;
}
if (optlen < 0)
@@ -1269,15 +1258,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
#ifdef CONFIG_IPV6_NDISC_NODETYPE
if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
- ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
+ net_dbg_ratelimited("RA: from host or unauthorized router\n");
return reason;
}
#endif
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev) {
- ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
- skb->dev->name);
+ net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name);
return reason;
}
@@ -1285,18 +1273,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
if (!ipv6_accept_ra(in6_dev)) {
- ND_PRINTK(2, info,
- "RA: %s, did not accept ra for dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__,
+ skb->dev->name);
goto skip_linkparms;
}
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific parameters from interior routers */
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
- ND_PRINTK(2, info,
- "RA: %s, nodetype is NODEFAULT, dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__,
+ skb->dev->name);
goto skip_linkparms;
}
#endif
@@ -1325,18 +1311,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
send_ifinfo_notify = true;
if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
- ND_PRINTK(2, info,
- "RA: %s, defrtr is false for dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__,
+ skb->dev->name);
goto skip_defrtr;
}
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
if (lifetime != 0 &&
lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
- ND_PRINTK(2, info,
- "RA: router lifetime (%ds) is too short: %s\n",
- lifetime, skb->dev->name);
+ net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime,
+ skb->dev->name);
goto skip_defrtr;
}
@@ -1346,9 +1330,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
net = dev_net(in6_dev->dev);
if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
- ND_PRINTK(2, info,
- "RA from local address detected on dev: %s: default router ignored\n",
- skb->dev->name);
+ net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n",
+ skb->dev->name);
goto skip_defrtr;
}
@@ -1366,9 +1349,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
- ND_PRINTK(0, err,
- "RA: %s got default router without neighbour\n",
- __func__);
+ net_err_ratelimited("RA: %s got default router without neighbour\n",
+ __func__);
fib6_info_release(rt);
return reason;
}
@@ -1381,10 +1363,10 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
rt = NULL;
}
- ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n",
- rt, lifetime, defrtr_usr_metric, skb->dev->name);
+ net_dbg_ratelimited("RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime,
+ defrtr_usr_metric, skb->dev->name);
if (!rt && lifetime) {
- ND_PRINTK(3, info, "RA: adding default router\n");
+ net_dbg_ratelimited("RA: adding default router\n");
if (neigh)
neigh_release(neigh);
@@ -1393,9 +1375,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
skb->dev, pref, defrtr_usr_metric,
lifetime);
if (!rt) {
- ND_PRINTK(0, err,
- "RA: %s failed to add default route\n",
- __func__);
+ net_err_ratelimited("RA: %s failed to add default route\n", __func__);
return reason;
}
@@ -1403,9 +1383,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
- ND_PRINTK(0, err,
- "RA: %s got default router without neighbour\n",
- __func__);
+ net_err_ratelimited("RA: %s got default router without neighbour\n",
+ __func__);
fib6_info_release(rt);
return reason;
}
@@ -1436,7 +1415,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
- ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
+ net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n");
}
}
@@ -1492,8 +1471,7 @@ skip_linkparms:
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
skb->dev);
if (!lladdr) {
- ND_PRINTK(2, warn,
- "RA: invalid link-layer address length\n");
+ net_dbg_ratelimited("RA: invalid link-layer address length\n");
goto out;
}
}
@@ -1507,9 +1485,8 @@ skip_linkparms:
}
if (!ipv6_accept_ra(in6_dev)) {
- ND_PRINTK(2, info,
- "RA: %s, accept_ra is false for dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__,
+ skb->dev->name);
goto out;
}
@@ -1517,9 +1494,8 @@ skip_linkparms:
if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
- ND_PRINTK(2, info,
- "RA from local address detected on dev: %s: router info ignored.\n",
- skb->dev->name);
+ net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n",
+ skb->dev->name);
goto skip_routeinfo;
}
@@ -1555,9 +1531,8 @@ skip_routeinfo:
#ifdef CONFIG_IPV6_NDISC_NODETYPE
/* skip link-specific ndopts from interior routers */
if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) {
- ND_PRINTK(2, info,
- "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
- __func__, skb->dev->name);
+ net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n",
+ __func__, skb->dev->name);
goto out;
}
#endif
@@ -1586,7 +1561,7 @@ skip_routeinfo:
}
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
- ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
+ net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu);
} else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
fib6_metric_set(rt, RTAX_MTU, mtu);
@@ -1605,7 +1580,7 @@ skip_routeinfo:
}
if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
- ND_PRINTK(2, warn, "RA: invalid RA options\n");
+ net_dbg_ratelimited("RA: invalid RA options\n");
}
out:
/* Send a notify if RA changed managed/otherconf flags or
@@ -1633,15 +1608,13 @@ static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb)
switch (skb->ndisc_nodetype) {
case NDISC_NODETYPE_HOST:
case NDISC_NODETYPE_NODEFAULT:
- ND_PRINTK(2, warn,
- "Redirect: from host or unauthorized router\n");
+ net_dbg_ratelimited("Redirect: from host or unauthorized router\n");
return reason;
}
#endif
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK(2, warn,
- "Redirect: source address is not link-local\n");
+ net_dbg_ratelimited("Redirect: source address is not link-local\n");
return reason;
}
@@ -1702,15 +1675,13 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
- ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
- dev->name);
+ net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name);
return;
}
if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
- ND_PRINTK(2, warn,
- "Redirect: target address is not link-local unicast\n");
+ net_dbg_ratelimited("Redirect: target address is not link-local unicast\n");
return;
}
@@ -1729,8 +1700,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY) {
- ND_PRINTK(2, warn,
- "Redirect: destination is not a neighbour\n");
+ net_dbg_ratelimited("Redirect: destination is not a neighbour\n");
goto release;
}
@@ -1743,8 +1713,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
if (dev->addr_len) {
struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target);
if (!neigh) {
- ND_PRINTK(2, warn,
- "Redirect: no neigh for target address\n");
+ net_dbg_ratelimited("Redirect: no neigh for target address\n");
goto release;
}
@@ -1845,14 +1814,12 @@ enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
__skb_push(skb, skb->data - skb_transport_header(skb));
if (ipv6_hdr(skb)->hop_limit != 255) {
- ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
- ipv6_hdr(skb)->hop_limit);
+ net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit);
return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
}
if (msg->icmph.icmp6_code != 0) {
- ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
- msg->icmph.icmp6_code);
+ net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code);
return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
}
@@ -2003,9 +1970,8 @@ static int __net_init ndisc_net_init(struct net *net)
err = inet_ctl_sock_create(&sk, PF_INET6,
SOCK_RAW, IPPROTO_ICMPV6, net);
if (err < 0) {
- ND_PRINTK(0, err,
- "NDISC: Failed to initialize the control socket (err %d)\n",
- err);
+ net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n",
+ err);
return err;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 4541836ee3da..45f9105f9ac1 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst_dev(skb);
struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
@@ -72,7 +72,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#endif
/* Change in oif may mean change in hh_len. */
- hh_len = skb_dst(skb)->dev->hard_header_len;
+ hh_len = skb_dst_dev(skb)->hard_header_len;
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
0, GFP_ATOMIC))
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index e087a8e97ba7..276860f65baa 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -9,9 +9,8 @@ menu "IPv6: Netfilter Configuration"
# old sockopt interface and eval loop
config IP6_NF_IPTABLES_LEGACY
tristate "Legacy IP6 tables support"
- depends on INET && IPV6
- select NETFILTER_XTABLES
- default n
+ depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY
+ default m if NETFILTER_XTABLES_LEGACY
help
ip6tables is a legacy packet classifier.
This is not needed if you are using iptables over nftables
@@ -196,8 +195,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
- default m if NETFILTER_ADVANCED=n
- select IP6_NF_IPTABLES_LEGACY
+ default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
tristate
help
Packet filtering defines a table `filter', which has a series of
@@ -233,8 +232,8 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
- default m if NETFILTER_ADVANCED=n
- select IP6_NF_IPTABLES_LEGACY
+ default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -244,7 +243,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -258,7 +257,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
- select IP6_NF_IPTABLES_LEGACY
+ depends on IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -269,8 +268,8 @@ config IP6_NF_NAT
tristate "ip6tables NAT support"
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
+ depends on IP6_NF_IPTABLES_LEGACY
select NF_NAT
- select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index b903c62c00c9..6da3102b7c1b 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -38,7 +38,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- skb->dev = dst->dev;
+ skb->dev = dst_dev(dst);
skb->protocol = htons(ETH_P_IPV6);
return true;
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 9ae2b2725bf9..838295fa32e3 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -300,7 +300,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
skb_dst_set(oldskb, dst);
}
- fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst_dev(oldskb));
fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 806d4b5dd1e6..d21fe27fe21e 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -105,7 +105,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
{
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
if (hoplimit == 0) {
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
struct inet6_dev *idev;
rcu_read_lock();
@@ -141,7 +141,7 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, sk, skb, NULL, skb_dst(skb)->dev,
+ net, sk, skb, NULL, skb_dst_dev(skb),
dst_output);
}
EXPORT_SYMBOL_GPL(__ip6_local_out);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 84d90dd8b3f0..82b0492923d4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -142,7 +142,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
fl6.flowi6_mark = ipc6.sockc.mark;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fda640ebd53f..4c3f8245c40f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -777,7 +777,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = ipc6.sockc.mark;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7d4bcf3fda5b..25ec8001898d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -104,11 +104,11 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
return container_of(q, struct frag_queue, q);
}
-static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+static int ip6_frag_queue(struct net *net,
+ struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff,
u32 *prob_offset, int *refs)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
int offset, end, fragsize;
struct sk_buff *prev_tail;
struct net_device *dev;
@@ -324,10 +324,10 @@ out_fail:
static int ipv6_frag_rcv(struct sk_buff *skb)
{
+ const struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct net *net = skb_dst_dev_net(skb);
struct frag_hdr *fhdr;
struct frag_queue *fq;
- const struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct net *net = dev_net(skb_dst(skb)->dev);
u8 nexthdr;
int iif;
@@ -384,7 +384,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
spin_lock(&fq->q.lock);
fq->iif = iif;
- ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff,
+ ret = ip6_frag_queue(net, fq, skb, fhdr, IP6CB(skb)->nhoff,
&prob_offset, &refs);
spin_unlock(&fq->q.lock);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0143262094b0..3299cfa12e21 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -228,13 +228,13 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
- dst->dev, skb, daddr);
+ dst_dev(dst), skb, daddr);
}
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
const struct rt6_info *rt = dst_rt6_info(dst);
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -391,9 +391,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
static bool __rt6_check_expired(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_EXPIRES)
- return time_after(jiffies, rt->dst.expires);
- else
- return false;
+ return time_after(jiffies, READ_ONCE(rt->dst.expires));
+ return false;
}
static bool rt6_check_expired(const struct rt6_info *rt)
@@ -403,10 +402,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
from = rcu_dereference(rt->from);
if (rt->rt6i_flags & RTF_EXPIRES) {
- if (time_after(jiffies, rt->dst.expires))
+ if (time_after(jiffies, READ_ONCE(rt->dst.expires)))
return true;
} else if (from) {
- return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+ return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK ||
fib6_check_expired(from);
}
return false;
@@ -1145,6 +1144,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
rt->dst.input = ip6_input;
} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
+ rt->dst.output = ip6_mr_output;
} else {
rt->dst.input = ip6_forward;
}
@@ -2133,12 +2133,13 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
* expired, independently from their aging, as per RFC 8201 section 4
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
- if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) +
+ gc_args->timeout)) {
pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
- } else if (time_after(jiffies, rt->dst.expires)) {
+ } else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) {
pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
@@ -2776,11 +2777,10 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
u32 cookie)
{
if (!__rt6_check_expired(rt) &&
- rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK &&
fib6_check(from, cookie))
return &rt->dst;
- else
- return NULL;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
@@ -2870,7 +2870,7 @@ static void rt6_update_expires(struct rt6_info *rt0, int timeout)
rcu_read_lock();
from = rcu_dereference(rt0->from);
if (from)
- rt0->dst.expires = from->expires;
+ WRITE_ONCE(rt0->dst.expires, from->expires);
rcu_read_unlock();
}
@@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst->dev,
+ .dev = dst_dev(dst),
.gw = &rt6->rt6i_gateway,
};
@@ -3010,10 +3010,10 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
oif = l3mdev_master_ifindex(skb->dev);
ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
- sk->sk_uid);
+ sk_uid(sk));
dst = __sk_dst_get(sk);
- if (!dst || !dst->obsolete ||
+ if (!dst || !READ_ONCE(dst->obsolete) ||
dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
return;
@@ -3232,13 +3232,13 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
- READ_ONCE(sk->sk_mark), sk->sk_uid);
+ READ_ONCE(sk->sk_mark), sk_uid(sk));
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
- struct net_device *dev = dst->dev;
+ struct net_device *dev = dst_dev(dst);
unsigned int mtu = dst_mtu(dst);
struct net *net;
@@ -3737,6 +3737,53 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
}
}
+static int fib6_config_validate(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ /* RTF_PCPU is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_PCPU) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
+ goto errout;
+ }
+
+ /* RTF_CACHE is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_CACHE) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
+ goto errout;
+ }
+
+ if (cfg->fc_type > RTN_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid route type");
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ goto errout;
+ }
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (cfg->fc_src_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid source address length");
+ goto errout;
+ }
+
+ if (cfg->fc_nh_id && cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ goto errout;
+ }
+#else
+ if (cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack,
+ "Specifying source address requires IPV6_SUBTREES to be enabled");
+ goto errout;
+ }
+#endif
+ return 0;
+errout:
+ return -EINVAL;
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
@@ -3886,6 +3933,10 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct fib6_info *rt;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
rt = ip6_route_info_create(cfg, gfp_flags, extack);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -4250,7 +4301,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst->dev,
+ .dev = dst_dev(dst),
.gw = &rt->rt6i_gateway,
};
@@ -4479,53 +4530,6 @@ void rt6_purge_dflt_routers(struct net *net)
rcu_read_unlock();
}
-static int fib6_config_validate(struct fib6_config *cfg,
- struct netlink_ext_ack *extack)
-{
- /* RTF_PCPU is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_PCPU) {
- NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
- goto errout;
- }
-
- /* RTF_CACHE is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_CACHE) {
- NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
- goto errout;
- }
-
- if (cfg->fc_type > RTN_MAX) {
- NL_SET_ERR_MSG(extack, "Invalid route type");
- goto errout;
- }
-
- if (cfg->fc_dst_len > 128) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- goto errout;
- }
-
-#ifdef CONFIG_IPV6_SUBTREES
- if (cfg->fc_src_len > 128) {
- NL_SET_ERR_MSG(extack, "Invalid source address length");
- goto errout;
- }
-
- if (cfg->fc_nh_id && cfg->fc_src_len) {
- NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
- goto errout;
- }
-#else
- if (cfg->fc_src_len) {
- NL_SET_ERR_MSG(extack,
- "Specifying source address requires IPV6_SUBTREES to be enabled");
- goto errout;
- }
-#endif
- return 0;
-errout:
- return -EINVAL;
-}
-
static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
@@ -4563,10 +4567,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
switch (cmd) {
case SIOCADDRT:
- err = fib6_config_validate(&cfg, NULL);
- if (err)
- break;
-
/* Only do the default setting of fc_metric in route adding */
if (cfg.fc_metric == 0)
cfg.fc_metric = IP6_RT_PRIO_USER;
@@ -4587,13 +4587,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(dst->dev);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
struct inet6_dev *idev;
SKB_DR(reason);
int type;
if (netif_is_l3_master(skb->dev) ||
- dst->dev == net->loopback_dev)
+ dev == net->loopback_dev)
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
else
idev = ip6_dst_idev(dst);
@@ -4630,7 +4631,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = skb_dst_dev(skb);
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
@@ -4641,7 +4642,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = skb_dst_dev(skb);
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
@@ -5346,7 +5347,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
*/
rcu_read_lock();
- if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
+ if ((nlflags & NLM_F_APPEND) && rt_last &&
+ READ_ONCE(rt_last->fib6_nsiblings)) {
rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
struct fib6_info,
fib6_siblings);
@@ -5402,6 +5404,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
int nhn = 0;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
replace = (cfg->fc_nlinfo.nlh &&
(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
@@ -5636,10 +5642,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- err = fib6_config_validate(&cfg, extack);
- if (err)
- return err;
-
if (cfg.fc_metric == 0)
cfg.fc_metric = IP6_RT_PRIO_USER;
@@ -5670,32 +5672,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
static size_t rt6_nlmsg_size(struct fib6_info *f6i)
{
+ struct fib6_info *sibling;
+ struct fib6_nh *nh;
int nexthop_len;
if (f6i->nh) {
nexthop_len = nla_total_size(4); /* RTA_NH_ID */
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
- } else {
- struct fib6_nh *nh = f6i->fib6_nh;
- struct fib6_info *sibling;
-
- nexthop_len = 0;
- if (f6i->fib6_nsiblings) {
- rt6_nh_nlmsg_size(nh, &nexthop_len);
-
- rcu_read_lock();
+ goto common;
+ }
- list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
- fib6_siblings) {
- rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
- }
+ rcu_read_lock();
+retry:
+ nh = f6i->fib6_nh;
+ nexthop_len = 0;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ rt6_nh_nlmsg_size(nh, &nexthop_len);
- rcu_read_unlock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ goto retry;
}
- nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
-
+ rcu_read_unlock();
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+common:
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
+ nla_total_size(16) /* RTA_DST */
@@ -5844,17 +5848,19 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
* each as a nexthop within RTA_MULTIPATH.
*/
if (rt6) {
+ struct net_device *dev;
+
if (rt6_flags & RTF_GATEWAY &&
nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
goto nla_put_failure;
- if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
+ dev = dst_dev(dst);
+ if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
goto nla_put_failure;
- if (dst->lwtstate &&
- lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+ if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
- } else if (rt->fib6_nsiblings) {
+ } else if (READ_ONCE(rt->fib6_nsiblings)) {
struct fib6_info *sibling;
struct nlattr *mp;
@@ -5904,7 +5910,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (rt6_flags & RTF_EXPIRES) {
- expires = dst ? dst->expires : rt->expires;
+ expires = dst ? READ_ONCE(dst->expires) : rt->expires;
expires -= jiffies;
}
@@ -5956,16 +5962,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
- if (f6i->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ const struct fib6_info *sibling;
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh->fib_nh_dev == dev)
+ rcu_read_lock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ if (sibling->fib6_nh->fib_nh_dev == dev) {
+ rcu_read_unlock();
return true;
+ }
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ break;
}
+ rcu_read_unlock();
}
-
return false;
}
@@ -6321,8 +6332,9 @@ errout:
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int nlm_flags)
{
- struct sk_buff *skb;
struct net *net = info->nl_net;
+ struct sk_buff *skb;
+ size_t sz;
u32 seq;
int err;
@@ -6330,17 +6342,21 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
rcu_read_lock();
-
- skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
+ sz = rt6_nlmsg_size(rt);
+retry:
+ skb = nlmsg_new(sz, GFP_ATOMIC);
if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
event, info->portid, seq, nlm_flags);
if (err < 0) {
- /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
+ /* -EMSGSIZE implies needed space grew under us. */
+ if (err == -EMSGSIZE) {
+ sz = max(rt6_nlmsg_size(rt), sz << 1);
+ goto retry;
+ }
goto errout;
}
@@ -6805,8 +6821,7 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
-BTF_ID_LIST(btf_fib6_info_id)
-BTF_ID(struct, fib6_info)
+BTF_ID_LIST_SINGLE(btf_fib6_info_id, struct, fib6_info)
static const struct bpf_iter_seq_info ipv6_route_seq_info = {
.seq_ops = &ipv6_route_seq_ops,
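
The route.c hunks above mostly annotate lockless readers: dst.expires, dst.obsolete, dst.lastuse and fib6_nsiblings are now loaded with READ_ONCE() and published with WRITE_ONCE(), and inet6_rt_notify() retries with a larger buffer if the message outgrows the size estimated under RCU. A minimal userspace sketch of the READ_ONCE/WRITE_ONCE pairing, with simplified macro definitions standing in for the kernel's:

/* Simplified stand-ins for the kernel macros: one untorn, non-cached access. */
#define READ_ONCE(x)		(*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))

struct rt { unsigned long expires; };

/* Writer side (cf. rt6_update_expires): publish the new expiry exactly once. */
static void rt_set_expires(struct rt *rt, unsigned long when)
{
	WRITE_ONCE(rt->expires, when);
}

/* Lockless reader (cf. __rt6_check_expired): load the field exactly once,
 * so the compiler cannot re-read it and observe two different values. */
static int rt_expired(const struct rt *rt, unsigned long now)
{
	return now > READ_ONCE(rt->expires);
}
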
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 7c05ac846646..c7942cf65567 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -129,13 +129,13 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
- const struct ipv6hdr *oldhdr;
+ struct ipv6hdr oldhdr;
struct ipv6hdr *hdr;
unsigned char *buf;
size_t hdrlen;
int err;
- oldhdr = ipv6_hdr(skb);
+ memcpy(&oldhdr, ipv6_hdr(skb), sizeof(oldhdr));
buf = kcalloc(struct_size(srh, segments.addr, srh->segments_left), 2, GFP_ATOMIC);
if (!buf)
@@ -147,7 +147,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
memcpy(isrh, srh, sizeof(*isrh));
memcpy(isrh->rpl_segaddr, &srh->rpl_segaddr[1],
(srh->segments_left - 1) * 16);
- isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr->daddr;
+ isrh->rpl_segaddr[srh->segments_left - 1] = oldhdr.daddr;
ipv6_rpl_srh_compress(csrh, isrh, &srh->rpl_segaddr[0],
isrh->segments_left - 1);
@@ -169,7 +169,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
skb_mac_header_rebuild(skb);
hdr = ipv6_hdr(skb);
- memmove(hdr, oldhdr, sizeof(*hdr));
+ memmove(hdr, &oldhdr, sizeof(*hdr));
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, csrh, hdrlen);
@@ -242,7 +242,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
local_bh_enable();
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
if (unlikely(err))
goto drop;
}
@@ -297,7 +297,7 @@ static int rpl_input(struct sk_buff *skb)
local_bh_enable();
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
if (unlikely(err))
goto drop;
} else {
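
The rpl_iptunnel.c change above replaces a pointer into the packet headroom with a stack copy of the IPv6 header: the header rebuild between taking the pointer and using it can reallocate the skb data and leave the old pointer dangling. A small standalone illustration of that hazard and the copy-by-value fix, using a hypothetical realloc-based buffer in place of the skb:

#include <stdlib.h>
#include <string.h>

struct hdr { unsigned char daddr[16]; };
struct buf { unsigned char *data; size_t len; };

/* Hypothetical helper: may realloc() and therefore move buf->data,
 * much as pskb_expand_head() can move skb->data. */
static int buf_grow(struct buf *b, size_t extra)
{
	unsigned char *p = realloc(b->data, b->len + extra);

	if (!p)
		return -1;
	b->data = p;
	b->len += extra;
	return 0;
}

static int rewrite(struct buf *b)
{
	struct hdr old;

	/* Snapshot by value *before* anything that can move the data. */
	memcpy(&old, b->data, sizeof(old));

	if (buf_grow(b, 64) < 0)	/* a pointer taken earlier could now dangle */
		return -1;

	memcpy(b->data, &old, sizeof(old));	/* safe: 'old' lives on the stack */
	return 0;
}
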
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 51583461ae29..3e1b9991131a 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -128,7 +128,8 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(dst->dev);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
@@ -181,7 +182,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
@@ -212,7 +213,8 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(dst->dev);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
struct ipv6hdr *hdr, *inner_hdr;
int hdrlen = ipv6_optlen(osrh);
int red_tlv_offset, tlv_offset;
@@ -270,7 +272,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
if (skip_srh) {
hdr->nexthdr = proto;
- set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
goto out;
}
@@ -306,7 +308,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
srcaddr:
isrh->nexthdr = proto;
- set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
@@ -362,7 +364,7 @@ static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = skb_dst_dev_net(skb);
err = seg6_push_hmac(net, &hdr->saddr, isrh);
if (unlikely(err))
@@ -507,7 +509,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
local_bh_enable();
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
if (unlikely(err))
goto drop;
} else {
@@ -518,7 +520,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, seg6_input_finish);
+ skb_dst_dev(skb), seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -528,7 +530,7 @@ drop:
static int seg6_input_nf(struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst_dev(skb);
struct net *net = dev_net(skb->dev);
switch (skb->protocol) {
@@ -593,7 +595,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
local_bh_enable();
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
if (unlikely(err))
goto drop;
}
@@ -603,7 +605,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
- NULL, skb_dst(skb)->dev, dst_output);
+ NULL, dst_dev(dst), dst_output);
return dst_output(net, sk, skb);
drop:
@@ -614,7 +616,7 @@ drop:
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst_dev(skb);
switch (skb->protocol) {
case htons(ETH_P_IP):
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index a11a02b4ba95..2b41e4c0dddd 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -270,7 +270,7 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
- u32 tbl_id, bool local_delivery)
+ u32 tbl_id, bool local_delivery, int oif)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -282,6 +282,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_iif = skb->dev->ifindex;
+ fl6.flowi6_oif = oif;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
fl6.saddr = hdr->saddr;
fl6.flowlabel = ip6_flowinfo(hdr);
@@ -291,17 +292,19 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (nhaddr)
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
- if (!tbl_id) {
+ if (!tbl_id && !oif) {
dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
- } else {
+ } else if (tbl_id) {
struct fib6_table *table;
table = fib6_get_table(net, tbl_id);
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
+ rt = ip6_pol_route(net, table, oif, &fl6, skb, flags);
dst = &rt->dst;
+ } else {
+ dst = ip6_route_output(net, NULL, &fl6);
}
/* we want to discard traffic destined for local packet processing,
@@ -310,7 +313,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!local_delivery)
dev_flags |= IFF_LOOPBACK;
- if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
+ if (dst && (dst_dev(dst)->flags & dev_flags) && !dst->error) {
dst_release(dst);
dst = NULL;
}
@@ -330,7 +333,7 @@ out:
int seg6_lookup_nexthop(struct sk_buff *skb,
struct in6_addr *nhaddr, u32 tbl_id)
{
- return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
+ return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false, 0);
}
static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
@@ -418,7 +421,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
static int input_action_end_x_finish(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- seg6_lookup_nexthop(skb, &slwt->nh6, 0);
+ seg6_lookup_any_nexthop(skb, &slwt->nh6, 0, false, slwt->oif);
return dst_input(skb);
}
@@ -1277,7 +1280,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
/* note: this time we do not need to specify the table because the VRF
* takes care of selecting the correct table.
*/
- seg6_lookup_any_nexthop(skb, NULL, 0, true);
+ seg6_lookup_any_nexthop(skb, NULL, 0, true, 0);
return dst_input(skb);
@@ -1285,7 +1288,7 @@ legacy_mode:
#endif
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
+ seg6_lookup_any_nexthop(skb, NULL, slwt->table, true, 0);
return dst_input(skb);
@@ -1477,7 +1480,8 @@ static struct seg6_action_desc seg6_action_table[] = {
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
.optattrs = SEG6_F_LOCAL_COUNTERS |
- SEG6_F_LOCAL_FLAVORS,
+ SEG6_F_LOCAL_FLAVORS |
+ SEG6_F_ATTR(SEG6_LOCAL_OIF),
.input = input_action_end_x,
},
{
@@ -2083,7 +2087,7 @@ struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
{
/* Locator-Block and Locator-Node Function cannot exceed 128 bits
- * (i.e. C-SID container lenghts).
+ * (i.e. C-SID container length).
*/
if (next_csid_chk_cntr_bits(block_len, func_len))
return -EINVAL;
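
The seg6_local.c change above lets End.X carry an output interface: seg6_lookup_any_nexthop() gains an oif argument, sets fl6.flowi6_oif, and now dispatches between three lookup paths. A condensed restatement of that dispatch (the function names in the comments are the kernel's, the helper itself is illustrative):

enum lookup_kind { LOOKUP_INPUT, LOOKUP_TABLE, LOOKUP_OUTPUT };

static enum lookup_kind seg6_pick_lookup(unsigned int tbl_id, int oif)
{
	if (!tbl_id && !oif)
		return LOOKUP_INPUT;	/* ip6_route_input_lookup() */
	if (tbl_id)
		return LOOKUP_TABLE;	/* fib6_get_table() + ip6_pol_route(..., oif, ...) */
	return LOOKUP_OUTPUT;		/* ip6_route_output(), honouring fl6.flowi6_oif */
}
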
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a72dbca9e8fc..12496ba1b7d4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1035,7 +1035,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
- df, !net_eq(tunnel->net, dev_net(dev)));
+ df, !net_eq(tunnel->net, dev_net(dev)), 0);
return NETDEV_TX_OK;
tx_error_icmp:
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9d83eadd308b..f0ee1a909771 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -236,7 +236,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_mark = ireq->ir_mark;
fl6.fl6_dport = ireq->ir_rmt_port;
fl6.fl6_sport = inet_sk(sk)->inet_sport;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e8e68a142649..7577e7eb2c97 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -41,6 +41,7 @@
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>
+#include <net/aligned_data.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
@@ -269,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport)
fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT;
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
@@ -835,7 +836,6 @@ static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.family = AF_INET6,
.obj_size = sizeof(struct tcp6_request_sock),
- .rtx_syn_ack = tcp_rtx_synack,
.send_ack = tcp_v6_reqsk_send_ack,
.destructor = tcp_v6_reqsk_destructor,
.send_reset = tcp_v6_send_reset,
@@ -868,7 +868,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
int oif, int rst, u8 tclass, __be32 label,
u32 priority, u32 txhash, struct tcp_key *key)
{
- struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+ struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
unsigned int tot_len = sizeof(struct tcphdr);
struct sock *ctl_sk = net->ipv6.tcp_sk;
const struct tcphdr *th = tcp_hdr(skb);
@@ -1043,7 +1043,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
if (!sk && !ipv6_unicast_destination(skb))
return;
- net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+ net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
/* Invalid TCP option size or twice included auth */
if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
return;
@@ -1834,14 +1834,12 @@ lookup:
}
refcounted = true;
nsk = NULL;
- if (!tcp_filter(sk, skb)) {
+ if (!tcp_filter(sk, skb, &drop_reason)) {
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
&drop_reason);
- } else {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -1897,10 +1895,9 @@ process:
nf_reset_ct(skb);
- if (tcp_filter(sk, skb)) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ if (tcp_filter(sk, skb, &drop_reason))
goto discard_and_relse;
- }
+
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
@@ -2168,7 +2165,7 @@ static void get_openreq6(struct seq_file *seq,
jiffies_to_clock_t(ttd),
req->num_timeout,
from_kuid_munged(seq_user_ns(seq),
- sock_i_uid(req->rsk_listener)),
+ sk_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
@@ -2234,7 +2231,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sp)),
icsk->icsk_probes_out,
sock_i_ino(sp),
refcount_read(&sp->sk_refcnt), sp,
@@ -2357,7 +2354,7 @@ struct proto tcpv6_prot = {
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
- .memory_allocated = &tcp_memory_allocated,
+ .memory_allocated = &net_aligned_data.tcp_memory_allocated,
.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
.memory_pressure = &tcp_memory_pressure,
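
Both tcp_filter() here and sk_filter_trim_cap() in the udp.c hunk below now report the precise drop reason through an out parameter, so callers no longer hard-code SKB_DROP_REASON_SOCKET_FILTER on the error path. A minimal sketch of the out-parameter pattern, with illustrative types only:

/* Illustrative enum; the kernel's is enum skb_drop_reason. */
enum drop_reason { DROP_NOT_SPECIFIED, DROP_SOCKET_FILTER, DROP_PKT_TOO_SMALL };

static int filter_pkt(int pkt_ok, enum drop_reason *reason)
{
	if (pkt_ok)
		return 0;
	*reason = DROP_SOCKET_FILTER;	/* the filter picks the exact reason */
	return 1;
}

static int rcv_pkt(int pkt_ok)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (filter_pkt(pkt_ok, &reason))
		return -(int)reason;	/* caller consumes the reason it was given */
	return 0;
}
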
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7317f8e053f1..6a68f77da44b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -750,7 +750,8 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
- READ_ONCE(sk->sk_mark), sk->sk_uid);
+ READ_ONCE(sk->sk_mark),
+ sk_uid(sk));
} else {
ip6_sk_redirect(skb, sk);
}
@@ -893,10 +894,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_lib_checksum_complete(skb))
goto csum_error;
- if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
- drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
+ if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason))
goto drop;
- }
udp_csum_pull_header(skb);
@@ -1620,7 +1619,7 @@ do_udp_sendmsg:
if (!fl6->flowi6_oif)
fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
- fl6->flowi6_uid = sk->sk_uid;
+ fl6->flowi6_uid = sk_uid(sk);
if (msg->msg_controllen) {
opt = &opt_space;
@@ -1924,7 +1923,7 @@ struct proto udpv6_prot = {
.psock_update_sk_prot = udp_bpf_update_proto,
#endif
- .memory_allocated = &udp_memory_allocated,
+ .memory_allocated = &net_aligned_data.udp_memory_allocated,
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 0590f566379d..8a406be25a3a 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP6_IMPL_H
#define _UDP6_IMPL_H
+#include <net/aligned_data.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <net/protocol.h>
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index a60bec9b14f1..2cec542437f7 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -59,7 +59,7 @@ struct proto udplitev6_prot = {
.rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
+ .memory_allocated = &net_aligned_data.udp_memory_allocated,
.per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc,
.sysctl_mem = sysctl_udp_mem,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 841c81abaaf4..9005fc156a20 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0)
goto out;
+ /* set the transport header to ESP */
+ skb_set_transport_header(skb, offset);
+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index b3d5d1f266ee..512bdaf13699 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -106,7 +106,7 @@ skip_frag:
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, skb->dev, skb_dst(skb)->dev,
+ net, sk, skb, skb->dev, skb_dst_dev(skb),
__xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index bf140ef781c1..5120a763da0d 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
- xfrm_state_flush(net, 0, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 83070a2e4485..473a7847d80b 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -24,6 +24,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel_stat.h>
+#include <linux/export.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/spinlock.h>
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 24aec295a51c..a0be3896a934 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -835,8 +835,7 @@ start:
if (!sk_wmem_schedule(sk, copy))
goto wait_for_memory;
- err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
if (err < 0) {
if (err == -EMSGSIZE)
goto wait_for_memory;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index efc2a91f4c48..2ebde0352245 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1766,7 +1766,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
if (proto == 0)
return -EINVAL;
- err = xfrm_state_flush(net, proto, true, false);
+ err = xfrm_state_flush(net, proto, true);
err2 = unicast_flush_resp(sk, hdr);
if (err || err2) {
if (err == -ESRCH) /* empty table - go quietly */
@@ -3788,7 +3788,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
refcount_read(&s->sk_refcnt),
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
- from_kuid_munged(seq_user_ns(f), sock_i_uid(s)),
+ from_kuid_munged(seq_user_ns(f), sk_uid(s)),
sock_i_ino(s)
);
return 0;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b98d13584c81..ea232f338dcb 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -545,7 +545,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
- fl6.flowi6_uid = sk->sk_uid;
+ fl6.flowi6_uid = sk_uid(sk);
ipcm6_init_sk(&ipc6, sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index cc77ec5769d8..5958a80fe14c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -210,7 +210,7 @@ static int llc_ui_release(struct socket *sock)
dprintk("%s: closing local(%02X) remote(%02X)\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
if (!llc_send_disc(sk))
- llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
+ llc_ui_wait_for_disc(sk, READ_ONCE(sk->sk_rcvtimeo));
if (!sock_flag(sk, SOCK_ZAPPED)) {
struct llc_sap *sap = llc->sap;
@@ -455,7 +455,7 @@ static int llc_ui_shutdown(struct socket *sock, int how)
goto out;
rc = llc_send_disc(sk);
if (!rc)
- rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
+ rc = llc_ui_wait_for_disc(sk, READ_ONCE(sk->sk_rcvtimeo));
/* Wake up anyone sleeping in poll */
sk->sk_state_change(sk);
out:
@@ -712,7 +712,7 @@ static int llc_ui_accept(struct socket *sock, struct socket *newsock,
goto out;
/* wait for a connection to arrive. */
if (skb_queue_empty(&sk->sk_receive_queue)) {
- rc = llc_wait_data(sk, sk->sk_rcvtimeo);
+ rc = llc_wait_data(sk, READ_ONCE(sk->sk_rcvtimeo));
if (rc)
goto out;
}
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 07e9abb5978a..aa81c67b24a1 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(sk),
sk_rmem_alloc_get(sk) - llc->copied_seq,
sk->sk_state,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
llc->link);
out:
return 0;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index ee534797c033..e38f46ffebfa 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -299,7 +299,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
if (!sta->sta.valid_links &&
!sta->sta.deflink.ht_cap.ht_supported &&
- !sta->sta.deflink.he_cap.has_he) {
+ !sta->sta.deflink.he_cap.has_he &&
+ !sta->sta.deflink.s1g_cap.s1g) {
ht_dbg(sta->sdata,
"STA %pM erroneously requests BA session on tid %d w/o HT\n",
sta->sta.addr, tid);
@@ -327,7 +328,8 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
/* XXX: check own ht delayed BA capability?? */
if (((ba_policy != 1) &&
(sta->sta.valid_links ||
- !(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
+ !(sta->sta.deflink.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA) ||
+ !(sta->sta.deflink.s1g_cap.cap[3] & S1G_CAP3_HT_DELAYED_BA))) ||
(buf_size > max_buf_size)) {
status = WLAN_STATUS_INVALID_QOS_PARAM;
ht_dbg_ratelimited(sta->sdata,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index dbd9ad5f3992..d981b0fc57bf 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -616,7 +616,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
!pubsta->deflink.ht_cap.ht_supported &&
!pubsta->deflink.vht_cap.vht_supported &&
!pubsta->deflink.he_cap.has_he &&
- !pubsta->deflink.eht_cap.has_eht)
+ !pubsta->deflink.eht_cap.has_eht &&
+ !pubsta->deflink.s1g_cap.s1g)
return -EINVAL;
if (WARN_ON_ONCE(!local->ops->ampdu_action))
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d9d88f2f2831..2ed07fa121ab 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -178,6 +178,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->nontransmitted = true;
link_conf->bssid_index = params->index;
+ link_conf->bssid_indicator = tx_bss_conf->bssid_indicator;
}
if (params->ema)
link_conf->ema_ap = true;
@@ -885,6 +886,13 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
ret = 0;
memcpy(mac, sta->sta.addr, ETH_ALEN);
sta_set_sinfo(sta, sinfo, true);
+
+ /* Add accumulated data from removed links to sinfo for
+ * MLO consistency

+ */
+ if (sinfo->valid_links)
+ sta_set_accumulated_removed_links_sinfo(sta, sinfo);
+
}
return ret;
@@ -912,6 +920,12 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
if (sta) {
ret = 0;
sta_set_sinfo(sta, sinfo, true);
+
+ /* Add accumulated data from removed links to sinfo for
+ * MLO consistency
+ */
+ if (sinfo->valid_links)
+ sta_set_accumulated_removed_links_sinfo(sta, sinfo);
}
return ret;
@@ -1057,6 +1071,47 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static int
+ieee80211_set_s1g_short_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ struct cfg80211_s1g_short_beacon *params)
+{
+ struct s1g_short_beacon_data *new;
+ struct s1g_short_beacon_data *old =
+ sdata_dereference(link->u.ap.s1g_short_beacon, sdata);
+ size_t new_len =
+ sizeof(*new) + params->short_head_len + params->short_tail_len;
+
+ if (!params->update)
+ return 0;
+
+ if (!params->short_head)
+ return -EINVAL;
+
+ new = kzalloc(new_len, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ /* Memory layout: | struct | head | tail | */
+ new->short_head = (u8 *)new + sizeof(*new);
+ new->short_head_len = params->short_head_len;
+ memcpy(new->short_head, params->short_head, params->short_head_len);
+
+ if (params->short_tail) {
+ new->short_tail = new->short_head + params->short_head_len;
+ new->short_tail_len = params->short_tail_len;
+ memcpy(new->short_tail, params->short_tail,
+ params->short_tail_len);
+ }
+
+ rcu_assign_pointer(link->u.ap.s1g_short_beacon, new);
+
+ if (old)
+ kfree_rcu(old, rcu_head);
+
+ return 0;
+}
+
static int ieee80211_set_ftm_responder_params(
struct ieee80211_sub_if_data *sdata,
const u8 *lci, size_t lci_len,
@@ -1121,13 +1176,13 @@ ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst,
{
int i, offset = 0;
+ dst->cnt = src->cnt;
for (i = 0; i < src->cnt; i++) {
memcpy(pos + offset, src->elem[i].data, src->elem[i].len);
dst->elem[i].len = src->elem[i].len;
dst->elem[i].data = pos + offset;
offset += dst->elem[i].len;
}
- dst->cnt = src->cnt;
return offset;
}
@@ -1218,8 +1273,11 @@ ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
ieee80211_copy_rnr_beacon(pos, new->rnr_ies, rnr);
}
/* update bssid_indicator */
- link_conf->bssid_indicator =
- ilog2(__roundup_pow_of_two(mbssid->cnt + 1));
+ if (new->mbssid_ies->cnt && new->mbssid_ies->elem[0].len > 2)
+ link_conf->bssid_indicator =
+ *(new->mbssid_ies->elem[0].data + 2);
+ else
+ link_conf->bssid_indicator = 0;
}
if (csa) {
@@ -1324,6 +1382,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_link_data *link;
struct ieee80211_bss_conf *link_conf;
struct ieee80211_chan_req chanreq = { .oper = params->chandef };
+ u64 tsf;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1476,8 +1535,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
link_conf->twt_responder = params->twt_responder;
link_conf->he_obss_pd = params->he_obss_pd;
link_conf->he_bss_color = params->beacon.he_bss_color;
- sdata->vif.cfg.s1g = params->chandef.chan->band ==
- NL80211_BAND_S1GHZ;
+ link_conf->s1g_long_beacon_period = params->s1g_long_beacon_period;
+ sdata->vif.cfg.s1g = params->chandef.chan->band == NL80211_BAND_S1GHZ;
sdata->vif.cfg.ssid_len = params->ssid_len;
if (params->ssid_len)
@@ -1524,6 +1583,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (err < 0)
goto error;
+ if (sdata->vif.cfg.s1g) {
+ err = ieee80211_set_s1g_short_beacon(sdata, link,
+ &params->s1g_short_beacon);
+ if (err < 0)
+ goto error;
+ }
+
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
old = sdata_dereference(link->u.ap.beacon, sdata);
@@ -1538,7 +1604,12 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
goto error;
}
- ieee80211_recalc_dtim(local, sdata);
+ tsf = drv_get_tsf(local, sdata);
+ ieee80211_recalc_dtim(sdata, tsf);
+
+ if (link->u.ap.s1g_short_beacon)
+ ieee80211_recalc_sb_count(sdata, tsf);
+
ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_SSID);
ieee80211_link_info_change_notify(sdata, link, changed);
@@ -1602,6 +1673,13 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
if (err < 0)
return err;
+ if (link->u.ap.s1g_short_beacon) {
+ err = ieee80211_set_s1g_short_beacon(sdata, link,
+ &params->s1g_short_beacon);
+ if (err < 0)
+ return err;
+ }
+
if (beacon->he_bss_color_valid &&
beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled;
@@ -1633,6 +1711,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
struct probe_resp *old_probe_resp;
struct fils_discovery_data *old_fils_discovery;
struct unsol_bcast_probe_resp_data *old_unsol_bcast_probe_resp;
+ struct s1g_short_beacon_data *old_s1g_short_beacon;
struct cfg80211_chan_def chandef;
struct ieee80211_link_data *link =
sdata_dereference(sdata->link[link_id], sdata);
@@ -1651,6 +1730,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
old_unsol_bcast_probe_resp =
sdata_dereference(link->u.ap.unsol_bcast_probe_resp,
sdata);
+ old_s1g_short_beacon =
+ sdata_dereference(link->u.ap.s1g_short_beacon, sdata);
/* abort any running channel switch or color change */
link_conf->csa_active = false;
@@ -1673,6 +1754,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
RCU_INIT_POINTER(link->u.ap.probe_resp, NULL);
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL);
RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
+ RCU_INIT_POINTER(link->u.ap.s1g_short_beacon, NULL);
kfree_rcu(old_beacon, rcu_head);
if (old_probe_resp)
kfree_rcu(old_probe_resp, rcu_head);
@@ -1680,6 +1762,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
kfree_rcu(old_fils_discovery, rcu_head);
if (old_unsol_bcast_probe_resp)
kfree_rcu(old_unsol_bcast_probe_resp, rcu_head);
+ if (old_s1g_short_beacon)
+ kfree_rcu(old_s1g_short_beacon, rcu_head);
kfree(link_conf->ftmr_params);
link_conf->ftmr_params = NULL;
@@ -1703,6 +1787,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
link_conf->enable_beacon = false;
sdata->beacon_rate_set = false;
sdata->vif.cfg.ssid_len = 0;
+ sdata->vif.cfg.s1g = false;
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_BEACON_ENABLED);
@@ -1878,6 +1963,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
params->vht_capa ||
params->he_capa ||
params->eht_capa ||
+ params->s1g_capa ||
params->opmode_notif_used;
switch (mode) {
@@ -1956,9 +2042,27 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
params->eht_capa_len,
link_sta);
+ if (params->s1g_capa)
+ ieee80211_s1g_cap_to_sta_s1g_cap(sdata, params->s1g_capa,
+ link_sta);
+
ieee80211_sta_init_nss(link_sta);
if (params->opmode_notif_used) {
+ enum nl80211_chan_width width = link->conf->chanreq.oper.width;
+
+ switch (width) {
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_160:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_320: /* not VHT, allowed for HE/EHT */
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* returned value is only needed for rc update, but the
* rc isn't initialized here yet, so ignore it
*/
@@ -3028,7 +3132,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
return 0;
}
-static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
+static int ieee80211_set_wiphy_params(struct wiphy *wiphy, int radio_idx,
+ u32 changed)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
int err;
@@ -3036,7 +3141,8 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
ieee80211_check_fast_xmit_all(local);
- err = drv_set_frag_threshold(local, wiphy->frag_threshold);
+ err = drv_set_frag_threshold(local, radio_idx,
+ wiphy->frag_threshold);
if (err) {
ieee80211_check_fast_xmit_all(local);
@@ -3050,14 +3156,23 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
coverage_class = changed & WIPHY_PARAM_COVERAGE_CLASS ?
wiphy->coverage_class : -1;
- err = drv_set_coverage_class(local, coverage_class);
+ err = drv_set_coverage_class(local, radio_idx,
+ coverage_class);
if (err)
return err;
}
if (changed & WIPHY_PARAM_RTS_THRESHOLD) {
- err = drv_set_rts_threshold(local, wiphy->rts_threshold);
+ u32 rts_threshold;
+
+ if ((radio_idx == -1) || (radio_idx >= wiphy->n_radio))
+ rts_threshold = wiphy->rts_threshold;
+ else
+ rts_threshold =
+ wiphy->radio_cfg[radio_idx].rts_threshold;
+
+ err = drv_set_rts_threshold(local, radio_idx, rts_threshold);
if (err)
return err;
@@ -3075,18 +3190,19 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
}
if (changed &
(WIPHY_PARAM_RETRY_SHORT | WIPHY_PARAM_RETRY_LONG))
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS);
+ ieee80211_hw_config(local, radio_idx,
+ IEEE80211_CONF_CHANGE_RETRY_LIMITS);
if (changed & (WIPHY_PARAM_TXQ_LIMIT |
WIPHY_PARAM_TXQ_MEMORY_LIMIT |
WIPHY_PARAM_TXQ_QUANTUM))
- ieee80211_txq_set_params(local);
+ ieee80211_txq_set_params(local, radio_idx);
return 0;
}
static int ieee80211_set_tx_power(struct wiphy *wiphy,
- struct wireless_dev *wdev,
+ struct wireless_dev *wdev, int radio_idx,
enum nl80211_tx_power_setting type, int mbm)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
@@ -3214,6 +3330,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
static int ieee80211_get_tx_power(struct wiphy *wiphy,
struct wireless_dev *wdev,
+ int radio_idx,
unsigned int link_id,
int *dbm)
{
@@ -3392,7 +3509,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
}
if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS))
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
ieee80211_recalc_ps(local);
ieee80211_recalc_ps_vif(sdata);
@@ -3549,6 +3666,56 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return 0;
}
+static bool ieee80211_is_scan_ongoing(struct wiphy *wiphy,
+ struct ieee80211_local *local,
+ struct cfg80211_chan_def *chandef)
+{
+ struct cfg80211_scan_request *scan_req;
+ int chan_radio_idx, req_radio_idx;
+ struct ieee80211_roc_work *roc;
+
+ if (list_empty(&local->roc_list) && !local->scanning)
+ return false;
+
+ if (wiphy->n_radio < 2)
+ return true;
+
+ req_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chandef->chan);
+ if (req_radio_idx < 0)
+ return true;
+
+ if (local->scanning) {
+ scan_req = wiphy_dereference(wiphy, local->scan_req);
+ /*
+ * A scan is in progress but its request info is gone. Should not
+ * happen, but if it does, play it safe and assume we can't use
+ * the hw, hence return true.
+ */
+ if (WARN_ON_ONCE(!scan_req))
+ return true;
+
+ return ieee80211_is_radio_idx_in_scan_req(wiphy, scan_req,
+ req_radio_idx);
+ }
+
+ list_for_each_entry(roc, &local->roc_list, list) {
+ chan_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy,
+ roc->chan);
+ /*
+ * The roc work was queued but chan_radio_idx is invalid.
+ * This should not happen, but if it does, play it safe
+ * and return true.
+ */
+ if (chan_radio_idx < 0)
+ return true;
+
+ if (chan_radio_idx == req_radio_idx)
+ return true;
+ }
+
+ return false;
+}
+
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
@@ -3562,7 +3729,7 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
lockdep_assert_wiphy(local->hw.wiphy);
- if (!list_empty(&local->roc_list) || local->scanning)
+ if (ieee80211_is_scan_ongoing(wiphy, local, chandef))
return -EBUSY;
link_data = sdata_dereference(sdata->link[link_id], sdata);
@@ -3742,7 +3909,7 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id)
*/
struct ieee80211_link_data *iter;
- for_each_sdata_link(local, iter) {
+ for_each_sdata_link_rcu(local, iter) {
if (iter->sdata == sdata ||
rcu_access_pointer(iter->conf->tx_bss_conf) != tx_bss_conf)
continue;
@@ -4054,7 +4221,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
lockdep_assert_wiphy(local->hw.wiphy);
- if (!list_empty(&local->roc_list) || local->scanning)
+ if (ieee80211_is_scan_ongoing(wiphy, local, &params->chandef))
return -EBUSY;
if (sdata->wdev.links[link_id].cac_started)
@@ -4088,6 +4255,12 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ err = ieee80211_set_unsol_bcast_probe_resp(sdata,
+ &params->unsol_bcast_probe_resp,
+ link_data, link_conf, &changed);
+ if (err)
+ goto out;
+
chanctx = container_of(conf, struct ieee80211_chanctx, conf);
ch_switch.timestamp = 0;
@@ -4238,7 +4411,8 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
ieee80211_configure_filter(local);
}
-static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
+static int ieee80211_set_antenna(struct wiphy *wiphy, int radio_idx,
+ u32 tx_ant, u32 rx_ant)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
int ret;
@@ -4254,11 +4428,12 @@ static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
return 0;
}
-static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant)
+static int ieee80211_get_antenna(struct wiphy *wiphy, int radio_idx,
+ u32 *tx_ant, u32 *rx_ant)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
- return drv_get_antenna(local, tx_ant, rx_ant);
+ return drv_get_antenna(local, radio_idx, tx_ant, rx_ant);
}
static int ieee80211_set_rekey_data(struct wiphy *wiphy,
@@ -5027,6 +5202,12 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ err = ieee80211_set_unsol_bcast_probe_resp(sdata,
+ &params->unsol_bcast_probe_resp,
+ link, link_conf, &changed);
+ if (err)
+ goto out;
+
err = ieee80211_set_color_change_beacon(link, params, &changed);
if (err)
goto out;
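
ieee80211_set_s1g_short_beacon(), added earlier in this cfg.c diff, packs the struct and both variable-length blobs into one allocation (| struct | head | tail |) and then swaps it in with rcu_assign_pointer()/kfree_rcu(). A standalone sketch of the single-allocation layout, with calloc/free standing in for the kernel allocators:

#include <stdlib.h>
#include <string.h>

struct short_beacon {
	unsigned char *head, *tail;
	size_t head_len, tail_len;
	/* payload bytes follow the struct in the same allocation */
};

static struct short_beacon *sb_build(const unsigned char *head, size_t head_len,
				     const unsigned char *tail, size_t tail_len)
{
	struct short_beacon *sb = calloc(1, sizeof(*sb) + head_len + tail_len);

	if (!sb)
		return NULL;
	sb->head = (unsigned char *)sb + sizeof(*sb);
	sb->head_len = head_len;
	memcpy(sb->head, head, head_len);
	if (tail) {
		sb->tail = sb->head + head_len;
		sb->tail_len = tail_len;
		memcpy(sb->tail, tail, tail_len);
	}
	return sb;	/* one free() releases everything; the kernel uses kfree_rcu() */
}
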
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 3aaf5abf1acc..c9cea0e7ac16 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -644,15 +644,39 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
return NULL;
}
-bool ieee80211_is_radar_required(struct ieee80211_local *local)
+bool ieee80211_is_radar_required(struct ieee80211_local *local,
+ struct cfg80211_scan_request *req)
{
+ struct wiphy *wiphy = local->hw.wiphy;
struct ieee80211_link_data *link;
+ struct ieee80211_channel *chan;
+ int radio_idx;
lockdep_assert_wiphy(local->hw.wiphy);
+ if (!req)
+ return false;
+
for_each_sdata_link(local, link) {
- if (link->radar_required)
- return true;
+ if (link->radar_required) {
+ if (wiphy->n_radio < 2)
+ return true;
+
+ chan = link->conf->chanreq.oper.chan;
+ radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chan);
+ /*
+ * The radio index (radio_idx) is expected to be valid,
+ * as it's derived from a channel tied to a link. If
+ * it's invalid (i.e., negative), return true to avoid
+ * potential issues with radar-sensitive operations.
+ */
+ if (radio_idx < 0)
+ return true;
+
+ if (ieee80211_is_radio_idx_in_scan_req(wiphy, req,
+ radio_idx))
+ return true;
+ }
}
return false;
@@ -720,7 +744,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
/* turn idle off *before* setting channel -- some drivers need that */
changed = ieee80211_idle_off(local);
if (changed)
- ieee80211_hw_config(local, changed);
+ ieee80211_hw_config(local, -1, changed);
err = drv_add_chanctx(local, ctx);
if (err) {
@@ -886,7 +910,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
- if (conf) {
+ if (conf && !local->in_reconfig) {
curr_ctx = container_of(conf, struct ieee80211_chanctx, conf);
drv_unassign_vif_chanctx(local, sdata, link->conf, curr_ctx);
@@ -906,8 +930,9 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
/* succeeded, so commit it to the data structures */
conf = &new_ctx->conf;
- list_add(&link->assigned_chanctx_list,
- &new_ctx->assigned_links);
+ if (!local->in_reconfig)
+ list_add(&link->assigned_chanctx_list,
+ &new_ctx->assigned_links);
}
} else {
ret = 0;
@@ -1084,7 +1109,7 @@ void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
__ieee80211_link_copy_chanctx_to_vlans(link, clear);
}
-int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
+void ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chanctx *ctx = link->reserved_chanctx;
@@ -1092,7 +1117,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
if (WARN_ON(!ctx))
- return -EINVAL;
+ return;
list_del(&link->reserved_chanctx_list);
link->reserved_chanctx = NULL;
@@ -1100,7 +1125,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) {
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) {
if (WARN_ON(!ctx->replace_ctx))
- return -EINVAL;
+ return;
WARN_ON(ctx->replace_ctx->replace_state !=
IEEE80211_CHANCTX_WILL_BE_REPLACED);
@@ -1116,8 +1141,6 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link)
ieee80211_free_chanctx(sdata->local, ctx, false);
}
}
-
- return 0;
}
static struct ieee80211_chanctx *
@@ -1381,6 +1404,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
goto out;
}
+ link->radar_required = link->reserved_radar_required;
list_move(&link->assigned_chanctx_list, &new_ctx->assigned_links);
rcu_assign_pointer(link_conf->chanctx_conf, &new_ctx->conf);
@@ -1909,7 +1933,8 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link,
if (ret < 0)
goto out;
- __ieee80211_link_release_channel(link, false);
+ if (!local->in_reconfig)
+ __ieee80211_link_release_channel(link, false);
ctx = ieee80211_find_chanctx(local, link, chanreq, mode);
/* Note: context is now reserved */
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 5b81998cb0c9..ef7c1a68d88d 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,10 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions
- * Copyright (C) 2022 - 2024 Intel Corporation
+ * Copyright (C) 2022 - 2025 Intel Corporation
*/
#ifndef __MAC80211_DEBUG_H
#define __MAC80211_DEBUG_H
+#include <linux/once_lite.h>
#include <net/cfg80211.h>
#ifdef CONFIG_MAC80211_OCB_DEBUG
@@ -152,6 +153,8 @@ do { \
else \
_sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \
} while (0)
+#define link_err_once(link, fmt, ...) \
+ DO_ONCE_LITE(link_err, link, fmt, ##__VA_ARGS__)
#define link_id_info(sdata, link_id, fmt, ...) \
do { \
if (ieee80211_vif_is_mld(&sdata->vif)) \
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 69e03630f64c..e8b78ec682da 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019, 2021-2024 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021-2025 Intel Corporation
*/
#include <linux/debugfs.h>
@@ -490,7 +490,6 @@ static const char *hw_flag_names[] = {
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
FLAG(DISALLOW_PUNCTURING),
- FLAG(DISALLOW_PUNCTURING_5GHZ),
FLAG(HANDLES_QUIET_CSA),
FLAG(STRICT),
#undef FLAG
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 54c479910d05..1dac78271045 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -704,7 +704,7 @@ static ssize_t ieee80211_if_parse_tsf(
}
}
- ieee80211_recalc_dtim(local, sdata);
+ ieee80211_recalc_dtim(sdata, drv_get_tsf(local, sdata));
return buflen;
}
IEEE80211_IF_FILE_RW(tsf);
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 35349a7f16cb..ba9fba165926 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2015 Intel Deutschland GmbH
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
*/
#include <net/mac80211.h>
#include "ieee80211_i.h"
@@ -515,6 +515,9 @@ int drv_set_key(struct ieee80211_local *local,
!(sdata->vif.active_links & BIT(key->link_id))))
return -ENOLINK;
+ if (fips_enabled)
+ return -EOPNOTSUPP;
+
trace_drv_set_key(local, cmd, sdata, sta, key);
ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
trace_drv_return_int(local, ret);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 307587c8a003..181bcb34b795 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -8,6 +8,7 @@
#ifndef __MAC80211_DRIVER_OPS
#define __MAC80211_DRIVER_OPS
+#include <linux/fips.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
#include "trace.h"
@@ -143,15 +144,16 @@ int drv_change_interface(struct ieee80211_local *local,
void drv_remove_interface(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
-static inline int drv_config(struct ieee80211_local *local, u32 changed)
+static inline int drv_config(struct ieee80211_local *local, int radio_idx,
+ u32 changed)
{
int ret;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
- trace_drv_config(local, changed);
- ret = local->ops->config(&local->hw, changed);
+ trace_drv_config(local, radio_idx, changed);
+ ret = local->ops->config(&local->hw, radio_idx, changed);
trace_drv_return_int(local, ret);
return ret;
}
@@ -387,45 +389,47 @@ static inline void drv_get_key_seq(struct ieee80211_local *local,
}
static inline int drv_set_frag_threshold(struct ieee80211_local *local,
- u32 value)
+ int radio_idx, u32 value)
{
int ret = 0;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
- trace_drv_set_frag_threshold(local, value);
+ trace_drv_set_frag_threshold(local, radio_idx, value);
if (local->ops->set_frag_threshold)
- ret = local->ops->set_frag_threshold(&local->hw, value);
+ ret = local->ops->set_frag_threshold(&local->hw, radio_idx,
+ value);
trace_drv_return_int(local, ret);
return ret;
}
static inline int drv_set_rts_threshold(struct ieee80211_local *local,
- u32 value)
+ int radio_idx, u32 value)
{
int ret = 0;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
- trace_drv_set_rts_threshold(local, value);
+ trace_drv_set_rts_threshold(local, radio_idx, value);
if (local->ops->set_rts_threshold)
- ret = local->ops->set_rts_threshold(&local->hw, value);
+ ret = local->ops->set_rts_threshold(&local->hw, radio_idx,
+ value);
trace_drv_return_int(local, ret);
return ret;
}
static inline int drv_set_coverage_class(struct ieee80211_local *local,
- s16 value)
+ int radio_idx, s16 value)
{
int ret = 0;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
- trace_drv_set_coverage_class(local, value);
+ trace_drv_set_coverage_class(local, radio_idx, value);
if (local->ops->set_coverage_class)
- local->ops->set_coverage_class(&local->hw, value);
+ local->ops->set_coverage_class(&local->hw, radio_idx, value);
else
ret = -EOPNOTSUPP;
@@ -631,6 +635,25 @@ static inline void drv_sta_statistics(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void drv_link_sta_statistics(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta,
+ struct link_station_info *link_sinfo)
+{
+ might_sleep();
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_link_sta_statistics(local, sdata, link_sta);
+ if (local->ops->link_sta_statistics)
+ local->ops->link_sta_statistics(&local->hw, &sdata->vif,
+ link_sta, link_sinfo);
+ trace_drv_return_void(local);
+}
+
int drv_conf_tx(struct ieee80211_local *local,
struct ieee80211_link_data *link, u16 ac,
const struct ieee80211_tx_queue_params *params);
@@ -753,20 +776,21 @@ static inline int drv_set_antenna(struct ieee80211_local *local,
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->set_antenna)
- ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant);
+ ret = local->ops->set_antenna(&local->hw, -1, tx_ant, rx_ant);
trace_drv_set_antenna(local, tx_ant, rx_ant, ret);
return ret;
}
-static inline int drv_get_antenna(struct ieee80211_local *local,
+static inline int drv_get_antenna(struct ieee80211_local *local, int radio_idx,
u32 *tx_ant, u32 *rx_ant)
{
int ret = -EOPNOTSUPP;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
if (local->ops->get_antenna)
- ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant);
- trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret);
+ ret = local->ops->get_antenna(&local->hw, radio_idx,
+ tx_ant, rx_ant);
+ trace_drv_get_antenna(local, radio_idx, *tx_ant, *rx_ant, ret);
return ret;
}
@@ -879,6 +903,9 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
if (!check_sdata_in_driver(sdata))
return;
+ if (fips_enabled)
+ return;
+
trace_drv_set_rekey_data(local, sdata, data);
if (local->ops->set_rekey_data)
local->ops->set_rekey_data(&local->hw, &sdata->vif, data);
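
The wrapper changes above follow one pattern: each drv_*() helper gains a radio_idx argument so configuration can target a single radio of a multi-radio wiphy, while callers that mean "all radios" pass -1. A minimal userspace sketch of that calling convention, with names invented for illustration only:

#include <stdio.h>

/* radio_idx == -1 applies the value to every radio, otherwise to one */
static void set_rts_threshold(int radio_idx, unsigned int value,
                              unsigned int *thresholds, int n_radios)
{
        for (int i = 0; i < n_radios; i++)
                if (radio_idx < 0 || radio_idx == i)
                        thresholds[i] = value;
}

int main(void)
{
        unsigned int thr[2] = { 2347, 2347 };

        set_rts_threshold(-1, 500, thr, 2);     /* both radios */
        set_rts_threshold(1, 1000, thr, 2);     /* radio 1 only */
        printf("%u %u\n", thr[0], thr[1]);      /* prints: 500 1000 */
        return 0;
}
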
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 32390d8a9d75..1c82a28b03de 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2024 Intel Corporation
+ * Copyright(c) 2020-2025 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -603,3 +603,41 @@ out:
}
/* this might change ... don't want non-open drivers using it */
EXPORT_SYMBOL_GPL(ieee80211_request_smps);
+
+void ieee80211_ht_handle_chanwidth_notif(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct link_sta_info *link_sta,
+ u8 chanwidth, enum nl80211_band band)
+{
+ enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
+ struct ieee80211_supported_band *sband;
+ struct sta_opmode_info sta_opmode = {};
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ)
+ max_bw = IEEE80211_STA_RX_BW_20;
+ else
+ max_bw = ieee80211_sta_cap_rx_bw(link_sta);
+
+ /* set cur_max_bandwidth and recalc sta bw */
+ link_sta->cur_max_bandwidth = max_bw;
+ new_bw = ieee80211_sta_cur_vht_bw(link_sta);
+
+ if (link_sta->pub->bandwidth == new_bw)
+ return;
+
+ link_sta->pub->bandwidth = new_bw;
+ sband = local->hw.wiphy->bands[band];
+ sta_opmode.bw =
+ ieee80211_sta_rx_bw_to_chan_width(link_sta);
+ sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
+
+ rate_control_rate_update(local, sband, link_sta,
+ IEEE80211_RC_BW_CHANGED);
+ cfg80211_sta_opmode_change_notify(sdata->dev,
+ sta->addr,
+ &sta_opmode,
+ GFP_KERNEL);
+}
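
The new ieee80211_ht_handle_chanwidth_notif() helper caps the peer's receive bandwidth at 20 MHz when an HT notify-channel-width action announces 20 MHz, and otherwise falls back to the capability-derived maximum before recomputing the current bandwidth. A standalone sketch of that decision, with simplified enum and constant names that are not the mac80211 ones:

#include <assert.h>

enum rx_bw { BW_20, BW_40, BW_80 };

#define CHANWIDTH_20MHZ 0
#define CHANWIDTH_ANY   1

/* new cap: 20 MHz if the peer said so, else whatever its capabilities allow */
static enum rx_bw cap_bw_from_notif(int chanwidth, enum rx_bw cap_max)
{
        return chanwidth == CHANWIDTH_20MHZ ? BW_20 : cap_max;
}

int main(void)
{
        assert(cap_bw_from_notif(CHANWIDTH_20MHZ, BW_80) == BW_20);
        assert(cap_bw_from_notif(CHANWIDTH_ANY, BW_80) == BW_80);
        return 0;
}
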
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 9ed87d6f5019..6e36b09fe97f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -635,7 +635,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
list_for_each_entry_rcu(sta, &local->sta_list, list) {
- unsigned long last_active = ieee80211_sta_last_active(sta);
+ unsigned long last_active = ieee80211_sta_last_active(sta, -1);
if (sta->sdata == sdata &&
time_is_after_jiffies(last_active +
@@ -1228,7 +1228,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
- unsigned long last_active = ieee80211_sta_last_active(sta);
+ unsigned long last_active = ieee80211_sta_last_active(sta, -1);
if (sdata != sta->sdata)
continue;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 30809f0b35f7..8afa2404eaa8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -296,6 +296,14 @@ struct unsol_bcast_probe_resp_data {
u8 data[];
};
+struct s1g_short_beacon_data {
+ struct rcu_head rcu_head;
+ u8 *short_head;
+ u8 *short_tail;
+ int short_head_len;
+ int short_tail_len;
+};
+
struct ps_data {
/* yes, this looks ugly, but guarantees that we can later use
* bitmap_empty :)
@@ -306,6 +314,7 @@ struct ps_data {
atomic_t num_sta_ps; /* number of stations in PS mode */
int dtim_count;
bool dtim_bc_mc;
+ int sb_count; /* num short beacons til next long beacon */
};
struct ieee80211_if_ap {
@@ -1042,6 +1051,7 @@ struct ieee80211_link_data_ap {
struct probe_resp __rcu *probe_resp;
struct fils_discovery_data __rcu *fils_discovery;
struct unsol_bcast_probe_resp_data __rcu *unsol_bcast_probe_resp;
+ struct s1g_short_beacon_data __rcu *s1g_short_beacon;
/* to be used after channel switch. */
struct cfg80211_beacon_data *next_beacon;
@@ -1226,8 +1236,25 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
if ((_link = wiphy_dereference((_local)->hw.wiphy, \
___sdata->link[___link_id])))
+/*
+ * for_each_sdata_link_rcu() must be used under RCU read lock.
+ */
+#define for_each_sdata_link_rcu(_local, _link) \
+ /* outer loop just to define the variables ... */ \
+ for (struct ieee80211_sub_if_data *___sdata = NULL; \
+ !___sdata; \
+ ___sdata = (void *)~0 /* always stop */) \
+ list_for_each_entry_rcu(___sdata, &(_local)->interfaces, list) \
+ if (ieee80211_sdata_running(___sdata)) \
+ for (int ___link_id = 0; \
+ ___link_id < ARRAY_SIZE((___sdata)->link); \
+ ___link_id++) \
+ if ((_link = rcu_dereference((___sdata)->link[___link_id])))
+
#define for_each_link_data(sdata, __link) \
- struct ieee80211_sub_if_data *__sdata = sdata; \
+ /* outer loop just to define the variable ... */ \
+ for (struct ieee80211_sub_if_data *__sdata = (sdata); __sdata; \
+ __sdata = NULL /* always stop */) \
for (int __link_id = 0; \
__link_id < ARRAY_SIZE((__sdata)->link); __link_id++) \
if ((!(__sdata)->vif.valid_links || \
@@ -1235,6 +1262,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p)
((__link) = sdata_dereference((__sdata)->link[__link_id], \
(__sdata))))
+/*
+ * for_each_link_data_rcu should be used under RCU read lock.
+ */
+#define for_each_link_data_rcu(sdata, __link) \
+ /* outer loop just to define the variable ... */ \
+ for (struct ieee80211_sub_if_data *__sdata = (sdata); __sdata; \
+ __sdata = NULL /* always stop */) \
+ for (int __link_id = 0; \
+ __link_id < ARRAY_SIZE((__sdata)->link); __link_id++) \
+ if ((!(__sdata)->vif.valid_links || \
+ (__sdata)->vif.valid_links & BIT(__link_id)) && \
+ ((__link) = rcu_dereference((__sdata)->link[__link_id]))) \
+
static inline int
ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems,
struct cfg80211_rnr_elems *rnr_elems,
@@ -1403,8 +1443,6 @@ struct ieee80211_local {
bool rx_mcast_action_reg;
unsigned int filter_flags; /* FIF_* */
- bool wiphy_ciphers_allocated;
-
struct cfg80211_chan_def dflt_chandef;
bool emulate_chanctx;
@@ -1872,7 +1910,8 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
struct ieee80211_rx_status *status,
unsigned int mpdu_len,
unsigned int mpdu_offset);
-int ieee80211_hw_config(struct ieee80211_local *local, u32 changed);
+int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx,
+ u32 changed);
int ieee80211_hw_conf_chan(struct ieee80211_local *local);
void ieee80211_hw_conf_init(struct ieee80211_local *local);
void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
@@ -2205,6 +2244,12 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs);
enum nl80211_smps_mode
ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps);
+void ieee80211_ht_handle_chanwidth_notif(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct link_sta_info *link_sta,
+ u8 chanwidth, enum nl80211_band band);
+
/* VHT */
void
ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
@@ -2269,6 +2314,9 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
+void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_s1g_cap *s1g_cap_ie,
+ struct link_sta_info *link_sta);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -2542,7 +2590,7 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local)
}
int ieee80211_txq_setup_flows(struct ieee80211_local *local);
-void ieee80211_txq_set_params(struct ieee80211_local *local);
+void ieee80211_txq_set_params(struct ieee80211_local *local, int radio_idx);
void ieee80211_txq_teardown_flows(struct ieee80211_local *local);
void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
@@ -2638,6 +2686,8 @@ int ieee80211_put_eht_cap(struct sk_buff *skb,
struct ieee80211_sub_if_data *sdata,
const struct ieee80211_supported_band *sband,
const struct ieee80211_conn_settings *conn);
+int ieee80211_put_reg_conn(struct sk_buff *skb,
+ enum ieee80211_channel_flags flags);
/* channel management */
bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
@@ -2691,7 +2741,7 @@ ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
bool radar_required);
int __must_check
ieee80211_link_use_reserved_context(struct ieee80211_link_data *link);
-int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
+void ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
int __must_check
ieee80211_link_change_chanreq(struct ieee80211_link_data *link,
@@ -2712,7 +2762,11 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx,
struct ieee80211_link_data *rsvd_for,
bool check_reserved);
-bool ieee80211_is_radar_required(struct ieee80211_local *local);
+bool ieee80211_is_radar_required(struct ieee80211_local *local,
+ struct cfg80211_scan_request *req);
+bool ieee80211_is_radio_idx_in_scan_req(struct wiphy *wiphy,
+ struct cfg80211_scan_request *scan_req,
+ int radio_idx);
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
@@ -2721,9 +2775,8 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
struct wiphy_work *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings *csa_settings);
-
-void ieee80211_recalc_dtim(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata);
+void ieee80211_recalc_sb_count(struct ieee80211_sub_if_data *sdata, u64 tsf);
+void ieee80211_recalc_dtim(struct ieee80211_sub_if_data *sdata, u64 tsf);
int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
enum ieee80211_chanctx_mode chanmode,
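
The reworked for_each_link_data() and the new for_each_link_data_rcu() / for_each_sdata_link_rcu() macros wrap their bodies in an outer for loop that runs exactly once; its only job is to declare the helper variable without leaking it into the enclosing scope. A standalone sketch of the same trick, using an invented macro rather than the mac80211 ones:

#include <stdio.h>

/* the outer loop exists only to scope 'arr'; it iterates exactly once */
#define for_each_positive(list, n, val)                         \
        for (const int *arr = (list); arr; arr = NULL)          \
                for (int i = 0; i < (n); i++)                   \
                        if ((val = arr[i]) > 0)

int main(void)
{
        int v, data[4] = { 3, -1, 7, 0 };

        for_each_positive(data, 4, v)
                printf("%d\n", v);      /* prints 3 and 7 */
        return 0;
}
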
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7c27f3cd841c..07ba68f7cd81 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -146,7 +146,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local)
{
u32 change = __ieee80211_recalc_idle(local, false);
if (change)
- ieee80211_hw_config(local, change);
+ ieee80211_hw_config(local, -1, change);
}
static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr,
@@ -726,7 +726,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
/* do after stop to avoid reconfiguring when we stop anyway */
ieee80211_configure_filter(local);
- ieee80211_hw_config(local, hw_reconf_flags);
+ ieee80211_hw_config(local, -1, hw_reconf_flags);
if (local->virt_monitors == local->open_count)
ieee80211_add_virtual_monitor(local);
@@ -1150,6 +1150,8 @@ static void ieee80211_sdata_init(struct ieee80211_local *local,
{
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->key_list);
+
/*
* Initialize the default link, so we can use link_id 0 for non-MLD,
* and that continues to work for non-MLD-aware drivers that use just
@@ -1491,7 +1493,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (local->open_count == 1)
ieee80211_hw_conf_init(local);
else if (hw_reconf_flags)
- ieee80211_hw_config(local, hw_reconf_flags);
+ ieee80211_hw_config(local, -1, hw_reconf_flags);
ieee80211_recalc_ps(local);
@@ -1555,6 +1557,35 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
}
}
} else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_HT) {
+ switch (mgmt->u.action.u.ht_smps.action) {
+ case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
+ u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
+ struct ieee80211_rx_status *status;
+ struct link_sta_info *link_sta;
+ struct sta_info *sta;
+
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (!sta)
+ break;
+
+ status = IEEE80211_SKB_RXCB(skb);
+ if (!status->link_valid)
+ link_sta = &sta->deflink;
+ else
+ link_sta = rcu_dereference_protected(sta->link[status->link_id],
+ lockdep_is_held(&local->hw.wiphy->mtx));
+ if (link_sta)
+ ieee80211_ht_handle_chanwidth_notif(local, sdata, sta,
+ link_sta, chanwidth,
+ status->band);
+ break;
+ }
+ default:
+ WARN_ON(1);
+ break;
+ }
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_VHT) {
switch (mgmt->u.action.u.vht_group_notif.action_code) {
case WLAN_VHT_ACTION_OPMODE_NOTIF: {
@@ -2210,8 +2241,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_init_frag_cache(&sdata->frags);
- INIT_LIST_HEAD(&sdata->key_list);
-
wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk,
ieee80211_delayed_tailroom_dec);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index dcf8643a0baa..b14e9cd9713f 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -6,7 +6,7 @@
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright 2018-2020, 2022-2024 Intel Corporation
+ * Copyright 2018-2020, 2022-2025 Intel Corporation
*/
#include <crypto/utils.h>
@@ -510,7 +510,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
} else {
if (!new->local->wowlan)
ret = ieee80211_key_enable_hw_accel(new);
- else
+ else if (link_id < 0 || !sdata->vif.active_links ||
+ BIT(link_id) & sdata->vif.active_links)
new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
}
@@ -1353,38 +1354,14 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
}
EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq);
-void ieee80211_remove_key(struct ieee80211_key_conf *keyconf)
-{
- struct ieee80211_key *key;
-
- key = container_of(keyconf, struct ieee80211_key, conf);
-
- lockdep_assert_wiphy(key->local->hw.wiphy);
-
- /*
- * if key was uploaded, we assume the driver will/has remove(d)
- * it, so adjust bookkeeping accordingly
- */
- if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
- key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
-
- if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC |
- IEEE80211_KEY_FLAG_PUT_MIC_SPACE |
- IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
- increment_tailroom_need_count(key->sdata);
- }
-
- ieee80211_key_free(key, false);
-}
-EXPORT_SYMBOL_GPL(ieee80211_remove_key);
-
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf,
+ u8 idx, u8 *key_data, u8 key_len,
int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_key *prev_key;
struct ieee80211_key *key;
int err;
struct ieee80211_link_data *link_data =
@@ -1400,8 +1377,37 @@ ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
return ERR_PTR(-EINVAL);
- key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx,
- keyconf->keylen, keyconf->key,
+ if (WARN_ON(idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
+ NUM_DEFAULT_BEACON_KEYS))
+ return ERR_PTR(-EINVAL);
+
+ prev_key = wiphy_dereference(local->hw.wiphy,
+ link_data->gtk[idx]);
+ if (!prev_key) {
+ if (idx < NUM_DEFAULT_KEYS) {
+ for (int i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ if (i == idx)
+ continue;
+ prev_key = wiphy_dereference(local->hw.wiphy,
+ link_data->gtk[i]);
+ if (prev_key)
+ break;
+ }
+ } else {
+ /* For IGTK we have 4 and 5 and for BIGTK - 6 and 7 */
+ prev_key = wiphy_dereference(local->hw.wiphy,
+ link_data->gtk[idx ^ 1]);
+ }
+ }
+
+ if (WARN_ON(!prev_key))
+ return ERR_PTR(-EINVAL);
+
+ if (WARN_ON(key_len < prev_key->conf.keylen))
+ return ERR_PTR(-EINVAL);
+
+ key = ieee80211_key_alloc(prev_key->conf.cipher, idx,
+ prev_key->conf.keylen, key_data,
0, NULL);
if (IS_ERR(key))
return ERR_CAST(key);
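
ieee80211_gtk_rekey_add() now takes the raw key index and material and copies cipher and key length from a previously installed key: for GTK indices it scans the other default-key slots, while for IGTK (slots 4/5) and BIGTK (slots 6/7) the sibling slot is found with idx ^ 1. A tiny sketch of that index pairing, assuming only the usual default-key slot layout:

#include <stdio.h>

/* IGTK lives in slots 4/5, BIGTK in 6/7; the sibling slot is idx ^ 1 */
static int sibling_mgmt_key_idx(int idx)
{
        return idx ^ 1;
}

int main(void)
{
        printf("%d %d %d %d\n",
               sibling_mgmt_key_idx(4),         /* 5 */
               sibling_mgmt_key_idx(5),         /* 4 */
               sibling_mgmt_key_idx(6),         /* 7 */
               sibling_mgmt_key_idx(7));        /* 6 */
        return 0;
}
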
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index d40c2bd3b50b..d71eabe5abf8 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -2,7 +2,7 @@
/*
* MLO link handling
*
- * Copyright (C) 2022-2024 Intel Corporation
+ * Copyright (C) 2022-2025 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -93,9 +93,6 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
if (link_id < 0)
link_id = 0;
- rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
- rcu_assign_pointer(sdata->link[link_id], link);
-
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
struct ieee80211_sub_if_data *ap_bss;
struct ieee80211_bss_conf *ap_bss_conf;
@@ -145,6 +142,9 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_link_debugfs_add(link);
}
+
+ rcu_assign_pointer(sdata->vif.link_conf[link_id], link_conf);
+ rcu_assign_pointer(sdata->link[link_id], link);
}
void ieee80211_link_stop(struct ieee80211_link_data *link)
@@ -368,6 +368,13 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata,
ieee80211_update_apvlan_links(sdata);
}
+ /*
+ * Ignore errors if we are only removing links as removal should
+ * always succeed
+ */
+ if (!new_links)
+ ret = 0;
+
if (ret) {
/* restore config */
memcpy(sdata->link, old_data, sizeof(old_data));
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6b6de43d9420..9c8f18b258a6 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <net/mac80211.h>
@@ -190,7 +190,8 @@ static u32 ieee80211_calc_hw_conf_chan(struct ieee80211_local *local,
return changed;
}
-int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
+int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx,
+ u32 changed)
{
int ret = 0;
@@ -201,7 +202,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
IEEE80211_CONF_CHANGE_SMPS));
if (changed && local->open_count) {
- ret = drv_config(local, changed);
+ ret = drv_config(local, radio_idx, changed);
/*
* Goal:
* HW reconfiguration should never fail, the driver has told
@@ -235,7 +236,7 @@ static int _ieee80211_hw_conf_chan(struct ieee80211_local *local,
if (!changed)
return 0;
- return drv_config(local, changed);
+ return drv_config(local, -1, changed);
}
int ieee80211_hw_conf_chan(struct ieee80211_local *local)
@@ -269,7 +270,7 @@ void ieee80211_hw_conf_init(struct ieee80211_local *local)
ctx ? &ctx->conf : NULL);
}
- WARN_ON(drv_config(local, changed));
+ WARN_ON(drv_config(local, -1, changed));
}
int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
@@ -407,9 +408,20 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
WARN_ON_ONCE(changed & BSS_CHANGED_VIF_CFG_FLAGS);
- if (!changed || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ if (!changed)
return;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP_VLAN:
+ return;
+ case NL80211_IFTYPE_MONITOR:
+ if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
+ return;
+ break;
+ default:
+ break;
+ }
+
if (!check_sdata_in_driver(sdata))
return;
@@ -1024,12 +1036,9 @@ EXPORT_SYMBOL(ieee80211_alloc_hw_nm);
static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
{
- bool have_wep = !fips_enabled; /* FIPS does not permit the use of RC4 */
bool have_mfp = ieee80211_hw_check(&local->hw, MFP_CAPABLE);
- int r = 0, w = 0;
- u32 *suites;
static const u32 cipher_suites[] = {
- /* keep WEP first, it may be removed below */
+ /* keep WEP and TKIP first, they may be removed below */
WLAN_CIPHER_SUITE_WEP40,
WLAN_CIPHER_SUITE_WEP104,
WLAN_CIPHER_SUITE_TKIP,
@@ -1045,34 +1054,17 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
WLAN_CIPHER_SUITE_BIP_GMAC_256,
};
- if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) ||
- local->hw.wiphy->cipher_suites) {
- /* If the driver advertises, or doesn't support SW crypto,
- * we only need to remove WEP if necessary.
- */
- if (have_wep)
- return 0;
-
- /* well if it has _no_ ciphers ... fine */
- if (!local->hw.wiphy->n_cipher_suites)
- return 0;
-
- /* Driver provides cipher suites, but we need to exclude WEP */
- suites = kmemdup_array(local->hw.wiphy->cipher_suites,
- local->hw.wiphy->n_cipher_suites,
- sizeof(u32), GFP_KERNEL);
- if (!suites)
- return -ENOMEM;
-
- for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
- u32 suite = local->hw.wiphy->cipher_suites[r];
-
- if (suite == WLAN_CIPHER_SUITE_WEP40 ||
- suite == WLAN_CIPHER_SUITE_WEP104)
- continue;
- suites[w++] = suite;
- }
- } else {
+ if (ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) && fips_enabled) {
+ dev_err(local->hw.wiphy->dev.parent,
+ "Drivers with SW_CRYPTO_CONTROL cannot work with FIPS\n");
+ return -EINVAL;
+ }
+
+ if (WARN_ON(ieee80211_hw_check(&local->hw, SW_CRYPTO_CONTROL) &&
+ !local->hw.wiphy->cipher_suites))
+ return -EINVAL;
+
+ if (fips_enabled || !local->hw.wiphy->cipher_suites) {
/* assign the (software supported and perhaps offloaded)
* cipher suites
*/
@@ -1082,19 +1074,13 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
if (!have_mfp)
local->hw.wiphy->n_cipher_suites -= 4;
- if (!have_wep) {
- local->hw.wiphy->cipher_suites += 2;
- local->hw.wiphy->n_cipher_suites -= 2;
+ /* FIPS does not permit the use of RC4 */
+ if (fips_enabled) {
+ local->hw.wiphy->cipher_suites += 3;
+ local->hw.wiphy->n_cipher_suites -= 3;
}
-
- /* not dynamically allocated, so just return */
- return 0;
}
- local->hw.wiphy->cipher_suites = suites;
- local->hw.wiphy->n_cipher_suites = w;
- local->wiphy_ciphers_allocated = true;
-
return 0;
}
@@ -1359,7 +1345,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
GFP_KERNEL);
if (!local->int_scan_req)
return -ENOMEM;
- local->int_scan_req->n_channels = channels;
eth_broadcast_addr(local->int_scan_req->bssid);
@@ -1650,10 +1635,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
- if (local->wiphy_ciphers_allocated) {
- kfree(local->hw.wiphy->cipher_suites);
- local->wiphy_ciphers_allocated = false;
- }
kfree(local->int_scan_req);
return result;
}
@@ -1724,11 +1705,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
mutex_destroy(&local->iflist_mtx);
- if (local->wiphy_ciphers_allocated) {
- kfree(local->hw.wiphy->cipher_suites);
- local->wiphy_ciphers_allocated = false;
- }
-
idr_for_each(&local->ack_status_frames,
ieee80211_free_ack_frame, NULL);
idr_destroy(&local->ack_status_frames);
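
The rewritten ieee80211_init_cipher_suites() relies on the static suite table keeping WEP40, WEP104 and TKIP at the front, so that under FIPS the advertised list can be shrunk by advancing the array pointer by three entries instead of allocating a filtered copy. A standalone sketch of that slicing, with placeholder suite values rather than the real WLAN_CIPHER_SUITE_* constants:

#include <stdio.h>

int main(void)
{
        static const unsigned int suites[] = {
                0x01, 0x05, 0x02,       /* WEP40, WEP104, TKIP: RC4 based */
                0x04, 0x08, 0x09, 0x0a, /* CCMP/GCMP family */
        };
        const unsigned int *advertised = suites;
        int n = sizeof(suites) / sizeof(suites[0]);
        int fips_enabled = 1;

        if (fips_enabled) {             /* FIPS does not permit RC4 */
                advertised += 3;
                n -= 3;
        }

        for (int i = 0; i < n; i++)
                printf("0x%02x\n", advertised[i]);
        return 0;
}
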
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index d00d9d413c5c..a4a715f6f1c3 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1202,7 +1202,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
return -ENOMEM;
}
- ieee80211_recalc_dtim(local, sdata);
+ ieee80211_recalc_dtim(sdata, drv_get_tsf(local, sdata));
ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed);
netif_carrier_on(sdata->dev);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2d46d4af60d7..1008eb8e9b13 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -776,10 +776,6 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata,
ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING))
return false;
- if (chandef->punctured && chandef->chan->band == NL80211_BAND_5GHZ &&
- ieee80211_hw_check(&sdata->local->hw, DISALLOW_PUNCTURING_5GHZ))
- return false;
-
return true;
}
@@ -1218,18 +1214,36 @@ EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_determine_chan_mode);
static int ieee80211_config_bw(struct ieee80211_link_data *link,
struct ieee802_11_elems *elems,
- bool update, u64 *changed,
- const char *frame)
+ bool update, u64 *changed, u16 stype)
{
struct ieee80211_channel *channel = link->conf->chanreq.oper.chan;
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_chan_req chanreq = {};
struct cfg80211_chan_def ap_chandef;
enum ieee80211_conn_mode ap_mode;
+ const char *frame;
u32 vht_cap_info = 0;
u16 ht_opmode;
int ret;
+ switch (stype) {
+ case IEEE80211_STYPE_BEACON:
+ frame = "beacon";
+ break;
+ case IEEE80211_STYPE_ASSOC_RESP:
+ frame = "assoc response";
+ break;
+ case IEEE80211_STYPE_REASSOC_RESP:
+ frame = "reassoc response";
+ break;
+ case IEEE80211_STYPE_ACTION:
+ /* the only action frame that gets here */
+ frame = "ML reconf response";
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* don't track any bandwidth changes in legacy/S1G modes */
if (link->u.mgd.conn.mode == IEEE80211_CONN_MODE_LEGACY ||
link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G)
@@ -1278,7 +1292,9 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
ieee80211_min_bw_limit_from_chandef(&chanreq.oper))
ieee80211_chandef_downgrade(&chanreq.oper, NULL);
- if (ap_chandef.chan->band == NL80211_BAND_6GHZ &&
+ /* TPE element is not present in (re)assoc/ML reconfig response */
+ if (stype == IEEE80211_STYPE_BEACON &&
+ ap_chandef.chan->band == NL80211_BAND_6GHZ &&
link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) {
ieee80211_rearrange_tpe(&elems->tpe, &ap_chandef,
&chanreq.oper);
@@ -1645,6 +1661,30 @@ static size_t ieee80211_add_before_he_elems(struct sk_buff *skb,
return noffset;
}
+static size_t ieee80211_add_before_reg_conn(struct sk_buff *skb,
+ const u8 *elems, size_t elems_len,
+ size_t offset)
+{
+ static const u8 before_reg_conn[] = {
+ /*
+ * no need to list the ones split off before HE
+ * or generated here
+ */
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_DH_PARAMETER,
+ WLAN_EID_EXTENSION, WLAN_EID_EXT_KNOWN_STA_IDENTIFCATION,
+ };
+ size_t noffset;
+
+ if (!elems_len)
+ return offset;
+
+ noffset = ieee80211_ie_split(elems, elems_len, before_reg_conn,
+ ARRAY_SIZE(before_reg_conn), offset);
+ skb_put_data(skb, elems + offset, noffset - offset);
+
+ return noffset;
+}
+
#define PRESENT_ELEMS_MAX 8
#define PRESENT_ELEM_EXT_OFFS 0x100
@@ -1806,6 +1846,22 @@ ieee80211_add_link_elems(struct ieee80211_sub_if_data *sdata,
}
/*
+ * if present, add any custom IEs that go before the regulatory
+ * connectivity element
+ */
+ offset = ieee80211_add_before_reg_conn(skb, extra_elems,
+ extra_elems_len, offset);
+
+ if (sband->band == NL80211_BAND_6GHZ) {
+ /*
+ * as per Section E.2.7 of IEEE 802.11 REVme D7.0, a non-AP STA
+ * capable of operating on the 6 GHz band shall transmit the
+ * regulatory connectivity element.
+ */
+ ieee80211_put_reg_conn(skb, chan->flags);
+ }
+
+ /*
* careful - need to know about all the present elems before
* calling ieee80211_assoc_add_ml_elem(), so add this one if
* we're going to put it after the ML element
@@ -1943,14 +1999,7 @@ ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata,
}
skb_put_data(skb, &mld_capa_ops, sizeof(mld_capa_ops));
- /* Many APs have broken parsing of the extended MLD capa/ops field,
- * dropping (re-)association request frames or replying with association
- * response with a failure status if it's present. Without a clear
- * indication as to whether the AP supports parsing this field or not do
- * not include it in the common information unless strict mode is set.
- */
- if (ieee80211_hw_check(&local->hw, STRICT) &&
- assoc_data->ext_mld_capa_ops) {
+ if (assoc_data->ext_mld_capa_ops) {
ml_elem->control |=
cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP);
common->len += 2;
@@ -2381,9 +2430,26 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy,
* update cfg80211 directly.
*/
if (!ieee80211_vif_link_active(&sdata->vif, link->link_id)) {
+ struct link_sta_info *link_sta;
+ struct sta_info *ap_sta;
+
link->conf->chanreq = link->csa.chanreq;
cfg80211_ch_switch_notify(sdata->dev, &link->csa.chanreq.oper,
link->link_id);
+ link->conf->csa_active = false;
+
+ ap_sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
+ if (WARN_ON(!ap_sta))
+ return;
+
+ link_sta = wiphy_dereference(wiphy,
+ ap_sta->link[link->link_id]);
+ if (WARN_ON(!link_sta))
+ return;
+
+ link_sta->pub->bandwidth =
+ _ieee80211_sta_cur_vht_bw(link_sta,
+ &link->csa.chanreq.oper);
return;
}
@@ -2439,6 +2505,21 @@ static void ieee80211_csa_switch_work(struct wiphy *wiphy,
}
}
+ /*
+ * No need to reset these timers if at least one link has no
+ * active CSA; that link still receives beacons while the other
+ * links are switching channel.
+ */
+ for_each_link_data(sdata, link) {
+ if (!link->conf->csa_active)
+ return;
+ }
+
+ /*
+ * Reset the beacon monitor and connection monitor timers only
+ * when CSA is active on every link, i.e. all links of the MLO
+ * connection are switching channel.
+ */
ieee80211_sta_reset_beacon_monitor(sdata);
ieee80211_sta_reset_conn_monitor(sdata);
}
@@ -2515,7 +2596,8 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
if (!local->ops->abort_channel_switch)
return;
- ieee80211_link_unreserve_chanctx(link);
+ if (rcu_access_pointer(link->conf->chanctx_conf))
+ ieee80211_link_unreserve_chanctx(link);
ieee80211_vif_unblock_queues_csa(sdata);
@@ -3181,7 +3263,7 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
return;
conf->flags |= IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
}
}
@@ -3193,7 +3275,7 @@ static void ieee80211_change_ps(struct ieee80211_local *local)
ieee80211_enable_ps(local, local->ps_sdata);
} else if (conf->flags & IEEE80211_CONF_PS) {
conf->flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
timer_delete_sync(&local->dynamic_ps_timer);
wiphy_work_cancel(local->hw.wiphy,
&local->dynamic_ps_enable_work);
@@ -3302,7 +3384,7 @@ void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy,
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
}
ieee80211_wake_queues_by_reason(&local->hw,
@@ -3377,7 +3459,7 @@ void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy,
(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
local->hw.conf.flags |= IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
}
}
@@ -3934,6 +4016,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
lockdep_assert_wiphy(local->hw.wiphy);
+ if (frame_buf)
+ memset(frame_buf, 0, IEEE80211_DEAUTH_FRAME_LEN);
+
if (WARN_ON(!ap_sta))
return;
@@ -3986,7 +4071,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
*/
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
}
local->ps_sdata = NULL;
@@ -4282,9 +4367,6 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif)))
- return;
-
/*
* Try sending broadcast probe requests for the last three
* probe requests after the first ones failed since some
@@ -4330,9 +4412,6 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif)))
- return;
-
if (!ieee80211_sdata_running(sdata))
return;
@@ -4734,6 +4813,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_prep_tx_info info = {
.subtype = IEEE80211_STYPE_AUTH,
};
+ bool sae_need_confirm = false;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
@@ -4779,6 +4859,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY;
ifmgd->auth_data->timeout_started = true;
run_again(sdata, ifmgd->auth_data->timeout);
+ if (auth_transaction == 1)
+ sae_need_confirm = true;
goto notify_driver;
}
@@ -4822,6 +4904,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_mark_sta_auth(sdata))
return; /* ignore frame -- wait for timeout */
} else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
+ auth_transaction == 1) {
+ sae_need_confirm = true;
+ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE &&
auth_transaction == 2) {
sdata_info(sdata, "SAE peer confirmed\n");
ifmgd->auth_data->peer_confirmed = true;
@@ -4829,7 +4914,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
notify_driver:
- drv_mgd_complete_tx(sdata->local, sdata, &info);
+ if (!sae_need_confirm)
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
#define case_WLAN(type) \
@@ -5291,7 +5377,9 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
/* check/update if AP changed anything in assoc response vs. scan */
if (ieee80211_config_bw(link, elems,
link_id == assoc_data->assoc_link_id,
- changed, "assoc response")) {
+ changed,
+ le16_to_cpu(mgmt->frame_control) &
+ IEEE80211_FCTL_STYPE)) {
ret = false;
goto out;
}
@@ -5399,6 +5487,12 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
bss_conf->epcs_support = false;
}
+ if (elems->s1g_oper &&
+ link->u.mgd.conn.mode == IEEE80211_CONN_MODE_S1G &&
+ elems->s1g_capab)
+ ieee80211_s1g_cap_to_sta_s1g_cap(sdata, elems->s1g_capab,
+ link_sta);
+
bss_conf->twt_broadcast =
ieee80211_twt_bcast_support(sdata, bss_conf, sband, link_sta);
@@ -5919,6 +6013,7 @@ ieee80211_ap_power_type(u8 control)
return IEEE80211_REG_LPI_AP;
case IEEE80211_6GHZ_CTRL_REG_SP_AP:
case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD:
return IEEE80211_REG_SP_AP;
case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
return IEEE80211_REG_VLP_AP;
@@ -7195,6 +7290,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
struct ieee80211_mgmt *mgmt = (void *) hdr;
+ struct ieee80211_ext *ext = NULL;
size_t baselen;
struct ieee802_11_elems *elems;
struct ieee80211_local *local = sdata->local;
@@ -7220,7 +7316,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
/* Process beacon from the current BSS */
bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type);
if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
- struct ieee80211_ext *ext = (void *) mgmt;
+ ext = (void *)mgmt;
variable = ext->u.s1g_beacon.variable +
ieee80211_s1g_optional_len(ext->frame_control);
}
@@ -7340,7 +7436,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
if (local->hw.conf.dynamic_ps_timeout > 0) {
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local,
+ ieee80211_hw_config(local, -1,
IEEE80211_CONF_CHANGE_PS);
}
ieee80211_send_nullfunc(local, sdata, false);
@@ -7407,7 +7503,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
}
if ((ncrc == link->u.mgd.beacon_crc && link->u.mgd.beacon_crc_valid) ||
- ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+ (ext && ieee80211_is_s1g_short_beacon(ext->frame_control,
+ parse_params.start,
+ parse_params.len)))
goto free;
link->u.mgd.beacon_crc = ncrc;
link->u.mgd.beacon_crc_valid = true;
@@ -7476,7 +7574,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
changed |= ieee80211_recalc_twt_req(sdata, sband, link, link_sta, elems);
- if (ieee80211_config_bw(link, elems, true, &changed, "beacon")) {
+ if (ieee80211_config_bw(link, elems, true, &changed,
+ IEEE80211_STYPE_BEACON)) {
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DEAUTH_LEAVING,
true, deauth_buf);
@@ -8383,16 +8482,32 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
}
}
+static bool
+ieee80211_is_csa_in_progress(struct ieee80211_sub_if_data *sdata)
+{
+ /*
+ * In MLO, check the CSA flags 'active' and 'waiting_bcn' for all
+ * the links.
+ */
+ struct ieee80211_link_data *link;
+
+ guard(rcu)();
+
+ for_each_link_data_rcu(sdata, link) {
+ if (!(link->conf->csa_active &&
+ !link->u.mgd.csa.waiting_bcn))
+ return false;
+ }
+
+ return true;
+}
+
static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
timer_container_of(sdata, t, u.mgd.bcn_mon_timer);
- if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif)))
- return;
-
- if (sdata->vif.bss_conf.csa_active &&
- !sdata->deflink.u.mgd.csa.waiting_bcn)
+ if (ieee80211_is_csa_in_progress(sdata))
return;
if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
@@ -8403,36 +8518,69 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
&sdata->u.mgd.beacon_connection_loss_work);
}
+static unsigned long
+ieee80211_latest_active_link_conn_timeout(struct ieee80211_sub_if_data *sdata)
+{
+ unsigned long latest_timeout = jiffies;
+ unsigned int link_id;
+ struct sta_info *sta;
+
+ guard(rcu)();
+
+ sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
+ if (!sta)
+ return 0;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sta->link);
+ link_id++) {
+ struct link_sta_info *link_sta;
+ unsigned long timeout;
+
+ link_sta = rcu_dereference(sta->link[link_id]);
+ if (!link_sta)
+ continue;
+
+ timeout = link_sta->status_stats.last_ack;
+ if (time_before(timeout, link_sta->rx_stats.last_rx))
+ timeout = link_sta->rx_stats.last_rx;
+
+ timeout += IEEE80211_CONNECTION_IDLE_TIME;
+
+ /*
+ * latest_timeout holds the timeout of the link
+ * that will expire last among all links in a
+ * non-AP MLD STA. This ensures that the connection
+ * monitor timer is only reset if at least one link
+ * is still active, and it is scheduled to fire at
+ * the latest possible timeout.
+ */
+ if (time_after(timeout, latest_timeout))
+ latest_timeout = timeout;
+ }
+
+ return latest_timeout;
+}
+
static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
{
struct ieee80211_sub_if_data *sdata =
timer_container_of(sdata, t, u.mgd.conn_mon_timer);
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- struct sta_info *sta;
- unsigned long timeout;
+ unsigned long latest_timeout;
- if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif)))
+ if (ieee80211_is_csa_in_progress(sdata))
return;
- if (sdata->vif.bss_conf.csa_active &&
- !sdata->deflink.u.mgd.csa.waiting_bcn)
- return;
+ latest_timeout = ieee80211_latest_active_link_conn_timeout(sdata);
- sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr);
- if (!sta)
- return;
-
- timeout = sta->deflink.status_stats.last_ack;
- if (time_before(sta->deflink.status_stats.last_ack, sta->deflink.rx_stats.last_rx))
- timeout = sta->deflink.rx_stats.last_rx;
- timeout += IEEE80211_CONNECTION_IDLE_TIME;
-
- /* If timeout is after now, then update timer to fire at
+ /*
+ * If latest timeout is after now, then update timer to fire at
* the later date, but do not actually probe at this time.
*/
- if (time_is_after_jiffies(timeout)) {
- mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(timeout));
+ if (time_is_after_jiffies(latest_timeout)) {
+ mod_timer(&ifmgd->conn_mon_timer,
+ round_jiffies_up(latest_timeout));
return;
}
@@ -8692,21 +8840,33 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
bool have_sta = false;
bool mlo;
int err;
+ u16 new_links;
if (link_id >= 0) {
mlo = true;
if (WARN_ON(!ap_mld_addr))
return -EINVAL;
- err = ieee80211_vif_set_links(sdata, BIT(link_id), 0);
+ new_links = BIT(link_id);
} else {
if (WARN_ON(ap_mld_addr))
return -EINVAL;
ap_mld_addr = cbss->bssid;
- err = ieee80211_vif_set_links(sdata, 0, 0);
+ new_links = 0;
link_id = 0;
mlo = false;
}
+ if (assoc) {
+ rcu_read_lock();
+ have_sta = sta_info_get(sdata, ap_mld_addr);
+ rcu_read_unlock();
+ }
+
+ if (mlo && !have_sta &&
+ WARN_ON(sdata->vif.valid_links || sdata->vif.active_links))
+ return -EINVAL;
+
+ err = ieee80211_vif_set_links(sdata, new_links, 0);
if (err)
return err;
@@ -8727,12 +8887,6 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
goto out_err;
}
- if (assoc) {
- rcu_read_lock();
- have_sta = sta_info_get(sdata, ap_mld_addr);
- rcu_read_unlock();
- }
-
if (!have_sta) {
if (mlo)
new_sta = sta_info_alloc_with_link(sdata, ap_mld_addr,
@@ -9332,6 +9486,39 @@ out_rcu:
return err;
}
+static bool
+ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(struct cfg80211_assoc_request *req)
+{
+ const struct cfg80211_bss_ies *ies;
+ struct cfg80211_bss *bss;
+ const struct element *ml;
+
+ /* not an MLO connection if link_id < 0, so irrelevant */
+ if (req->link_id < 0)
+ return false;
+
+ bss = req->links[req->link_id].bss;
+
+ guard(rcu)();
+ ies = rcu_dereference(bss->ies);
+ for_each_element_extid(ml, WLAN_EID_EXT_EHT_MULTI_LINK,
+ ies->data, ies->len) {
+ const struct ieee80211_multi_link_elem *mle;
+
+ if (!ieee80211_mle_type_ok(ml->data + 1,
+ IEEE80211_ML_CONTROL_TYPE_BASIC,
+ ml->datalen - 1))
+ continue;
+
+ mle = (void *)(ml->data + 1);
+ if (mle->control & cpu_to_le16(IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP))
+ return true;
+ }
+
+ return false;
+}
+
int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct cfg80211_assoc_request *req)
{
@@ -9384,7 +9571,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
else
memcpy(assoc_data->ap_addr, cbss->bssid, ETH_ALEN);
- assoc_data->ext_mld_capa_ops = cpu_to_le16(req->ext_mld_capa_ops);
+ /*
+ * Many APs have broken parsing of the extended MLD capa/ops field,
+ * dropping (re-)association request frames or replying with association
+ * response with a failure status if it's present.
+ * Set our value from the userspace request only in strict mode or if
+ * the AP also had that field present.
+ */
+ if (ieee80211_hw_check(&local->hw, STRICT) ||
+ ieee80211_mgd_assoc_bss_has_mld_ext_capa_ops(req))
+ assoc_data->ext_mld_capa_ops =
+ cpu_to_le16(req->ext_mld_capa_ops);
if (ifmgd->associated) {
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
@@ -10027,7 +10224,6 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata,
for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) {
if (!add_links_data->link[link_id].bss ||
!(sdata->u.mgd.reconf.added_links & BIT(link_id)))
-
continue;
valid_links |= BIT(link_id);
@@ -10699,8 +10895,8 @@ static void ieee80211_ml_epcs(struct ieee80211_sub_if_data *sdata,
*/
for_each_mle_subelement(sub, (const u8 *)elems->ml_epcs,
elems->ml_epcs_len) {
+ struct ieee802_11_elems *link_elems __free(kfree) = NULL;
struct ieee80211_link_data *link;
- struct ieee802_11_elems *link_elems __free(kfree);
u8 *pos = (void *)sub->data;
u16 control;
ssize_t len;
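
With ieee80211_latest_active_link_conn_timeout(), the connection monitor walks every link of the MLD station and keeps the largest "last activity plus idle time" value, so the timer is re-armed against the link that will go idle last. A standalone sketch of that selection, using the jiffies-style wraparound-safe comparison but plain integers instead of the mac80211 statistics:

#include <stdio.h>

#define IDLE_TIME 30

/* signed difference handles counter wraparound, like time_after() */
static int after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

static unsigned long latest_link_timeout(const unsigned long *last_rx, int n)
{
        unsigned long latest = 0;

        for (int i = 0; i < n; i++) {
                unsigned long timeout = last_rx[i] + IDLE_TIME;

                if (i == 0 || after(timeout, latest))
                        latest = timeout;
        }
        return latest;
}

int main(void)
{
        unsigned long last_rx[3] = { 100, 180, 150 };

        printf("%lu\n", latest_link_timeout(last_rx, 3));       /* 210 */
        return 0;
}
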
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 2b9abc27462e..13df6321634d 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -39,7 +39,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
if (local->hw.conf.flags & IEEE80211_CONF_PS) {
offchannel_ps_enabled = true;
local->hw.conf.flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_PS);
}
if (!offchannel_ps_enabled ||
@@ -567,6 +567,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
{
struct ieee80211_roc_work *roc, *tmp;
bool queued = false, combine_started = true;
+ struct cfg80211_scan_request *req;
int ret;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -612,9 +613,11 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
roc->mgmt_tx_cookie = *cookie;
}
+ req = wiphy_dereference(local->hw.wiphy, local->scan_req);
+
/* if there's no need to queue, handle it immediately */
if (list_empty(&local->roc_list) &&
- !local->scanning && !ieee80211_is_radar_required(local)) {
+ !local->scanning && !ieee80211_is_radar_required(local, req)) {
/* if not HW assist, just queue & schedule work */
if (!local->ops->remain_on_channel) {
list_add_tail(&roc->list, &local->roc_list);
diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 96584b39215e..c5e0f7f46004 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -758,7 +758,6 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
{
const struct element *elem, *sub;
size_t profile_len = 0;
- bool found = false;
if (!bss || !bss->transmitted_bss)
return profile_len;
@@ -809,15 +808,14 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len,
index[2],
new_bssid);
if (ether_addr_equal(new_bssid, bss->bssid)) {
- found = true;
elems->bssid_index_len = index[1];
elems->bssid_index = (void *)&index[2];
- break;
+ return profile_len;
}
}
}
- return found ? profile_len : 0;
+ return 0;
}
static void
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index a9cc832240a5..5a508d99e84f 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -108,7 +108,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
sdata->u.mgd.powersave &&
!(local->hw.conf.flags & IEEE80211_CONF_PS)) {
local->hw.conf.flags |= IEEE80211_CONF_PS;
- ieee80211_hw_config(local,
+ ieee80211_hw_config(local, -1,
IEEE80211_CONF_CHANGE_PS);
}
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 09beb65d6108..4d4ff4d4917a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -231,8 +231,19 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
skb_queue_tail(&sdata->skb_queue, skb);
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
- if (sta)
- sta->deflink.rx_stats.packets++;
+ if (sta) {
+ struct link_sta_info *link_sta_info;
+
+ if (link_id >= 0) {
+ link_sta_info = rcu_dereference(sta->link[link_id]);
+ if (!link_sta_info)
+ return;
+ } else {
+ link_sta_info = &sta->deflink;
+ }
+
+ link_sta_info->rx_stats.packets++;
+ }
}
static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
@@ -1521,9 +1532,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
}
if (rx->sdata->vif.type == NL80211_IFTYPE_AP &&
- cfg80211_rx_spurious_frame(rx->sdata->dev,
- hdr->addr2,
- GFP_ATOMIC))
+ cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2,
+ rx->link_id, GFP_ATOMIC))
return RX_DROP_U_SPURIOUS;
return RX_DROP;
@@ -1861,7 +1871,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
if (!test_and_set_sta_flag(sta, WLAN_STA_4ADDR_EVENT))
cfg80211_rx_unexpected_4addr_frame(
rx->sdata->dev, sta->sta.addr,
- GFP_ATOMIC);
+ rx->link_id, GFP_ATOMIC);
return RX_DROP_U_UNEXPECTED_4ADDR_FRAME;
}
/*
@@ -3022,7 +3032,6 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
- ieee80211_rx_result res;
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
@@ -3084,24 +3093,18 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
while (!skb_queue_empty(&frame_list)) {
rx->skb = __skb_dequeue(&frame_list);
- res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
- switch (res) {
+ switch (ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb)) {
case RX_QUEUED:
- continue;
- case RX_CONTINUE:
break;
+ case RX_CONTINUE:
+ if (ieee80211_frame_allowed(rx, fc)) {
+ ieee80211_deliver_skb(rx);
+ break;
+ }
+ fallthrough;
default:
- goto free;
+ dev_kfree_skb(rx->skb);
}
-
- if (!ieee80211_frame_allowed(rx, fc))
- goto free;
-
- ieee80211_deliver_skb(rx);
- continue;
-
-free:
- dev_kfree_skb(rx->skb);
}
return RX_QUEUED;
@@ -3187,7 +3190,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
if (rx->sta &&
!test_and_set_sta_flag(rx->sta, WLAN_STA_4ADDR_EVENT))
cfg80211_rx_unexpected_4addr_frame(
- rx->sdata->dev, rx->sta->sta.addr, GFP_ATOMIC);
+ rx->sdata->dev, rx->sta->sta.addr, rx->link_id,
+ GFP_ATOMIC);
return RX_DROP;
}
@@ -3576,41 +3580,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
- struct ieee80211_supported_band *sband;
u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth;
- enum ieee80211_sta_rx_bandwidth max_bw, new_bw;
- struct sta_opmode_info sta_opmode = {};
+
+ if (chanwidth != IEEE80211_HT_CHANWIDTH_20MHZ &&
+ chanwidth != IEEE80211_HT_CHANWIDTH_ANY)
+ goto invalid;
/* If it doesn't support 40 MHz it can't change ... */
if (!(rx->link_sta->pub->ht_cap.cap &
- IEEE80211_HT_CAP_SUP_WIDTH_20_40))
- goto handled;
-
- if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ)
- max_bw = IEEE80211_STA_RX_BW_20;
- else
- max_bw = ieee80211_sta_cap_rx_bw(rx->link_sta);
-
- /* set cur_max_bandwidth and recalc sta bw */
- rx->link_sta->cur_max_bandwidth = max_bw;
- new_bw = ieee80211_sta_cur_vht_bw(rx->link_sta);
-
- if (rx->link_sta->pub->bandwidth == new_bw)
+ IEEE80211_HT_CAP_SUP_WIDTH_20_40))
goto handled;
- rx->link_sta->pub->bandwidth = new_bw;
- sband = rx->local->hw.wiphy->bands[status->band];
- sta_opmode.bw =
- ieee80211_sta_rx_bw_to_chan_width(rx->link_sta);
- sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED;
-
- rate_control_rate_update(local, sband, rx->link_sta,
- IEEE80211_RC_BW_CHANGED);
- cfg80211_sta_opmode_change_notify(sdata->dev,
- rx->sta->addr,
- &sta_opmode,
- GFP_ATOMIC);
- goto handled;
+ goto queue;
}
default:
goto invalid;
@@ -4234,10 +4215,16 @@ static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx,
rx->link_sta = NULL;
}
- if (link_id < 0)
- rx->link = &rx->sdata->deflink;
- else if (!ieee80211_rx_data_set_link(rx, link_id))
+ if (link_id < 0) {
+ if (ieee80211_vif_is_mld(&rx->sdata->vif) &&
+ sta && !sta->sta.valid_links)
+ rx->link =
+ rcu_dereference(rx->sdata->link[sta->deflink.link_id]);
+ else
+ rx->link = &rx->sdata->deflink;
+ } else if (!ieee80211_rx_data_set_link(rx, link_id)) {
return false;
+ }
return true;
}
@@ -4432,6 +4419,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!multicast &&
!ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
return false;
+ /* reject invalid/our STA address */
+ if (!is_valid_ether_addr(hdr->addr2) ||
+ ether_addr_equal(sdata->dev->dev_addr, hdr->addr2))
+ return false;
if (!rx->sta) {
int rate_idx;
if (status->encoding != RX_ENC_LEGACY)
@@ -5115,8 +5106,24 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
sta = sta_info_get_bss(rx->sdata, hdr->addr2);
- if (status->link_valid)
+ if (status->link_valid) {
link_id = status->link_id;
+ } else if (ieee80211_vif_is_mld(&rx->sdata->vif) &&
+ status->freq) {
+ struct ieee80211_link_data *link;
+ struct ieee80211_chanctx_conf *conf;
+
+ for_each_link_data_rcu(rx->sdata, link) {
+ conf = rcu_dereference(link->conf->chanctx_conf);
+ if (!conf || !conf->def.chan)
+ continue;
+
+ if (status->freq == conf->def.chan->center_freq) {
+ link_id = link->link_id;
+ break;
+ }
+ }
+ }
}
if (!ieee80211_rx_data_set_sta(rx, sta, link_id))
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index d4ed0c0a335c..1f68df6e8067 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -194,3 +194,29 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
break;
}
}
+
+void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata,
+ const struct ieee80211_s1g_cap *s1g_cap_ie,
+ struct link_sta_info *link_sta)
+{
+ struct ieee80211_sta_s1g_cap *s1g_cap = &link_sta->pub->s1g_cap;
+
+ memset(s1g_cap, 0, sizeof(*s1g_cap));
+
+ memcpy(s1g_cap->cap, s1g_cap_ie->capab_info, sizeof(s1g_cap->cap));
+ memcpy(s1g_cap->nss_mcs, s1g_cap_ie->supp_mcs_nss,
+ sizeof(s1g_cap->nss_mcs));
+
+ s1g_cap->s1g = true;
+
+ /* The S1G maximum MPDU length capability is a single bit */
+ if (s1g_cap->cap[3] & S1G_CAP3_MAX_MPDU_LEN) {
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_7991;
+ } else {
+ link_sta->pub->agg.max_amsdu_len =
+ IEEE80211_MAX_MPDU_LEN_VHT_3895;
+ }
+
+ ieee80211_sta_recalc_aggregates(&link_sta->sta->sta);
+}
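
ieee80211_s1g_cap_to_sta_s1g_cap() copies the peer's S1G capability and MCS/NSS fields and then derives the maximum A-MSDU length from a single bit in capability octet 3. A standalone sketch of that mapping, where the mask value is a placeholder and only the two VHT-style lengths used above are assumed:

#include <stdio.h>

#define CAP3_MAX_MPDU_LEN 0x01          /* placeholder bit mask for the sketch */
#define MAX_MPDU_LEN_3895 3895
#define MAX_MPDU_LEN_7991 7991

static int max_amsdu_len_from_cap(unsigned char cap3)
{
        return (cap3 & CAP3_MAX_MPDU_LEN) ? MAX_MPDU_LEN_7991
                                          : MAX_MPDU_LEN_3895;
}

int main(void)
{
        printf("%d\n", max_amsdu_len_from_cap(0x00));   /* 3895 */
        printf("%d\n", max_amsdu_len_from_cap(0x01));   /* 7991 */
        return 0;
}
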
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index cd8385ecafd9..dbf98aa4cd67 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -586,7 +586,8 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
return 0;
}
-static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
+static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_scan_request *req)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
@@ -594,7 +595,7 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(local->hw.wiphy);
- if (!ieee80211_is_radar_required(local))
+ if (!ieee80211_is_radar_required(local, req))
return true;
if (!regulatory_pre_cac_allowed(local->hw.wiphy))
@@ -610,9 +611,10 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
}
static bool ieee80211_can_scan(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_scan_request *req)
{
- if (!__ieee80211_can_leave_ch(sdata))
+ if (!__ieee80211_can_leave_ch(sdata, req))
return false;
if (!list_empty(&local->roc_list))
@@ -627,15 +629,19 @@ static bool ieee80211_can_scan(struct ieee80211_local *local,
void ieee80211_run_deferred_scan(struct ieee80211_local *local)
{
+ struct cfg80211_scan_request *req;
+
lockdep_assert_wiphy(local->hw.wiphy);
if (!local->scan_req || local->scanning)
return;
+ req = wiphy_dereference(local->hw.wiphy, local->scan_req);
if (!ieee80211_can_scan(local,
rcu_dereference_protected(
local->scan_sdata,
- lockdep_is_held(&local->hw.wiphy->mtx))))
+ lockdep_is_held(&local->hw.wiphy->mtx)),
+ req))
return;
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work,
@@ -732,10 +738,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
!(sdata->vif.active_links & BIT(req->tsf_report_link_id)))
return -EINVAL;
- if (!__ieee80211_can_leave_ch(sdata))
+ if (!__ieee80211_can_leave_ch(sdata, req))
return -EBUSY;
- if (!ieee80211_can_scan(local, sdata)) {
+ if (!ieee80211_can_scan(local, sdata, req)) {
/* wait for the work to finish/time out */
rcu_assign_pointer(local->scan_req, req);
rcu_assign_pointer(local->scan_sdata, sdata);
@@ -794,6 +800,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_req->req.scan_6ghz_params =
req->scan_6ghz_params;
local->hw_scan_req->req.scan_6ghz = req->scan_6ghz;
+ local->hw_scan_req->req.first_part = req->first_part;
/*
* After allocating local->hw_scan_req, we must
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 61583173629e..8c550aab9bdc 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*/
#include <linux/module.h>
@@ -355,6 +355,50 @@ static void sta_info_free_link(struct link_sta_info *link_sta)
free_percpu(link_sta->pcpu_rx_stats);
}
+static void sta_accumulate_removed_link_stats(struct sta_info *sta, int link_id)
+{
+ struct link_sta_info *link_sta = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+ struct ieee80211_link_data *link;
+ int ac, tid;
+ u32 thr;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ sta->rem_link_stats.tx_packets +=
+ link_sta->tx_stats.packets[ac];
+ sta->rem_link_stats.tx_bytes += link_sta->tx_stats.bytes[ac];
+ }
+
+ sta->rem_link_stats.rx_packets += link_sta->rx_stats.packets;
+ sta->rem_link_stats.rx_bytes += link_sta->rx_stats.bytes;
+ sta->rem_link_stats.tx_retries += link_sta->status_stats.retry_count;
+ sta->rem_link_stats.tx_failed += link_sta->status_stats.retry_failed;
+ sta->rem_link_stats.rx_dropped_misc += link_sta->rx_stats.dropped;
+
+ thr = sta_get_expected_throughput(sta);
+ if (thr != 0)
+ sta->rem_link_stats.expected_throughput += thr;
+
+ for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
+ sta->rem_link_stats.pertid_stats.rx_msdu +=
+ link_sta->rx_stats.msdu[tid];
+ sta->rem_link_stats.pertid_stats.tx_msdu +=
+ link_sta->tx_stats.msdu[tid];
+ sta->rem_link_stats.pertid_stats.tx_msdu_retries +=
+ link_sta->status_stats.msdu_retries[tid];
+ sta->rem_link_stats.pertid_stats.tx_msdu_failed +=
+ link_sta->status_stats.msdu_failed[tid];
+ }
+
+ if (sta->sdata->vif.type == NL80211_IFTYPE_STATION) {
+ link = wiphy_dereference(sta->sdata->local->hw.wiphy,
+ sta->sdata->link[link_id]);
+ if (link)
+ sta->rem_link_stats.beacon_loss_count +=
+ link->u.mgd.beacon_loss_count;
+ }
+}
+
static void sta_remove_link(struct sta_info *sta, unsigned int link_id,
bool unhash)
{
@@ -377,6 +421,10 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id,
alloc = container_of(link_sta, typeof(*alloc), info);
sta->sta.valid_links &= ~BIT(link_id);
+
+ /* store removed link info for accumulated stats consistency */
+ sta_accumulate_removed_link_stats(sta, link_id);
+
RCU_INIT_POINTER(sta->link[link_id], NULL);
RCU_INIT_POINTER(sta->sta.link[link_id], NULL);
if (alloc) {
@@ -681,6 +729,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata,
IEEE80211_RATE_MANDATORY_G;
break;
case NL80211_BAND_5GHZ:
+ case NL80211_BAND_6GHZ:
mandatory = IEEE80211_RATE_MANDATORY_A;
break;
case NL80211_BAND_60GHZ:
@@ -1651,7 +1700,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry_safe(sta, tmp, &local->sta_list, list) {
- unsigned long last_active = ieee80211_sta_last_active(sta);
+ unsigned long last_active = ieee80211_sta_last_active(sta, -1);
if (sdata != sta->sdata)
continue;
@@ -2420,18 +2469,27 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
}
static struct ieee80211_sta_rx_stats *
-sta_get_last_rx_stats(struct sta_info *sta)
+sta_get_last_rx_stats(struct sta_info *sta, int link_id)
{
- struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats;
+ struct ieee80211_sta_rx_stats *stats;
+ struct link_sta_info *link_sta_info;
int cpu;
- if (!sta->deflink.pcpu_rx_stats)
+ if (link_id < 0)
+ link_sta_info = &sta->deflink;
+ else
+ link_sta_info = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+
+ stats = &link_sta_info->rx_stats;
+
+ if (!link_sta_info->pcpu_rx_stats)
return stats;
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpustats;
- cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu);
+ cpustats = per_cpu_ptr(link_sta_info->pcpu_rx_stats, cpu);
if (time_after(cpustats->last_rx, stats->last_rx))
stats = cpustats;
@@ -2499,9 +2557,10 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
}
}
-static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
+static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo,
+ int link_id)
{
- u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u32 rate = READ_ONCE(sta_get_last_rx_stats(sta, link_id)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
@@ -2526,20 +2585,28 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats,
static void sta_set_tidstats(struct sta_info *sta,
struct cfg80211_tid_stats *tidstats,
- int tid)
+ int tid, int link_id)
{
struct ieee80211_local *local = sta->local;
+ struct link_sta_info *link_sta_info;
int cpu;
+ if (link_id < 0)
+ link_sta_info = &sta->deflink;
+ else
+ link_sta_info = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+
if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
- tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats,
- tid);
+ tidstats->rx_msdu +=
+ sta_get_tidstats_msdu(&link_sta_info->rx_stats,
+ tid);
- if (sta->deflink.pcpu_rx_stats) {
+ if (link_sta_info->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
struct ieee80211_sta_rx_stats *cpurxs;
- cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats,
+ cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats,
cpu);
tidstats->rx_msdu +=
sta_get_tidstats_msdu(cpurxs, tid);
@@ -2551,22 +2618,24 @@ static void sta_set_tidstats(struct sta_info *sta,
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU);
- tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid];
+ tidstats->tx_msdu = link_sta_info->tx_stats.msdu[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES);
- tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid];
+ tidstats->tx_msdu_retries =
+ link_sta_info->status_stats.msdu_retries[tid];
}
if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
- tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid];
+ tidstats->tx_msdu_failed =
+ link_sta_info->status_stats.msdu_failed[tid];
}
- if (tid < IEEE80211_NUM_TIDS) {
+ if (link_id < 0 && tid < IEEE80211_NUM_TIDS) {
spin_lock_bh(&local->fq.lock);
rcu_read_lock();
@@ -2625,16 +2694,278 @@ static void sta_set_mesh_sinfo(struct sta_info *sta,
}
#endif
+void sta_set_accumulated_removed_links_sinfo(struct sta_info *sta,
+ struct station_info *sinfo)
+{
+ /* Accumulating the removed link statistics. */
+ sinfo->tx_packets = sta->rem_link_stats.tx_packets;
+ sinfo->rx_packets = sta->rem_link_stats.rx_packets;
+ sinfo->tx_bytes = sta->rem_link_stats.tx_bytes;
+ sinfo->rx_bytes = sta->rem_link_stats.rx_bytes;
+ sinfo->tx_retries = sta->rem_link_stats.tx_retries;
+ sinfo->tx_failed = sta->rem_link_stats.tx_failed;
+ sinfo->rx_dropped_misc = sta->rem_link_stats.rx_dropped_misc;
+ sinfo->beacon_loss_count = sta->rem_link_stats.beacon_loss_count;
+ sinfo->expected_throughput = sta->rem_link_stats.expected_throughput;
+
+ if (sinfo->pertid) {
+ sinfo->pertid->rx_msdu =
+ sta->rem_link_stats.pertid_stats.rx_msdu;
+ sinfo->pertid->tx_msdu =
+ sta->rem_link_stats.pertid_stats.tx_msdu;
+ sinfo->pertid->tx_msdu_retries =
+ sta->rem_link_stats.pertid_stats.tx_msdu_retries;
+ sinfo->pertid->tx_msdu_failed =
+ sta->rem_link_stats.pertid_stats.tx_msdu_failed;
+ }
+}
+
+static void sta_set_link_sinfo(struct sta_info *sta,
+ struct link_station_info *link_sinfo,
+ struct ieee80211_link_data *link,
+ bool tidstats)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_sta_rx_stats *last_rxstats;
+ int i, ac, cpu, link_id = link->link_id;
+ struct link_sta_info *link_sta_info;
+ u32 thr = 0;
+
+ last_rxstats = sta_get_last_rx_stats(sta, link_id);
+
+ link_sta_info = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+
+ /* do before driver, so beacon filtering drivers have a
+ * chance to e.g. just add the number of filtered beacons
+ * (or just modify the value entirely, of course)
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ link_sinfo->rx_beacon = link->u.mgd.count_beacon_signal;
+
+ ether_addr_copy(link_sinfo->addr, link_sta_info->addr);
+
+ drv_link_sta_statistics(sta->local, sdata,
+ link_sta_info->pub,
+ link_sinfo);
+
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
+ BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
+ BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
+
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ link_sinfo->beacon_loss_count =
+ link->u.mgd.beacon_loss_count;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS);
+ }
+
+ link_sinfo->inactive_time =
+ jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, link_id));
+
+ if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) |
+ BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
+ link_sinfo->tx_bytes = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ link_sinfo->tx_bytes +=
+ link_sta_info->tx_stats.bytes[ac];
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
+ link_sinfo->tx_packets = 0;
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ link_sinfo->tx_packets +=
+ link_sta_info->tx_stats.packets[ac];
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
+ }
+
+ if (!(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
+ BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
+ link_sinfo->rx_bytes +=
+ sta_get_stats_bytes(&link_sta_info->rx_stats);
+
+ if (link_sta_info->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats,
+ cpu);
+ link_sinfo->rx_bytes +=
+ sta_get_stats_bytes(cpurxs);
+ }
+ }
+
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
+ link_sinfo->rx_packets = link_sta_info->rx_stats.packets;
+ if (link_sta_info->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats,
+ cpu);
+ link_sinfo->rx_packets += cpurxs->packets;
+ }
+ }
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) {
+ link_sinfo->tx_retries =
+ link_sta_info->status_stats.retry_count;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) {
+ link_sinfo->tx_failed =
+ link_sta_info->status_stats.retry_failed;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) {
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ link_sinfo->rx_duration += sta->airtime[ac].rx_airtime;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) {
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+ link_sinfo->tx_duration += sta->airtime[ac].tx_airtime;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
+ link_sinfo->airtime_weight = sta->airtime_weight;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
+ }
+
+ link_sinfo->rx_dropped_misc = link_sta_info->rx_stats.dropped;
+ if (link_sta_info->pcpu_rx_stats) {
+ for_each_possible_cpu(cpu) {
+ struct ieee80211_sta_rx_stats *cpurxs;
+
+ cpurxs = per_cpu_ptr(link_sta_info->pcpu_rx_stats,
+ cpu);
+ link_sinfo->rx_dropped_misc += cpurxs->dropped;
+ }
+ }
+
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) |
+ BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
+ link_sinfo->rx_beacon_signal_avg =
+ ieee80211_ave_rssi(&sdata->vif, -1);
+ }
+
+ if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
+ ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) {
+ link_sinfo->signal = (s8)last_rxstats->last_signal;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
+ }
+
+ if (!link_sta_info->pcpu_rx_stats &&
+ !(link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) {
+ link_sinfo->signal_avg =
+ -ewma_signal_read(&link_sta_info->rx_stats_avg.signal);
+ link_sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
+ }
+ }
+
+ /* for the average - if pcpu_rx_stats isn't set - rxstats must point to
+ * the sta->rx_stats struct, so the check here is fine with and without
+ * pcpu statistics
+ */
+ if (last_rxstats->chains &&
+ !(link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) |
+ BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
+ if (!link_sta_info->pcpu_rx_stats)
+ link_sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+
+ link_sinfo->chains = last_rxstats->chains;
+
+ for (i = 0; i < ARRAY_SIZE(link_sinfo->chain_signal); i++) {
+ link_sinfo->chain_signal[i] =
+ last_rxstats->chain_signal_last[i];
+ link_sinfo->chain_signal_avg[i] =
+ -ewma_signal_read(
+ &link_sta_info->rx_stats_avg.chain_signal[i]);
+ }
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) &&
+ ieee80211_rate_valid(&link_sta_info->tx_stats.last_rate)) {
+ sta_set_rate_info_tx(sta, &link_sta_info->tx_stats.last_rate,
+ &link_sinfo->txrate);
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) {
+ if (sta_set_rate_info_rx(sta, &link_sinfo->rxrate,
+ link_id) == 0)
+ link_sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
+ }
+
+ if (tidstats && !cfg80211_link_sinfo_alloc_tid_stats(link_sinfo,
+ GFP_KERNEL)) {
+ for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
+ sta_set_tidstats(sta, &link_sinfo->pertid[i], i,
+ link_id);
+ }
+
+ link_sinfo->bss_param.flags = 0;
+ if (sdata->vif.bss_conf.use_cts_prot)
+ link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT;
+ if (sdata->vif.bss_conf.use_short_preamble)
+ link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
+ if (sdata->vif.bss_conf.use_short_slot)
+ link_sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
+ link_sinfo->bss_param.dtim_period = link->conf->dtim_period;
+ link_sinfo->bss_param.beacon_interval = link->conf->beacon_int;
+
+ thr = sta_get_expected_throughput(sta);
+
+ if (thr != 0) {
+ link_sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
+ link_sinfo->expected_throughput = thr;
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) &&
+ link_sta_info->status_stats.ack_signal_filled) {
+ link_sinfo->ack_signal =
+ link_sta_info->status_stats.last_ack_signal;
+ link_sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL);
+ }
+
+ if (!(link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) &&
+ link_sta_info->status_stats.ack_signal_filled) {
+ link_sinfo->avg_ack_signal =
+ -(s8)ewma_avg_signal_read(
+ &link_sta_info->status_stats.avg_ack_signal);
+ link_sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
+ }
+}
+
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
bool tidstats)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
u32 thr = 0;
- int i, ac, cpu;
+ int i, ac, cpu, link_id;
struct ieee80211_sta_rx_stats *last_rxstats;
- last_rxstats = sta_get_last_rx_stats(sta);
+ last_rxstats = sta_get_last_rx_stats(sta, -1);
sinfo->generation = sdata->local->sta_generation;
@@ -2662,7 +2993,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
sinfo->assoc_at = sta->assoc_at;
sinfo->inactive_time =
- jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta));
+ jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta, -1));
if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) |
BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
@@ -2751,7 +3082,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
!(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) |
BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
- sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif);
+ sinfo->rx_beacon_signal_avg =
+ ieee80211_ave_rssi(&sdata->vif, -1);
}
if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
@@ -2800,13 +3132,13 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) &&
!sta->sta.valid_links) {
- if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
+ if (sta_set_rate_info_rx(sta, &sinfo->rxrate, -1) == 0)
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
}
if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) {
for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
- sta_set_tidstats(sta, &sinfo->pertid[i], i);
+ sta_set_tidstats(sta, &sinfo->pertid[i], i, -1);
}
#ifdef CONFIG_MAC80211_MESH
@@ -2868,6 +3200,26 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->filled |=
BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG);
}
+
+ if (sta->sta.valid_links) {
+ struct ieee80211_link_data *link;
+ struct link_sta_info *link_sta;
+
+ ether_addr_copy(sinfo->mld_addr, sta->addr);
+ for_each_valid_link(sinfo, link_id) {
+ link_sta = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+ link = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->link[link_id]);
+
+ if (!link_sta || !sinfo->links[link_id] || !link)
+ continue;
+
+ sinfo->valid_links = sta->sta.valid_links;
+ sta_set_link_sinfo(sta, sinfo->links[link_id],
+ link, tidstats);
+ }
+ }
}
u32 sta_get_expected_throughput(struct sta_info *sta)
@@ -2889,14 +3241,24 @@ u32 sta_get_expected_throughput(struct sta_info *sta)
return thr;
}
-unsigned long ieee80211_sta_last_active(struct sta_info *sta)
+unsigned long ieee80211_sta_last_active(struct sta_info *sta, int link_id)
{
- struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
+ struct ieee80211_sta_rx_stats *stats;
+ struct link_sta_info *link_sta_info;
- if (!sta->deflink.status_stats.last_ack ||
- time_after(stats->last_rx, sta->deflink.status_stats.last_ack))
+ stats = sta_get_last_rx_stats(sta, link_id);
+
+ if (link_id < 0)
+ link_sta_info = &sta->deflink;
+ else
+ link_sta_info = wiphy_dereference(sta->local->hw.wiphy,
+ sta->link[link_id]);
+
+ if (!link_sta_info->status_stats.last_ack ||
+ time_after(stats->last_rx, link_sta_info->status_stats.last_ack))
return stats->last_rx;
- return sta->deflink.status_stats.last_ack;
+
+ return link_sta_info->status_stats.last_ack;
}
int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 7a95d8d34fca..5288d5286651 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -569,6 +569,58 @@ struct link_sta_info {
};
/**
+ * struct ieee80211_sta_removed_link_stats - Removed link sta data
+ *
+ * Accumulated statistics kept for links that have been removed from the station
+ *
+ * @rx_packets: accumulated packets (MSDUs & MMPDUs) received from
+ * this station for removed links
+ * @tx_packets: accumulated packets (MSDUs & MMPDUs) transmitted to
+ * this station for removed links
+ * @rx_bytes: accumulated bytes (size of MPDUs) received from this
+ * station for removed links
+ * @tx_bytes: accumulated bytes (size of MPDUs) transmitted to this
+ * station for removed links
+ * @tx_retries: cumulative retry counts (MPDUs) for removed links
+ * @tx_failed: accumulated number of failed transmissions (MPDUs)
+ * (retries exceeded, no ACK) for removed links
+ * @rx_dropped_misc: accumulated packets dropped for unspecified reasons
+ * from this station for removed links
+ * @beacon_loss_count: accumulated number of beacon loss events detected
+ * for removed links of this station
+ * @expected_throughput: expected throughput in kbps (including 802.11
+ * headers) towards this station for removed links
+ * @pertid_stats: accumulated per-TID statistics for removed links of
+ * the station
+ * @pertid_stats.rx_msdu: accumulated number of MSDUs received from
+ * this station for removed links
+ * @pertid_stats.tx_msdu: accumulated number of (attempted) transmitted
+ * MSDUs towards this station for removed links
+ * @pertid_stats.tx_msdu_retries: accumulated number of retries (not
+ * counting the first) for transmitted MSDUs towards this station
+ * for removed links
+ * @pertid_stats.tx_msdu_failed: accumulated number of failed transmitted
+ * MSDUs towards this station for removed links
+ */
+struct ieee80211_sta_removed_link_stats {
+ u32 rx_packets;
+ u32 tx_packets;
+ u64 rx_bytes;
+ u64 tx_bytes;
+ u32 tx_retries;
+ u32 tx_failed;
+ u32 rx_dropped_misc;
+ u32 beacon_loss_count;
+ u32 expected_throughput;
+ struct {
+ u64 rx_msdu;
+ u64 tx_msdu;
+ u64 tx_msdu_retries;
+ u64 tx_msdu_failed;
+ } pertid_stats;
+};
+
+/**
* struct sta_info - STA information
*
* This structure collects information about a station that
@@ -644,6 +696,7 @@ struct link_sta_info {
* @deflink address and remaining would be allocated and the address
* would be assigned to link[link_id] where link_id is the id assigned
* by the AP.
+ * @rem_link_stats: accumulated removed link stats
*/
struct sta_info {
/* General information, mostly static */
@@ -718,6 +771,7 @@ struct sta_info {
struct ieee80211_sta_aggregates cur;
struct link_sta_info deflink;
struct link_sta_info __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
+ struct ieee80211_sta_removed_link_stats rem_link_stats;
/* keep last! */
struct ieee80211_sta sta;
@@ -922,6 +976,9 @@ void sta_set_rate_info_tx(struct sta_info *sta,
void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
bool tidstats);
+void sta_set_accumulated_removed_links_sinfo(struct sta_info *sta,
+ struct station_info *sinfo);
+
u32 sta_get_expected_throughput(struct sta_info *sta);
void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
@@ -936,7 +993,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta);
void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta);
void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta);
-unsigned long ieee80211_sta_last_active(struct sta_info *sta);
+unsigned long ieee80211_sta_last_active(struct sta_info *sta, int link_id);
void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta,
const u8 *ext_capab,
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 94714f8ffd22..ba5fbacbeeda 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1422,7 +1422,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
return -EOPNOTSUPP;
- if (sdata->vif.type != NL80211_IFTYPE_STATION)
+ if (sdata->vif.type != NL80211_IFTYPE_STATION || !sdata->vif.cfg.assoc)
return -EINVAL;
switch (oper) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 72fad8ea8bb9..0bfbce157486 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -384,12 +384,14 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface,
TRACE_EVENT(drv_config,
TP_PROTO(struct ieee80211_local *local,
+ int radio_idx,
u32 changed),
- TP_ARGS(local, changed),
+ TP_ARGS(local, radio_idx, changed),
TP_STRUCT__entry(
LOCAL_ENTRY
+ __field(int, radio_idx)
__field(u32, changed)
__field(u32, flags)
__field(int, power_level)
@@ -403,6 +405,7 @@ TRACE_EVENT(drv_config,
TP_fast_assign(
LOCAL_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->changed = changed;
__entry->flags = local->hw.conf.flags;
__entry->power_level = local->hw.conf.power_level;
@@ -417,8 +420,8 @@ TRACE_EVENT(drv_config,
),
TP_printk(
- LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT,
- LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG
+ LOCAL_PR_FMT " radio_idx:%d ch:%#x" CHANDEF_PR_FMT,
+ LOCAL_PR_ARG, __entry->radio_idx, __entry->changed, CHANDEF_PR_ARG
)
);
@@ -818,34 +821,71 @@ TRACE_EVENT(drv_get_key_seq,
)
);
-DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold,
- TP_PROTO(struct ieee80211_local *local, u32 value),
- TP_ARGS(local, value)
+TRACE_EVENT(drv_set_frag_threshold,
+ TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 value),
+
+ TP_ARGS(local, radio_idx, value),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(int, radio_idx)
+ __field(u32, value)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->radio_idx = radio_idx;
+ __entry->value = value;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " radio_id:%d value:%u",
+ LOCAL_PR_ARG, __entry->radio_idx, __entry->value
+ )
);
-DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold,
- TP_PROTO(struct ieee80211_local *local, u32 value),
- TP_ARGS(local, value)
+TRACE_EVENT(drv_set_rts_threshold,
+ TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 value),
+
+ TP_ARGS(local, radio_idx, value),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ __field(int, radio_idx)
+ __field(u32, value)
+ ),
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ __entry->radio_idx = radio_idx;
+ __entry->value = value;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT " radio_id:%d value:%u",
+ LOCAL_PR_ARG, __entry->radio_idx, __entry->value
+ )
);
TRACE_EVENT(drv_set_coverage_class,
- TP_PROTO(struct ieee80211_local *local, s16 value),
+ TP_PROTO(struct ieee80211_local *local, int radio_idx, s16 value),
- TP_ARGS(local, value),
+ TP_ARGS(local, radio_idx, value),
TP_STRUCT__entry(
LOCAL_ENTRY
+ __field(int, radio_idx)
__field(s16, value)
),
TP_fast_assign(
LOCAL_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->value = value;
),
TP_printk(
- LOCAL_PR_FMT " value:%d",
- LOCAL_PR_ARG, __entry->value
+ LOCAL_PR_FMT " radio_id:%d value:%d",
+ LOCAL_PR_ARG, __entry->radio_idx, __entry->value
)
);
@@ -1002,6 +1042,33 @@ DEFINE_EVENT(sta_event, drv_sta_statistics,
TP_ARGS(local, sdata, sta)
);
+TRACE_EVENT(drv_link_sta_statistics,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_sta *link_sta),
+
+ TP_ARGS(local, sdata, link_sta),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ STA_ENTRY
+ __field(u32, link_id)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ STA_NAMED_ASSIGN(link_sta->sta);
+ __entry->link_id = link_sta->link_id;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " (link %d)",
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->link_id
+ )
+);
+
DEFINE_EVENT(sta_event, drv_sta_add,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -1291,12 +1358,14 @@ TRACE_EVENT(drv_set_antenna,
);
TRACE_EVENT(drv_get_antenna,
- TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
+ TP_PROTO(struct ieee80211_local *local, int radio_idx, u32 tx_ant,
+ u32 rx_ant, int ret),
- TP_ARGS(local, tx_ant, rx_ant, ret),
+ TP_ARGS(local, radio_idx, tx_ant, rx_ant, ret),
TP_STRUCT__entry(
LOCAL_ENTRY
+ __field(int, radio_idx)
__field(u32, tx_ant)
__field(u32, rx_ant)
__field(int, ret)
@@ -1304,14 +1373,16 @@ TRACE_EVENT(drv_get_antenna,
TP_fast_assign(
LOCAL_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->tx_ant = tx_ant;
__entry->rx_ant = rx_ant;
__entry->ret = ret;
),
TP_printk(
- LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
- LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
+ LOCAL_PR_FMT " radio_idx:%d tx_ant:%d rx_ant:%d ret:%d",
+ LOCAL_PR_ARG, __entry->radio_idx, __entry->tx_ant,
+ __entry->rx_ant, __entry->ret
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d8d4f3d7d7f2..00671ae45b2f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2024 Intel Corporation
+ * Copyright (C) 2018-2025 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -612,6 +612,12 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
else
tx->key = NULL;
+ if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
+ if (tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
+ info->control.hw_key = &tx->key->conf;
+ return TX_CONTINUE;
+ }
+
if (tx->key) {
bool skip_hw = false;
@@ -1173,7 +1179,8 @@ void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
return;
if (!sta ||
- (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported) ||
+ (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported &&
+ !sta->sta.deflink.s1g_cap.s1g) ||
!sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
skb->protocol == sdata->control_port_protocol)
return;
@@ -1428,7 +1435,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
{
struct fq *fq = &local->fq;
struct fq_tin *tin = &txqi->tin;
- u32 flow_idx = fq_flow_idx(fq, skb);
+ u32 flow_idx;
ieee80211_set_skb_enqueue_time(skb);
@@ -1444,6 +1451,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
__skb_queue_tail(&txqi->frags, skb);
} else {
+ flow_idx = fq_flow_idx(fq, skb);
fq_tin_enqueue(fq, tin, flow_idx, skb,
fq_skb_free_func);
}
@@ -1541,7 +1549,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
}
-void ieee80211_txq_set_params(struct ieee80211_local *local)
+void ieee80211_txq_set_params(struct ieee80211_local *local, int radio_idx)
{
if (local->hw.wiphy->txq_limit)
local->fq.limit = local->hw.wiphy->txq_limit;
@@ -1605,7 +1613,7 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local)
for (i = 0; i < fq->flows_cnt; i++)
codel_vars_init(&local->cvars[i]);
- ieee80211_txq_set_params(local);
+ ieee80211_txq_set_params(local, -1);
return 0;
}
@@ -3876,6 +3884,7 @@ begin:
* The key can be removed while the packet was queued, so need to call
* this here to get the current key.
*/
+ info->control.hw_key = NULL;
r = ieee80211_tx_h_select_key(&tx);
if (r != TX_CONTINUE) {
ieee80211_free_txskb(&local->hw, skb);
@@ -4098,7 +4107,9 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
spin_lock_bh(&local->active_txq_lock[txq->ac]);
- has_queue = force || txq_has_queue(txq);
+ has_queue = force ||
+ (!test_bit(IEEE80211_TXQ_STOP, &txqi->flags) &&
+ txq_has_queue(txq));
if (list_empty(&txqi->schedule_order) &&
(has_queue || ieee80211_txq_keep_active(txqi))) {
/* If airtime accounting is active, always enqueue STAs at the
@@ -5016,12 +5027,25 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
}
}
-static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon)
+static u8 __ieee80211_beacon_update_cntdwn(struct ieee80211_link_data *link,
+ struct beacon_data *beacon)
{
- beacon->cntdwn_current_counter--;
+ if (beacon->cntdwn_current_counter == 1) {
+ /*
+ * Channel switch handling is done by a worker thread while
+ * beacons get pulled from hardware timers. It's therefore
+ * possible that software threads are slow enough to not be
+ * able to complete CSA handling in a single beacon interval,
+ * in which case we get here. There isn't much to do about
+ * it, other than letting the user know that the AP isn't
+ * behaving correctly.
+ */
+ link_err_once(link,
+ "beacon TX faster than countdown (channel/color switch) completion\n");
+ return 0;
+ }
- /* the counter should never reach 0 */
- WARN_ON_ONCE(!beacon->cntdwn_current_counter);
+ beacon->cntdwn_current_counter--;
return beacon->cntdwn_current_counter;
}
@@ -5052,7 +5076,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_i
if (!beacon)
goto unlock;
- count = __ieee80211_beacon_update_cntdwn(beacon);
+ count = __ieee80211_beacon_update_cntdwn(link, beacon);
unlock:
rcu_read_unlock();
@@ -5276,14 +5300,14 @@ ieee80211_beacon_add_mbssid(struct sk_buff *skb, struct beacon_data *beacon,
}
static struct sk_buff *
-ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif,
- struct ieee80211_link_data *link,
- struct ieee80211_mutable_offsets *offs,
- bool is_template,
- struct beacon_data *beacon,
- struct ieee80211_chanctx_conf *chanctx_conf,
- u8 ema_index)
+__ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_link_data *link,
+ struct ieee80211_mutable_offsets *offs,
+ bool is_template,
+ struct beacon_data *beacon,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ u8 ema_index)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
@@ -5344,6 +5368,71 @@ ieee80211_beacon_get_ap(struct ieee80211_hw *hw,
return skb;
}
+static bool ieee80211_s1g_need_long_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link)
+{
+ struct ps_data *ps = &sdata->u.ap.ps;
+
+ if (ps->sb_count == 0)
+ ps->sb_count = link->conf->s1g_long_beacon_period - 1;
+ else
+ ps->sb_count--;
+
+ return ps->sb_count == 0;
+}
+
+static struct sk_buff *
+ieee80211_s1g_short_beacon_get(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_link_data *link,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ struct s1g_short_beacon_data *sb,
+ bool is_template)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_if_ap *ap = &sdata->u.ap;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(local->tx_headroom + sb->short_head_len +
+ sb->short_tail_len + 256 +
+ local->hw.extra_beacon_tailroom);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, local->tx_headroom);
+ skb_put_data(skb, sb->short_head, sb->short_head_len);
+
+ ieee80211_beacon_add_tim(sdata, link, &ap->ps, skb, is_template);
+
+ if (sb->short_tail)
+ skb_put_data(skb, sb->short_tail, sb->short_tail_len);
+
+ ieee80211_beacon_get_finish(hw, vif, link, NULL, NULL, skb,
+ chanctx_conf, 0);
+ return skb;
+}
+
+static struct sk_buff *
+ieee80211_beacon_get_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_link_data *link,
+ struct ieee80211_mutable_offsets *offs,
+ bool is_template, struct beacon_data *beacon,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ u8 ema_index, struct s1g_short_beacon_data *s1g_sb)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ if (!sdata->vif.cfg.s1g || !s1g_sb ||
+ ieee80211_s1g_need_long_beacon(sdata, link))
+ return __ieee80211_beacon_get_ap(hw, vif, link, offs,
+ is_template, beacon,
+ chanctx_conf, ema_index);
+
+ return ieee80211_s1g_short_beacon_get(hw, vif, link, chanctx_conf,
+ s1g_sb, is_template);
+}
+
static struct ieee80211_ema_beacons *
ieee80211_beacon_get_ap_ema_list(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
@@ -5367,7 +5456,7 @@ ieee80211_beacon_get_ap_ema_list(struct ieee80211_hw *hw,
ieee80211_beacon_get_ap(hw, vif, link,
&ema->bcn[ema->cnt].offs,
is_template, beacon,
- chanctx_conf, ema->cnt);
+ chanctx_conf, ema->cnt, NULL);
if (!ema->bcn[ema->cnt].skb)
break;
}
@@ -5396,6 +5485,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
struct ieee80211_sub_if_data *sdata = NULL;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_link_data *link;
+ struct s1g_short_beacon_data *s1g_short_bcn = NULL;
rcu_read_lock();
@@ -5417,6 +5507,13 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (!beacon)
goto out;
+ if (vif->cfg.s1g && link->u.ap.s1g_short_beacon) {
+ s1g_short_bcn =
+ rcu_dereference(link->u.ap.s1g_short_beacon);
+ if (!s1g_short_bcn)
+ goto out;
+ }
+
if (ema_beacons) {
*ema_beacons =
ieee80211_beacon_get_ap_ema_list(hw, vif, link,
@@ -5437,8 +5534,8 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
skb = ieee80211_beacon_get_ap(hw, vif, link, offs,
is_template, beacon,
- chanctx_conf,
- ema_index);
+ chanctx_conf, ema_index,
+ s1g_short_bcn);
}
} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
@@ -5450,7 +5547,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (beacon->cntdwn_counter_offsets[0]) {
if (!is_template)
- __ieee80211_beacon_update_cntdwn(beacon);
+ __ieee80211_beacon_update_cntdwn(link, beacon);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
@@ -5482,7 +5579,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
* for now we leave it consistent with overall
* mac80211's behavior.
*/
- __ieee80211_beacon_update_cntdwn(beacon);
+ __ieee80211_beacon_update_cntdwn(link, beacon);
ieee80211_set_beacon_cntdwn(sdata, beacon, link);
}
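As a worked example of the S1G short/long beacon selection above (the period value is hypothetical): with link->conf->s1g_long_beacon_period set to 4, ieee80211_s1g_need_long_beacon() counts ps->sb_count down 3, 2, 1, 0 over successive beacon generations and returns true only when it reaches 0, so one full beacon is produced per long beacon period while the intervening TSBTTs are filled by ieee80211_s1g_short_beacon_get().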
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 27d414efa3fd..32f1bc5908c5 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1756,6 +1756,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
bool sched_scan_stopped = false;
bool suspended = local->suspended;
bool in_reconfig = false;
+ u32 rts_threshold;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1826,13 +1827,20 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
/* setup fragmentation threshold */
- drv_set_frag_threshold(local, hw->wiphy->frag_threshold);
+ drv_set_frag_threshold(local, -1, hw->wiphy->frag_threshold);
/* setup RTS threshold */
- drv_set_rts_threshold(local, hw->wiphy->rts_threshold);
+ if (hw->wiphy->n_radio > 0) {
+ for (i = 0; i < hw->wiphy->n_radio; i++) {
+ rts_threshold = hw->wiphy->radio_cfg[i].rts_threshold;
+ drv_set_rts_threshold(local, i, rts_threshold);
+ }
+ } else {
+ drv_set_rts_threshold(local, -1, hw->wiphy->rts_threshold);
+ }
/* reset coverage class */
- drv_set_coverage_class(local, hw->wiphy->coverage_class);
+ drv_set_coverage_class(local, -1, hw->wiphy->coverage_class);
ieee80211_led_radio(local, true);
ieee80211_mod_tpt_led_trig(local,
@@ -1890,11 +1898,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
ieee80211_assign_chanctx(local, sdata, &sdata->deflink);
/* reconfigure hardware */
- ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL |
- IEEE80211_CONF_CHANGE_MONITOR |
- IEEE80211_CONF_CHANGE_PS |
- IEEE80211_CONF_CHANGE_RETRY_LIMITS |
- IEEE80211_CONF_CHANGE_IDLE);
+ ieee80211_hw_config(local, -1, IEEE80211_CONF_CHANGE_LISTEN_INTERVAL |
+ IEEE80211_CONF_CHANGE_MONITOR |
+ IEEE80211_CONF_CHANGE_PS |
+ IEEE80211_CONF_CHANGE_RETRY_LIMITS |
+ IEEE80211_CONF_CHANGE_IDLE);
ieee80211_configure_filter(local);
@@ -2144,11 +2152,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0);
wake_up:
-
- if (local->virt_monitors > 0 &&
- local->virt_monitors == local->open_count)
- ieee80211_add_virtual_monitor(local);
-
/*
* Clear the WLAN_STA_BLOCK_BA flag so new aggregation
* sessions can be established after a resume.
@@ -2202,6 +2205,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
}
}
+ if (local->virt_monitors > 0 &&
+ local->virt_monitors == local->open_count)
+ ieee80211_add_virtual_monitor(local);
+
if (!suspended)
return 0;
@@ -2548,6 +2555,23 @@ end:
return 0;
}
+int ieee80211_put_reg_conn(struct sk_buff *skb,
+ enum ieee80211_channel_flags flags)
+{
+ u8 reg_conn = IEEE80211_REG_CONN_LPI_VALID |
+ IEEE80211_REG_CONN_LPI_VALUE |
+ IEEE80211_REG_CONN_SP_VALID;
+
+ if (!(flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT))
+ reg_conn |= IEEE80211_REG_CONN_SP_VALUE;
+
+ skb_put_u8(skb, WLAN_EID_EXTENSION);
+ skb_put_u8(skb, 1 + sizeof(reg_conn));
+ skb_put_u8(skb, WLAN_EID_EXT_NON_AP_STA_REG_CON);
+ skb_put_u8(skb, reg_conn);
+ return 0;
+}
+
int ieee80211_put_he_6ghz_cap(struct sk_buff *skb,
struct ieee80211_sub_if_data *sdata,
enum ieee80211_smps_mode smps_mode)
@@ -3265,14 +3289,24 @@ int ieee80211_put_srates_elem(struct sk_buff *skb,
return 0;
}
-int ieee80211_ave_rssi(struct ieee80211_vif *vif)
+int ieee80211_ave_rssi(struct ieee80211_vif *vif, int link_id)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link_data;
if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION))
return 0;
- return -ewma_beacon_signal_read(&sdata->deflink.u.mgd.ave_beacon_signal);
+ if (link_id < 0)
+ link_data = &sdata->deflink;
+ else
+ link_data = wiphy_dereference(sdata->local->hw.wiphy,
+ sdata->link[link_id]);
+
+ if (WARN_ON_ONCE(!link_data))
+ return -99;
+
+ return -ewma_beacon_signal_read(&link_data->u.mgd.ave_beacon_signal);
}
EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
@@ -3879,12 +3913,10 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr,
}
EXPORT_SYMBOL(ieee80211_parse_p2p_noa);
-void ieee80211_recalc_dtim(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+void ieee80211_recalc_dtim(struct ieee80211_sub_if_data *sdata, u64 tsf)
{
- u64 tsf = drv_get_tsf(local, sdata);
u64 dtim_count = 0;
- u16 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
+ u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
u8 dtim_period = sdata->vif.bss_conf.dtim_period;
struct ps_data *ps;
u8 bcns_from_dtim;
@@ -3920,6 +3952,33 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
ps->dtim_count = dtim_count;
}
+/*
+ * Given a long beacon period, calculate the current index into
+ * that period to determine the number of TSBTTs until the next TBTT.
+ * It is completely valid to have a short beacon period that differs
+ * from the DTIM period (i.e. a TBTT that is not a DTIM).
+ */
+void ieee80211_recalc_sb_count(struct ieee80211_sub_if_data *sdata, u64 tsf)
+{
+ u32 sb_idx;
+ struct ps_data *ps = &sdata->bss->ps;
+ u8 lb_period = sdata->vif.bss_conf.s1g_long_beacon_period;
+ u32 beacon_int = sdata->vif.bss_conf.beacon_int * 1024;
+
+ /* No mesh / IBSS support for short beaconing */
+ if (tsf == -1ULL || !lb_period ||
+ (sdata->vif.type != NL80211_IFTYPE_AP &&
+ sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
+ return;
+
+ /* find the current TSBTT index in our lb_period */
+ do_div(tsf, beacon_int);
+ sb_idx = do_div(tsf, lb_period);
+
+ /* num TSBTTs until the next TBTT */
+ ps->sb_count = sb_idx ? lb_period - sb_idx : 0;
+}
+
static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
struct ieee80211_chanctx *ctx)
{
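A worked example of the TSBTT bookkeeping in ieee80211_recalc_sb_count() above (hypothetical numbers): with beacon_int = 100 TU (102400 us) and s1g_long_beacon_period = 10, a TSF of 2560000 us corresponds to 25 elapsed TSBTTs; 25 mod 10 = 5, so ps->sb_count becomes 10 - 5 = 5, i.e. five more TSBTTs pass before the next TBTT.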
@@ -3953,6 +4012,33 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local,
return radar_detect;
}
+bool ieee80211_is_radio_idx_in_scan_req(struct wiphy *wiphy,
+ struct cfg80211_scan_request *scan_req,
+ int radio_idx)
+{
+ struct ieee80211_channel *chan;
+ int i, chan_radio_idx;
+
+ for (i = 0; i < scan_req->n_channels; i++) {
+ chan = scan_req->channels[i];
+ chan_radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chan);
+ /*
+ * The chan_radio_idx should be valid since it's taken from a
+ * valid scan request.
+ * However, if chan_radio_idx is unexpectedly invalid (negative),
+ * we take a conservative approach and assume the scan request
+ * might use the specified radio_idx. Hence, return true.
+ */
+ if (WARN_ON(chan_radio_idx < 0))
+ return true;
+
+ if (chan_radio_idx == radio_idx)
+ return true;
+ }
+
+ return false;
+}
+
static u32
__ieee80211_get_radio_mask(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index c5c5d16ed6c8..b099d79e8fbb 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -672,8 +672,9 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
}
} else {
- pr_warn_ratelimited("Ignoring NSS change in VHT Operating Mode Notification from %pM with invalid nss %d",
- link_sta->pub->addr, nss);
+ sdata_dbg(sdata,
+ "Ignore NSS change to invalid %d in VHT opmode notif from %pM",
+ nss, link_sta->pub->addr);
}
}
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 9b12ca97f412..df4e8cf33899 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -53,6 +53,7 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
struct sock *sk = sock->sk;
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct net *net = sock_net(&msk->sk);
struct sockaddr_mctp *smctp;
int rc;
@@ -73,14 +74,48 @@ static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
lock_sock(sk);
- /* TODO: allow rebind */
if (sk_hashed(sk)) {
rc = -EADDRINUSE;
goto out_release;
}
- msk->bind_net = smctp->smctp_network;
- msk->bind_addr = smctp->smctp_addr.s_addr;
- msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+ msk->bind_local_addr = smctp->smctp_addr.s_addr;
+
+ /* MCTP_NET_ANY with a specific EID is resolved to the default net
+ * at bind() time.
+ * For bind_addr=MCTP_ADDR_ANY it is handled specially at route
+ * lookup time.
+ */
+ if (smctp->smctp_network == MCTP_NET_ANY &&
+ msk->bind_local_addr != MCTP_ADDR_ANY) {
+ msk->bind_net = mctp_default_net(net);
+ } else {
+ msk->bind_net = smctp->smctp_network;
+ }
+
+ /* ignore the IC bit */
+ smctp->smctp_type &= 0x7f;
+
+ if (msk->bind_peer_set) {
+ if (msk->bind_type != smctp->smctp_type) {
+ /* Prior connect() had a different type */
+ rc = -EINVAL;
+ goto out_release;
+ }
+
+ if (msk->bind_net == MCTP_NET_ANY) {
+ /* Restrict to the network passed to connect() */
+ msk->bind_net = msk->bind_peer_net;
+ }
+
+ if (msk->bind_net != msk->bind_peer_net) {
+ /* connect() had a different net to bind() */
+ rc = -EINVAL;
+ goto out_release;
+ }
+ } else {
+ msk->bind_type = smctp->smctp_type;
+ }
rc = sk->sk_prot->hash(sk);
@@ -90,6 +125,67 @@ out_release:
return rc;
}
+/* Used to set a specific peer prior to bind. Not used for outbound
+ * connections (Tag Owner set) since MCTP is a datagram protocol.
+ */
+static int mctp_connect(struct socket *sock, struct sockaddr *addr,
+ int addrlen, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct net *net = sock_net(&msk->sk);
+ struct sockaddr_mctp *smctp;
+ int rc;
+
+ if (addrlen != sizeof(*smctp))
+ return -EINVAL;
+
+ if (addr->sa_family != AF_MCTP)
+ return -EAFNOSUPPORT;
+
+ /* It's a valid sockaddr for MCTP, cast and do protocol checks */
+ smctp = (struct sockaddr_mctp *)addr;
+
+ if (!mctp_sockaddr_is_ok(smctp))
+ return -EINVAL;
+
+ /* Can't bind by tag */
+ if (smctp->smctp_tag)
+ return -EINVAL;
+
+ /* IC bit must be unset */
+ if (smctp->smctp_type & 0x80)
+ return -EINVAL;
+
+ lock_sock(sk);
+
+ if (sk_hashed(sk)) {
+ /* bind() already */
+ rc = -EADDRINUSE;
+ goto out_release;
+ }
+
+ if (msk->bind_peer_set) {
+ /* connect() already */
+ rc = -EADDRINUSE;
+ goto out_release;
+ }
+
+ msk->bind_peer_set = true;
+ msk->bind_peer_addr = smctp->smctp_addr.s_addr;
+ msk->bind_type = smctp->smctp_type;
+ if (smctp->smctp_network == MCTP_NET_ANY)
+ msk->bind_peer_net = mctp_default_net(net);
+ else
+ msk->bind_peer_net = smctp->smctp_network;
+
+ rc = 0;
+
+out_release:
+ release_sock(sk);
+ return rc;
+}
+
static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
@@ -97,8 +193,8 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sock *sk = sock->sk;
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
struct mctp_skb_cb *cb;
- struct mctp_route *rt;
struct sk_buff *skb = NULL;
+ struct mctp_dst dst;
int hlen;
if (addr) {
@@ -133,34 +229,30 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
extaddr, msg->msg_name);
- struct net_device *dev;
-
- rc = -EINVAL;
- rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), extaddr->smctp_ifindex);
- /* check for correct halen */
- if (dev && extaddr->smctp_halen == dev->addr_len) {
- hlen = LL_RESERVED_SPACE(dev) + sizeof(struct mctp_hdr);
- rc = 0;
- }
- rcu_read_unlock();
+
+ if (!mctp_sockaddr_ext_is_ok(extaddr))
+ return -EINVAL;
+
+ rc = mctp_dst_from_extaddr(&dst, sock_net(sk),
+ extaddr->smctp_ifindex,
+ extaddr->smctp_halen,
+ extaddr->smctp_haddr);
if (rc)
- goto err_free;
- rt = NULL;
+ return rc;
+
} else {
- rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
- addr->smctp_addr.s_addr);
- if (!rt) {
- rc = -EHOSTUNREACH;
- goto err_free;
- }
- hlen = LL_RESERVED_SPACE(rt->dev->dev) + sizeof(struct mctp_hdr);
+ rc = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr, &dst);
+ if (rc)
+ return rc;
}
+ hlen = LL_RESERVED_SPACE(dst.dev->dev) + sizeof(struct mctp_hdr);
+
skb = sock_alloc_send_skb(sk, hlen + 1 + len,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
- return rc;
+ goto err_release_dst;
skb_reserve(skb, hlen);
@@ -175,30 +267,16 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
cb = __mctp_cb(skb);
cb->net = addr->smctp_network;
- if (!rt) {
- /* fill extended address in cb */
- DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
- extaddr, msg->msg_name);
-
- if (!mctp_sockaddr_ext_is_ok(extaddr) ||
- extaddr->smctp_halen > sizeof(cb->haddr)) {
- rc = -EINVAL;
- goto err_free;
- }
-
- cb->ifindex = extaddr->smctp_ifindex;
- /* smctp_halen is checked above */
- cb->halen = extaddr->smctp_halen;
- memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen);
- }
-
- rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+ rc = mctp_local_output(sk, &dst, skb, addr->smctp_addr.s_addr,
addr->smctp_tag);
+ mctp_dst_release(&dst);
return rc ? : len;
err_free:
kfree_skb(skb);
+err_release_dst:
+ mctp_dst_release(&dst);
return rc;
}
@@ -551,7 +629,7 @@ static const struct proto_ops mctp_dgram_ops = {
.family = PF_MCTP,
.release = mctp_release,
.bind = mctp_bind,
- .connect = sock_no_connect,
+ .connect = mctp_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
@@ -618,6 +696,7 @@ static int mctp_sk_init(struct sock *sk)
INIT_HLIST_HEAD(&msk->keys);
timer_setup(&msk->key_expiry, mctp_sk_expire_keys, 0);
+ msk->bind_peer_set = false;
return 0;
}
@@ -629,15 +708,48 @@ static void mctp_sk_close(struct sock *sk, long timeout)
static int mctp_sk_hash(struct sock *sk)
{
struct net *net = sock_net(sk);
+ struct sock *existing;
+ struct mctp_sock *msk;
+ mctp_eid_t remote;
+ u32 hash;
+ int rc;
+
+ msk = container_of(sk, struct mctp_sock, sk);
+
+ if (msk->bind_peer_set)
+ remote = msk->bind_peer_addr;
+ else
+ remote = MCTP_ADDR_ANY;
+ hash = mctp_bind_hash(msk->bind_type, msk->bind_local_addr, remote);
+
+ mutex_lock(&net->mctp.bind_lock);
+
+ /* Prevent duplicate binds. */
+ sk_for_each(existing, &net->mctp.binds[hash]) {
+ struct mctp_sock *mex =
+ container_of(existing, struct mctp_sock, sk);
+
+ bool same_peer = (mex->bind_peer_set && msk->bind_peer_set &&
+ mex->bind_peer_addr == msk->bind_peer_addr) ||
+ (!mex->bind_peer_set && !msk->bind_peer_set);
+
+ if (mex->bind_type == msk->bind_type &&
+ mex->bind_local_addr == msk->bind_local_addr && same_peer &&
+ mex->bind_net == msk->bind_net) {
+ rc = -EADDRINUSE;
+ goto out;
+ }
+ }
/* Bind lookup runs under RCU, remain live during that. */
sock_set_flag(sk, SOCK_RCU_FREE);
- mutex_lock(&net->mctp.bind_lock);
- sk_add_node_rcu(sk, &net->mctp.binds);
- mutex_unlock(&net->mctp.bind_lock);
+ sk_add_node_rcu(sk, &net->mctp.binds[hash]);
+ rc = 0;
- return 0;
+out:
+ mutex_unlock(&net->mctp.bind_lock);
+ return rc;
}
static void mctp_sk_unhash(struct sock *sk)
@@ -793,3 +905,7 @@ MODULE_DESCRIPTION("MCTP core");
MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
MODULE_ALIAS_NETPROTO(PF_MCTP);
+
+#if IS_ENABLED(CONFIG_MCTP_TEST)
+#include "test/sock-test.c"
+#endif
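The new connect() handler above only records a peer EID (and network) that a subsequent bind() is restricted to; no connection is established, since MCTP remains a datagram protocol. A minimal userspace sketch of that flow, assuming the standard AF_MCTP socket API from <linux/mctp.h> (the helper name mctp_listen_from_peer is hypothetical):

#include <unistd.h>
#include <sys/socket.h>
#include <linux/mctp.h>

/* Hypothetical helper: receive messages of 'type' only from 'peer_eid'
 * on the default MCTP network. Sketch of the connect()-then-bind() flow. */
static int mctp_listen_from_peer(mctp_eid_t peer_eid, __u8 type)
{
	struct sockaddr_mctp peer = { 0 }, local = { 0 };
	int sd = socket(AF_MCTP, SOCK_DGRAM, 0);

	if (sd < 0)
		return -1;

	/* connect() pins the peer; smctp_tag must be 0 and the IC bit clear */
	peer.smctp_family = AF_MCTP;
	peer.smctp_network = MCTP_NET_ANY;	/* resolved to the default net */
	peer.smctp_addr.s_addr = peer_eid;
	peer.smctp_type = type;
	if (connect(sd, (struct sockaddr *)&peer, sizeof(peer)) < 0)
		goto err;

	/* bind() must use the same type; MCTP_ADDR_ANY accepts any local EID */
	local.smctp_family = AF_MCTP;
	local.smctp_network = MCTP_NET_ANY;
	local.smctp_addr.s_addr = MCTP_ADDR_ANY;
	local.smctp_type = type;
	if (bind(sd, (struct sockaddr *)&local, sizeof(local)) < 0)
		goto err;

	return sd;

err:
	close(sd);
	return -1;
}

Traffic of the same type from other EIDs then falls through to less specific binds, per the widening-scope lookup added in mctp_lookup_bind() below.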
diff --git a/net/mctp/route.c b/net/mctp/route.c
index d9c8e5a5f9ce..2b2b958ef6a3 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -17,6 +17,8 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <kunit/static_stub.h>
+
#include <uapi/linux/if_arp.h>
#include <net/mctp.h>
@@ -32,39 +34,42 @@ static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);
/* route output callbacks */
-static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+static struct mctp_sock *mctp_lookup_bind_details(struct net *net,
+ struct sk_buff *skb,
+ u8 type, u8 dest,
+ u8 src, bool allow_net_any)
{
struct mctp_skb_cb *cb = mctp_cb(skb);
- struct mctp_hdr *mh;
struct sock *sk;
- u8 type;
+ u8 hash;
- WARN_ON(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held());
- /* TODO: look up in skb->cb? */
- mh = mctp_hdr(skb);
-
- if (!skb_headlen(skb))
- return NULL;
+ hash = mctp_bind_hash(type, dest, src);
- type = (*(u8 *)skb->data) & 0x7f;
-
- sk_for_each_rcu(sk, &net->mctp.binds) {
+ sk_for_each_rcu(sk, &net->mctp.binds[hash]) {
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ if (!allow_net_any && msk->bind_net == MCTP_NET_ANY)
+ continue;
+
if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
continue;
if (msk->bind_type != type)
continue;
- if (!mctp_address_matches(msk->bind_addr, mh->dest))
+ if (msk->bind_peer_set &&
+ !mctp_address_matches(msk->bind_peer_addr, src))
+ continue;
+
+ if (!mctp_address_matches(msk->bind_local_addr, dest))
continue;
return msk;
@@ -73,6 +78,54 @@ static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
return NULL;
}
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+ struct mctp_sock *msk;
+ struct mctp_hdr *mh;
+ u8 type;
+
+ /* TODO: look up in skb->cb? */
+ mh = mctp_hdr(skb);
+
+ if (!skb_headlen(skb))
+ return NULL;
+
+ type = (*(u8 *)skb->data) & 0x7f;
+
+ /* Look for binds in order of widening scope. A given destination or
+ * source address also implies matching on a particular network.
+ *
+ * - Matching destination and source
+ * - Matching destination
+ * - Matching source
+ * - Matching network, any address
+ * - Any network or address
+ */
+
+ msk = mctp_lookup_bind_details(net, skb, type, mh->dest, mh->src,
+ false);
+ if (msk)
+ return msk;
+ msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY, mh->src,
+ false);
+ if (msk)
+ return msk;
+ msk = mctp_lookup_bind_details(net, skb, type, mh->dest, MCTP_ADDR_ANY,
+ false);
+ if (msk)
+ return msk;
+ msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
+ MCTP_ADDR_ANY, false);
+ if (msk)
+ return msk;
+ msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
+ MCTP_ADDR_ANY, true);
+ if (msk)
+ return msk;
+
+ return NULL;
+}
+
/* A note on the key allocations.
*
* struct net->mctp.keys contains our set of currently-allocated keys for
@@ -368,7 +421,7 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
return 0;
}
-static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
{
struct mctp_sk_key *key, *any_key = NULL;
struct net *net = dev_net(skb->dev);
@@ -392,6 +445,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
*/
skb_orphan(skb);
+ if (skb->pkt_type == PACKET_OUTGOING)
+ skb->pkt_type = PACKET_LOOPBACK;
+
/* ensure we have enough data for a header and a type */
if (skb->len < sizeof(struct mctp_hdr) + 1)
goto out;
@@ -556,39 +612,31 @@ out:
return rc;
}
-static unsigned int mctp_route_mtu(struct mctp_route *rt)
-{
- return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
-}
-
-static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
{
- struct mctp_skb_cb *cb = mctp_cb(skb);
- struct mctp_hdr *hdr = mctp_hdr(skb);
char daddr_buf[MAX_ADDR_LEN];
char *daddr = NULL;
- unsigned int mtu;
int rc;
skb->protocol = htons(ETH_P_MCTP);
+ skb->pkt_type = PACKET_OUTGOING;
- mtu = READ_ONCE(skb->dev->mtu);
- if (skb->len > mtu) {
+ if (skb->len > dst->mtu) {
kfree_skb(skb);
return -EMSGSIZE;
}
- if (cb->ifindex) {
- /* direct route; use the hwaddr we stashed in sendmsg */
- if (cb->halen != skb->dev->addr_len) {
+ /* direct route; use the hwaddr we stashed in sendmsg */
+ if (dst->halen) {
+ if (dst->halen != skb->dev->addr_len) {
/* sanity check, sendmsg should have already caught this */
kfree_skb(skb);
return -EMSGSIZE;
}
- daddr = cb->haddr;
+ daddr = dst->haddr;
} else {
/* If lookup fails let the device handle daddr==NULL */
- if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+ if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0)
daddr = daddr_buf;
}
@@ -599,7 +647,7 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
return -EHOSTUNREACH;
}
- mctp_flow_prepare_output(skb, route->dev);
+ mctp_flow_prepare_output(skb, dst->dev);
rc = dev_queue_xmit(skb);
if (rc)
@@ -612,7 +660,8 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
static void mctp_route_release(struct mctp_route *rt)
{
if (refcount_dec_and_test(&rt->refs)) {
- mctp_dev_put(rt->dev);
+ if (rt->dst_type == MCTP_ROUTE_DIRECT)
+ mctp_dev_put(rt->dev);
kfree_rcu(rt, rcu);
}
}
@@ -628,7 +677,7 @@ static struct mctp_route *mctp_route_alloc(void)
INIT_LIST_HEAD(&rt->list);
refcount_set(&rt->refs, 1);
- rt->output = mctp_route_discard;
+ rt->output = mctp_dst_discard;
return rt;
}
@@ -801,10 +850,16 @@ static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
}
/* routing lookups */
+static unsigned int mctp_route_netid(struct mctp_route *rt)
+{
+ return rt->dst_type == MCTP_ROUTE_DIRECT ?
+ READ_ONCE(rt->dev->net) : rt->gateway.net;
+}
+
static bool mctp_rt_match_eid(struct mctp_route *rt,
unsigned int net, mctp_eid_t eid)
{
- return READ_ONCE(rt->dev->net) == net &&
+ return mctp_route_netid(rt) == net &&
rt->min <= eid && rt->max >= eid;
}
@@ -813,54 +868,150 @@ static bool mctp_rt_compare_exact(struct mctp_route *rt1,
struct mctp_route *rt2)
{
ASSERT_RTNL();
- return rt1->dev->net == rt2->dev->net &&
+ return mctp_route_netid(rt1) == mctp_route_netid(rt2) &&
rt1->min == rt2->min &&
rt1->max == rt2->max;
}
-struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
- mctp_eid_t daddr)
+/* must only be called on a direct route, as the final output hop */
+static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid,
+ unsigned int mtu, struct mctp_route *route)
+{
+ mctp_dev_hold(route->dev);
+ dst->nexthop = eid;
+ dst->dev = route->dev;
+ dst->mtu = READ_ONCE(dst->dev->dev->mtu);
+ if (mtu)
+ dst->mtu = min(dst->mtu, mtu);
+ dst->halen = 0;
+ dst->output = route->output;
+}
+
+int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex,
+ unsigned char halen, const unsigned char *haddr)
{
- struct mctp_route *tmp, *rt = NULL;
+ struct net_device *netdev;
+ struct mctp_dev *dev;
+ int rc = -ENOENT;
+
+ if (halen > sizeof(dst->haddr))
+ return -EINVAL;
rcu_read_lock();
- list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
- /* TODO: add metrics */
- if (mctp_rt_match_eid(tmp, dnet, daddr)) {
- if (refcount_inc_not_zero(&tmp->refs)) {
- rt = tmp;
- break;
- }
- }
+ netdev = dev_get_by_index_rcu(net, ifindex);
+ if (!netdev)
+ goto out_unlock;
+
+ if (netdev->addr_len != halen) {
+ rc = -EINVAL;
+ goto out_unlock;
}
+ dev = __mctp_dev_get(netdev);
+ if (!dev)
+ goto out_unlock;
+
+ dst->dev = dev;
+ dst->mtu = READ_ONCE(netdev->mtu);
+ dst->halen = halen;
+ dst->output = mctp_dst_output;
+ dst->nexthop = 0;
+ memcpy(dst->haddr, haddr, halen);
+
+ rc = 0;
+
+out_unlock:
rcu_read_unlock();
+ return rc;
+}
- return rt;
+void mctp_dst_release(struct mctp_dst *dst)
+{
+ mctp_dev_put(dst->dev);
+}
+
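mctp_dst_from_extaddr() and mctp_dst_release() together replace the temporary-route construction that mctp_local_output() used to do for extended addressing. A hedged caller sketch, error handling trimmed, with net, sk, skb, daddr, ifindex, halen and haddr assumed to be in scope from a sendmsg-style path:

	struct mctp_dst dst;
	int rc;

	/* resolve ifindex plus hardware address into a one-shot dst;
	 * on success this holds a reference on the mctp_dev
	 */
	rc = mctp_dst_from_extaddr(&dst, net, ifindex, halen, haddr);
	if (rc)
		return rc;

	rc = mctp_local_output(sk, &dst, skb, daddr, MCTP_TAG_OWNER);

	/* drop the device reference taken above */
	mctp_dst_release(&dst);
	return rc;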
+static struct mctp_route *mctp_route_lookup_single(struct net *net,
+ unsigned int dnet,
+ mctp_eid_t daddr)
+{
+ struct mctp_route *rt;
+
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (mctp_rt_match_eid(rt, dnet, daddr))
+ return rt;
+ }
+
+ return NULL;
}
-static struct mctp_route *mctp_route_lookup_null(struct net *net,
- struct net_device *dev)
+/* populates *dst on successful lookup, if set */
+int mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr, struct mctp_dst *dst)
{
- struct mctp_route *tmp, *rt = NULL;
+ const unsigned int max_depth = 32;
+ unsigned int depth, mtu = 0;
+ int rc = -EHOSTUNREACH;
rcu_read_lock();
- list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
- if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
- refcount_inc_not_zero(&tmp->refs)) {
- rt = tmp;
+ for (depth = 0; depth < max_depth; depth++) {
+ struct mctp_route *rt;
+
+ rt = mctp_route_lookup_single(net, dnet, daddr);
+ if (!rt)
+ break;
+
+ /* clamp mtu to the smallest in the path, allowing 0
+ * to specify no restrictions
+ */
+ if (mtu && rt->mtu)
+ mtu = min(mtu, rt->mtu);
+ else
+ mtu = mtu ?: rt->mtu;
+
+ if (rt->dst_type == MCTP_ROUTE_DIRECT) {
+ if (dst)
+ mctp_dst_from_route(dst, daddr, mtu, rt);
+ rc = 0;
break;
+
+ } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
+ daddr = rt->gateway.eid;
}
}
rcu_read_unlock();
- return rt;
+ return rc;
+}
+
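The MTU handling above treats 0 as "no restriction": the lookup carries forward the smallest non-zero MTU seen along the gateway chain, and mctp_dst_from_route() then clamps that against the device MTU. A standalone illustration of the min-with-zero logic, not part of the patch:

	unsigned int mtu = 0;
	const unsigned int route_mtus[] = { 0, 99, 68 }; /* hops, local to remote */

	for (size_t i = 0; i < ARRAY_SIZE(route_mtus); i++) {
		unsigned int hop = route_mtus[i];

		if (mtu && hop)
			mtu = min(mtu, hop);
		else
			mtu = mtu ?: hop;
	}
	/* mtu == 68 here; with { 0, 0, 0 } it stays 0 and the device MTU applies */

The same behaviour is exercised by the mctp_route_gw_mtu_tests table in the KUnit changes further down.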
+static int mctp_route_lookup_null(struct net *net, struct net_device *dev,
+ struct mctp_dst *dst)
+{
+ int rc = -EHOSTUNREACH;
+ struct mctp_route *rt;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL)
+ continue;
+
+ if (rt->dev->dev != dev)
+ continue;
+
+ mctp_dst_from_route(dst, 0, 0, rt);
+ rc = 0;
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return rc;
}
-static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb,
unsigned int mtu, u8 tag)
{
const unsigned int hlen = sizeof(struct mctp_hdr);
@@ -933,7 +1084,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
skb_ext_copy(skb2, skb);
/* do route */
- rc = rt->output(rt, skb2);
+ rc = dst->output(dst, skb2);
if (rc)
break;
@@ -945,68 +1096,34 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
return rc;
}
-int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+int mctp_local_output(struct sock *sk, struct mctp_dst *dst,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
- struct mctp_skb_cb *cb = mctp_cb(skb);
- struct mctp_route tmp_rt = {0};
struct mctp_sk_key *key;
struct mctp_hdr *hdr;
unsigned long flags;
unsigned int netid;
unsigned int mtu;
mctp_eid_t saddr;
- bool ext_rt;
int rc;
u8 tag;
- rc = -ENODEV;
-
- if (rt) {
- ext_rt = false;
- if (WARN_ON(!rt->dev))
- goto out_release;
-
- } else if (cb->ifindex) {
- struct net_device *dev;
-
- ext_rt = true;
- rt = &tmp_rt;
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
- if (!dev) {
- rcu_read_unlock();
- goto out_free;
- }
- rt->dev = __mctp_dev_get(dev);
- rcu_read_unlock();
-
- if (!rt->dev)
- goto out_release;
+ KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr,
+ req_tag);
- /* establish temporary route - we set up enough to keep
- * mctp_route_output happy
- */
- rt->output = mctp_route_output;
- rt->mtu = 0;
-
- } else {
- rc = -EINVAL;
- goto out_free;
- }
+ rc = -ENODEV;
- spin_lock_irqsave(&rt->dev->addrs_lock, flags);
- if (rt->dev->num_addrs == 0) {
+ spin_lock_irqsave(&dst->dev->addrs_lock, flags);
+ if (dst->dev->num_addrs == 0) {
rc = -EHOSTUNREACH;
} else {
/* use the outbound interface's first address as our source */
- saddr = rt->dev->addrs[0];
+ saddr = dst->dev->addrs[0];
rc = 0;
}
- spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
- netid = READ_ONCE(rt->dev->net);
+ spin_unlock_irqrestore(&dst->dev->addrs_lock, flags);
+ netid = READ_ONCE(dst->dev->net);
if (rc)
goto out_release;
@@ -1032,15 +1149,13 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
tag = req_tag & MCTP_TAG_MASK;
}
+ skb->pkt_type = PACKET_OUTGOING;
skb->protocol = htons(ETH_P_MCTP);
skb->priority = 0;
skb_reset_transport_header(skb);
skb_push(skb, sizeof(struct mctp_hdr));
skb_reset_network_header(skb);
- skb->dev = rt->dev->dev;
-
- /* cb->net will have been set on initial ingress */
- cb->src = saddr;
+ skb->dev = dst->dev->dev;
/* set up common header fields */
hdr = mctp_hdr(skb);
@@ -1048,73 +1163,64 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
hdr->dest = daddr;
hdr->src = saddr;
- mtu = mctp_route_mtu(rt);
+ mtu = dst->mtu;
if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
MCTP_HDR_FLAG_EOM | tag;
- rc = rt->output(rt, skb);
+ rc = dst->output(dst, skb);
} else {
- rc = mctp_do_fragment_route(rt, skb, mtu, tag);
+ rc = mctp_do_fragment_route(dst, skb, mtu, tag);
}
/* route output functions consume the skb, even on error */
skb = NULL;
out_release:
- if (!ext_rt)
- mctp_route_release(rt);
-
- mctp_dev_put(tmp_rt.dev);
-
-out_free:
kfree_skb(skb);
return rc;
}
/* route management */
-static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
- unsigned int daddr_extent, unsigned int mtu,
- unsigned char type)
+
+/* mctp_route_add(): Add the provided route, previously allocated via
+ * mctp_route_alloc(). On success, takes ownership of @rt, which includes a
+ * hold on rt->dev for use in the route table. On failure, the caller should
+ * call mctp_route_release().
+ *
+ * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max,
+ * rt->mtu and either rt->dev (with a reference held appropriately) or
+ * rt->gateway. Other fields will be populated.
+ */
+static int mctp_route_add(struct net *net, struct mctp_route *rt)
{
- int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
- struct net *net = dev_net(mdev->dev);
- struct mctp_route *rt, *ert;
+ struct mctp_route *ert;
- if (!mctp_address_unicast(daddr_start))
+ if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max))
return -EINVAL;
- if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev)
+ return -EINVAL;
+
+ if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid)
return -EINVAL;
- switch (type) {
+ switch (rt->type) {
case RTN_LOCAL:
- rtfn = mctp_route_input;
+ rt->output = mctp_dst_input;
break;
case RTN_UNICAST:
- rtfn = mctp_route_output;
+ rt->output = mctp_dst_output;
break;
default:
return -EINVAL;
}
- rt = mctp_route_alloc();
- if (!rt)
- return -ENOMEM;
-
- rt->min = daddr_start;
- rt->max = daddr_start + daddr_extent;
- rt->mtu = mtu;
- rt->dev = mdev;
- mctp_dev_hold(rt->dev);
- rt->type = type;
- rt->output = rtfn;
-
ASSERT_RTNL();
+
/* Prevent duplicate identical routes. */
list_for_each_entry(ert, &net->mctp.routes, list) {
if (mctp_rt_compare_exact(rt, ert)) {
- mctp_route_release(rt);
return -EEXIST;
}
}
@@ -1124,10 +1230,10 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
return 0;
}
-static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
- unsigned int daddr_extent, unsigned char type)
+static int mctp_route_remove(struct net *net, unsigned int netid,
+ mctp_eid_t daddr_start, unsigned int daddr_extent,
+ unsigned char type)
{
- struct net *net = dev_net(mdev->dev);
struct mctp_route *rt, *tmp;
mctp_eid_t daddr_end;
bool dropped;
@@ -1141,7 +1247,7 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
ASSERT_RTNL();
list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
- if (rt->dev == mdev &&
+ if (mctp_route_netid(rt) == netid &&
rt->min == daddr_start && rt->max == daddr_end &&
rt->type == type) {
list_del_rcu(&rt->list);
@@ -1156,12 +1262,32 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
- return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
+ struct mctp_route *rt;
+ int rc;
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rt->min = addr;
+ rt->max = addr;
+ rt->dst_type = MCTP_ROUTE_DIRECT;
+ rt->dev = mdev;
+ rt->type = RTN_LOCAL;
+
+ mctp_dev_hold(rt->dev);
+
+ rc = mctp_route_add(dev_net(mdev->dev), rt);
+ if (rc)
+ mctp_route_release(rt);
+
+ return rc;
}
int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
- return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
+ return mctp_route_remove(dev_net(mdev->dev), mdev->net,
+ addr, 0, RTN_LOCAL);
}
/* removes all entries for a given device */
@@ -1172,7 +1298,7 @@ void mctp_route_remove_dev(struct mctp_dev *mdev)
ASSERT_RTNL();
list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
- if (rt->dev == mdev) {
+ if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == mdev) {
list_del_rcu(&rt->list);
/* TODO: immediate RTM_DELROUTE */
mctp_route_release(rt);
@@ -1189,8 +1315,9 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
struct net *net = dev_net(dev);
struct mctp_dev *mdev;
struct mctp_skb_cb *cb;
- struct mctp_route *rt;
+ struct mctp_dst dst;
struct mctp_hdr *mh;
+ int rc;
rcu_read_lock();
mdev = __mctp_dev_get(dev);
@@ -1232,17 +1359,17 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
cb->net = READ_ONCE(mdev->net);
cb->ifindex = dev->ifindex;
- rt = mctp_route_lookup(net, cb->net, mh->dest);
+ rc = mctp_route_lookup(net, cb->net, mh->dest, &dst);
/* NULL EID, but addressed to our physical address */
- if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
- rt = mctp_route_lookup_null(net, dev);
+ if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
+ rc = mctp_route_lookup_null(net, dev, &dst);
- if (!rt)
+ if (rc)
goto err_drop;
- rt->output(rt, skb);
- mctp_route_release(rt);
+ dst.output(&dst, skb);
+ mctp_dst_release(&dst);
mctp_dev_put(mdev);
return NET_RX_SUCCESS;
@@ -1264,19 +1391,28 @@ static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
[RTA_DST] = { .type = NLA_U8 },
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_GATEWAY] = NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)),
+};
+
+static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
+ [RTAX_MTU] = { .type = NLA_U32 },
};
-/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
- * tb must hold RTA_MAX+1 elements.
+/* base parsing; common to both _lookup and _populate variants.
+ *
+ * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate
+ * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev.
*/
-static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack,
- struct nlattr **tb, struct rtmsg **rtm,
- struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb, struct rtmsg **rtm,
+ struct mctp_dev **mdev,
+ struct mctp_fq_addr *gatewayp,
+ mctp_eid_t *daddr_start)
{
- struct net *net = sock_net(skb->sk);
+ struct mctp_fq_addr *gateway = NULL;
+ unsigned int ifindex = 0;
struct net_device *dev;
- unsigned int ifindex;
int rc;
rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
@@ -1292,11 +1428,44 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
}
*daddr_start = nla_get_u8(tb[RTA_DST]);
- if (!tb[RTA_OIF]) {
- NL_SET_ERR_MSG(extack, "ifindex missing");
+ if (tb[RTA_OIF])
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ if (tb[RTA_GATEWAY])
+ gateway = nla_data(tb[RTA_GATEWAY]);
+
+ if (ifindex && gateway) {
+ NL_SET_ERR_MSG(extack,
+ "cannot specify both ifindex and gateway");
+ return -EINVAL;
+
+ } else if (ifindex) {
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "bad ifindex");
+ return -ENODEV;
+ }
+ *mdev = mctp_dev_get_rtnl(dev);
+ if (!*mdev)
+ return -ENODEV;
+ gatewayp->eid = 0;
+
+ } else if (gateway) {
+ if (!mctp_address_unicast(gateway->eid)) {
+ NL_SET_ERR_MSG(extack, "bad gateway");
+ return -EINVAL;
+ }
+
+ gatewayp->eid = gateway->eid;
+ gatewayp->net = gateway->net != MCTP_NET_ANY ?
+ gateway->net :
+ READ_ONCE(net->mctp.default_net);
+ *mdev = NULL;
+
+ } else {
+ NL_SET_ERR_MSG(extack, "no route output provided");
return -EINVAL;
}
- ifindex = nla_get_u32(tb[RTA_OIF]);
*rtm = nlmsg_data(nlh);
if ((*rtm)->rtm_family != AF_MCTP) {
@@ -1304,82 +1473,157 @@ static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
- dev = __dev_get_by_index(net, ifindex);
- if (!dev) {
- NL_SET_ERR_MSG(extack, "bad ifindex");
- return -ENODEV;
- }
- *mdev = mctp_dev_get_rtnl(dev);
- if (!*mdev)
- return -ENODEV;
-
- if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack, "no routes to loopback");
+ if ((*rtm)->rtm_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
return -EINVAL;
}
return 0;
}
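Given the parsing above, a gateway route is described by an RTA_GATEWAY attribute carrying a struct mctp_fq_addr with RTA_OIF omitted, while a direct route carries RTA_OIF and no gateway. A rough sketch of the attribute payload a userspace tool might build for RTM_NEWROUTE, shown with kernel-style nla helpers purely for illustration (the iproute2 side is not part of this patch):

	struct mctp_fq_addr gw = {
		.net = 1,	/* or MCTP_NET_ANY, resolved to the default net */
		.eid = 9,	/* next-hop endpoint */
	};
	u8 dst_eid = 11;

	/* rtm_family = AF_MCTP, rtm_type = RTN_UNICAST, rtm_dst_len = 0 */
	nla_put_u8(skb, RTA_DST, dst_eid);
	nla_put(skb, RTA_GATEWAY, sizeof(gw), &gw);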
-static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
- [RTAX_MTU] = { .type = NLA_U32 },
-};
-
-static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+/* Route parsing for lookup operations; we only need the "route target"
+ * components (i.e., network and dest-EID range).
+ */
+static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ unsigned char *type, unsigned int *netid,
+ mctp_eid_t *daddr_start,
+ unsigned int *daddr_extent)
{
struct nlattr *tb[RTA_MAX + 1];
+ struct mctp_fq_addr gw;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ int rc;
+
+ rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
+ &mdev, &gw, daddr_start);
+ if (rc)
+ return rc;
+
+ if (mdev) {
+ *netid = mdev->net;
+ } else if (gw.eid) {
+ *netid = gw.net;
+ } else {
+ /* bug: _nlparse_common should not allow this */
+ return -1;
+ }
+
+ *type = rtm->rtm_type;
+ *daddr_extent = rtm->rtm_dst_len;
+
+ return 0;
+}
+
+/* Full route parse for RTM_NEWROUTE: populate @rt. On success,
+ * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference
+ * to that dev.
+ */
+static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct mctp_route *rt)
+{
struct nlattr *tbx[RTAX_MAX + 1];
+ struct nlattr *tb[RTA_MAX + 1];
+ unsigned int daddr_extent;
+ struct mctp_fq_addr gw;
mctp_eid_t daddr_start;
- struct mctp_dev *mdev;
+ struct mctp_dev *dev;
struct rtmsg *rtm;
- unsigned int mtu;
+ u32 mtu = 0;
int rc;
- rc = mctp_route_nlparse(skb, nlh, extack, tb,
- &rtm, &mdev, &daddr_start);
- if (rc < 0)
+ rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
+ &dev, &gw, &daddr_start);
+ if (rc)
return rc;
- if (rtm->rtm_type != RTN_UNICAST) {
- NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
+ daddr_extent = rtm->rtm_dst_len;
+
+ if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) {
+ NL_SET_ERR_MSG(extack, "invalid eid range");
return -EINVAL;
}
- mtu = 0;
if (tb[RTA_METRICS]) {
rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
rta_metrics_policy, NULL);
- if (rc < 0)
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format");
return rc;
+ }
if (tbx[RTAX_MTU])
mtu = nla_get_u32(tbx[RTAX_MTU]);
}
- rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
- rtm->rtm_type);
+ rt->type = rtm->rtm_type;
+ rt->min = daddr_start;
+ rt->max = daddr_start + daddr_extent;
+ rt->mtu = mtu;
+ if (gw.eid) {
+ rt->dst_type = MCTP_ROUTE_GATEWAY;
+ rt->gateway.eid = gw.eid;
+ rt->gateway.net = gw.net;
+ } else {
+ rt->dst_type = MCTP_ROUTE_DIRECT;
+ rt->dev = dev;
+ mctp_dev_hold(rt->dev);
+ }
+
+ return 0;
+}
+
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mctp_route *rt;
+ int rc;
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rc = mctp_route_nlparse_populate(net, nlh, extack, rt);
+ if (rc < 0)
+ goto err_free;
+
+ if (rt->dst_type == MCTP_ROUTE_DIRECT &&
+ rt->dev->dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "no routes to loopback");
+ rc = -EINVAL;
+ goto err_free;
+ }
+
+ rc = mctp_route_add(net, rt);
+ if (!rc)
+ return 0;
+
+err_free:
+ mctp_route_release(rt);
return rc;
}
static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr *tb[RTA_MAX + 1];
+ struct net *net = sock_net(skb->sk);
+ unsigned int netid, daddr_extent;
+ unsigned char type = RTN_UNSPEC;
mctp_eid_t daddr_start;
- struct mctp_dev *mdev;
- struct rtmsg *rtm;
int rc;
- rc = mctp_route_nlparse(skb, nlh, extack, tb,
- &rtm, &mdev, &daddr_start);
+ rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid,
+ &daddr_start, &daddr_extent);
if (rc < 0)
return rc;
/* we only have unicast routes */
- if (rtm->rtm_type != RTN_UNICAST)
+ if (type != RTN_UNICAST)
return -EINVAL;
- rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
+ rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type);
return rc;
}
@@ -1405,7 +1649,6 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
hdr->rtm_tos = 0;
hdr->rtm_table = RT_TABLE_DEFAULT;
hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
- hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
hdr->rtm_type = rt->type;
if (nla_put_u8(skb, RTA_DST, rt->min))
@@ -1422,13 +1665,17 @@ static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
nla_nest_end(skb, metrics);
- if (rt->dev) {
+ if (rt->dst_type == MCTP_ROUTE_DIRECT) {
+ hdr->rtm_scope = RT_SCOPE_LINK;
if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
goto cancel;
+ } else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
+ hdr->rtm_scope = RT_SCOPE_UNIVERSE;
+ if (nla_put(skb, RTA_GATEWAY,
+ sizeof(rt->gateway), &rt->gateway))
+ goto cancel;
}
- /* TODO: conditional neighbour physaddr? */
-
nlmsg_end(skb, nlh);
return 0;
@@ -1475,7 +1722,7 @@ static int __net_init mctp_routes_net_init(struct net *net)
struct netns_mctp *ns = &net->mctp;
INIT_LIST_HEAD(&ns->routes);
- INIT_HLIST_HEAD(&ns->binds);
+ hash_init(ns->binds);
mutex_init(&ns->bind_lock);
INIT_HLIST_HEAD(&ns->keys);
spin_lock_init(&ns->keys_lock);
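The switch from INIT_HLIST_HEAD to hash_init() here implies a matching change in struct netns_mctp, not visible in this hunk: the single binds hlist becomes a fixed-size hashtable. A guess at its shape, using the same assumed MCTP_BINDS_BITS constant as the bind-hash sketch earlier:

	/* in struct netns_mctp */
	DECLARE_HASHTABLE(binds, MCTP_BINDS_BITS);	/* was: struct hlist_head binds */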
diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c
index 06c1897b685a..fb6b46a952cb 100644
--- a/net/mctp/test/route-test.c
+++ b/net/mctp/test/route-test.c
@@ -2,132 +2,11 @@
#include <kunit/test.h>
-#include "utils.h"
-
-struct mctp_test_route {
- struct mctp_route rt;
- struct sk_buff_head pkts;
-};
-
-static int mctp_test_route_output(struct mctp_route *rt, struct sk_buff *skb)
-{
- struct mctp_test_route *test_rt = container_of(rt, struct mctp_test_route, rt);
-
- skb_queue_tail(&test_rt->pkts, skb);
-
- return 0;
-}
-
-/* local version of mctp_route_alloc() */
-static struct mctp_test_route *mctp_route_test_alloc(void)
-{
- struct mctp_test_route *rt;
+/* keep clangd happy when compiled outside of the route.c include */
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
- rt = kzalloc(sizeof(*rt), GFP_KERNEL);
- if (!rt)
- return NULL;
-
- INIT_LIST_HEAD(&rt->rt.list);
- refcount_set(&rt->rt.refs, 1);
- rt->rt.output = mctp_test_route_output;
-
- skb_queue_head_init(&rt->pkts);
-
- return rt;
-}
-
-static struct mctp_test_route *mctp_test_create_route(struct net *net,
- struct mctp_dev *dev,
- mctp_eid_t eid,
- unsigned int mtu)
-{
- struct mctp_test_route *rt;
-
- rt = mctp_route_test_alloc();
- if (!rt)
- return NULL;
-
- rt->rt.min = eid;
- rt->rt.max = eid;
- rt->rt.mtu = mtu;
- rt->rt.type = RTN_UNSPEC;
- if (dev)
- mctp_dev_hold(dev);
- rt->rt.dev = dev;
-
- list_add_rcu(&rt->rt.list, &net->mctp.routes);
-
- return rt;
-}
-
-static void mctp_test_route_destroy(struct kunit *test,
- struct mctp_test_route *rt)
-{
- unsigned int refs;
-
- rtnl_lock();
- list_del_rcu(&rt->rt.list);
- rtnl_unlock();
-
- skb_queue_purge(&rt->pkts);
- if (rt->rt.dev)
- mctp_dev_put(rt->rt.dev);
-
- refs = refcount_read(&rt->rt.refs);
- KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance");
-
- kfree_rcu(&rt->rt, rcu);
-}
-
-static void mctp_test_skb_set_dev(struct sk_buff *skb,
- struct mctp_test_dev *dev)
-{
- struct mctp_skb_cb *cb;
-
- cb = mctp_cb(skb);
- cb->net = READ_ONCE(dev->mdev->net);
- skb->dev = dev->ndev;
-}
-
-static struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
- unsigned int data_len)
-{
- size_t hdr_len = sizeof(*hdr);
- struct sk_buff *skb;
- unsigned int i;
- u8 *buf;
-
- skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
- if (!skb)
- return NULL;
-
- __mctp_cb(skb);
- memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
-
- buf = skb_put(skb, data_len);
- for (i = 0; i < data_len; i++)
- buf[i] = i & 0xff;
-
- return skb;
-}
-
-static struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
- const void *data,
- size_t data_len)
-{
- size_t hdr_len = sizeof(*hdr);
- struct sk_buff *skb;
-
- skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
- if (!skb)
- return NULL;
-
- __mctp_cb(skb);
- memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
- memcpy(skb_put(skb, data_len), data, data_len);
-
- return skb;
-}
+#include "utils.h"
#define mctp_test_create_skb_data(h, d) \
__mctp_test_create_skb_data(h, d, sizeof(*d))
@@ -141,8 +20,10 @@ struct mctp_frag_test {
static void mctp_test_fragment(struct kunit *test)
{
const struct mctp_frag_test *params;
+ struct mctp_test_pktqueue tpq;
int rc, i, n, mtu, msgsize;
- struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct sk_buff *skb;
struct mctp_hdr hdr;
u8 seq;
@@ -159,13 +40,15 @@ static void mctp_test_fragment(struct kunit *test)
skb = mctp_test_create_skb(&hdr, msgsize);
KUNIT_ASSERT_TRUE(test, skb);
- rt = mctp_test_create_route(&init_net, NULL, 10, mtu);
- KUNIT_ASSERT_TRUE(test, rt);
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ mctp_test_dst_setup(test, &dst, dev, &tpq, mtu);
- rc = mctp_do_fragment_route(&rt->rt, skb, mtu, MCTP_TAG_OWNER);
+ rc = mctp_do_fragment_route(&dst, skb, mtu, MCTP_TAG_OWNER);
KUNIT_EXPECT_FALSE(test, rc);
- n = rt->pkts.qlen;
+ n = tpq.pkts.qlen;
KUNIT_EXPECT_EQ(test, n, params->n_frags);
@@ -178,7 +61,7 @@ static void mctp_test_fragment(struct kunit *test)
first = i == 0;
last = i == (n - 1);
- skb2 = skb_dequeue(&rt->pkts);
+ skb2 = skb_dequeue(&tpq.pkts);
if (!skb2)
break;
@@ -216,7 +99,8 @@ static void mctp_test_fragment(struct kunit *test)
kfree_skb(skb2);
}
- mctp_test_route_destroy(test, rt);
+ mctp_test_dst_release(&dst, &tpq);
+ mctp_test_destroy_dev(dev);
}
static const struct mctp_frag_test mctp_frag_tests[] = {
@@ -246,25 +130,30 @@ struct mctp_rx_input_test {
static void mctp_test_rx_input(struct kunit *test)
{
const struct mctp_rx_input_test *params;
+ struct mctp_test_pktqueue tpq;
struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct sk_buff *skb;
params = test->param_value;
+ test->priv = &tpq;
dev = mctp_test_create_dev();
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
- rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
+ rt = mctp_test_create_route_direct(&init_net, dev->mdev, 8, 68);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
skb = mctp_test_create_skb(&params->hdr, 1);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+ mctp_test_pktqueue_init(&tpq);
+
mctp_pkttype_receive(skb, dev->ndev, &mctp_packet_type, NULL);
- KUNIT_EXPECT_EQ(test, !!rt->pkts.qlen, params->input);
+ KUNIT_EXPECT_EQ(test, !!tpq.pkts.qlen, params->input);
+ skb_queue_purge(&tpq.pkts);
mctp_test_route_destroy(test, rt);
mctp_test_destroy_dev(dev);
}
@@ -292,12 +181,12 @@ KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests,
/* set up a local dev, route on EID 8, and a socket listening on type 0 */
static void __mctp_route_test_init(struct kunit *test,
struct mctp_test_dev **devp,
- struct mctp_test_route **rtp,
+ struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq,
struct socket **sockp,
unsigned int netid)
{
struct sockaddr_mctp addr = {0};
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct socket *sock;
int rc;
@@ -307,8 +196,7 @@ static void __mctp_route_test_init(struct kunit *test,
if (netid != MCTP_NET_ANY)
WRITE_ONCE(dev->mdev->net, netid);
- rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+ mctp_test_dst_setup(test, dst, dev, tpq, 68);
rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
KUNIT_ASSERT_EQ(test, rc, 0);
@@ -320,18 +208,18 @@ static void __mctp_route_test_init(struct kunit *test,
rc = kernel_bind(sock, (struct sockaddr *)&addr, sizeof(addr));
KUNIT_ASSERT_EQ(test, rc, 0);
- *rtp = rt;
*devp = dev;
*sockp = sock;
}
static void __mctp_route_test_fini(struct kunit *test,
struct mctp_test_dev *dev,
- struct mctp_test_route *rt,
+ struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq,
struct socket *sock)
{
sock_release(sock);
- mctp_test_route_destroy(test, rt);
+ mctp_test_dst_release(dst, tpq);
mctp_test_destroy_dev(dev);
}
@@ -344,22 +232,24 @@ struct mctp_route_input_sk_test {
static void mctp_test_route_input_sk(struct kunit *test)
{
const struct mctp_route_input_sk_test *params;
+ struct mctp_test_pktqueue tpq;
struct sk_buff *skb, *skb2;
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct socket *sock;
int rc;
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
skb = mctp_test_create_skb_data(&params->hdr, &params->type);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
mctp_test_skb_set_dev(skb, dev);
+ mctp_test_pktqueue_init(&tpq);
- rc = mctp_route_input(&rt->rt, skb);
+ rc = mctp_dst_input(&dst, skb);
if (params->deliver) {
KUNIT_EXPECT_EQ(test, rc, 0);
@@ -376,7 +266,7 @@ static void mctp_test_route_input_sk(struct kunit *test)
KUNIT_EXPECT_NULL(test, skb2);
}
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
#define FL_S (MCTP_HDR_FLAG_SOM)
@@ -413,16 +303,17 @@ struct mctp_route_input_sk_reasm_test {
static void mctp_test_route_input_sk_reasm(struct kunit *test)
{
const struct mctp_route_input_sk_reasm_test *params;
+ struct mctp_test_pktqueue tpq;
struct sk_buff *skb, *skb2;
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct socket *sock;
int i, rc;
u8 c;
params = test->param_value;
- __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
for (i = 0; i < params->n_hdrs; i++) {
c = i;
@@ -431,7 +322,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
mctp_test_skb_set_dev(skb, dev);
- rc = mctp_route_input(&rt->rt, skb);
+ rc = mctp_dst_input(&dst, skb);
}
skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
@@ -445,7 +336,7 @@ static void mctp_test_route_input_sk_reasm(struct kunit *test)
KUNIT_EXPECT_NULL(test, skb2);
}
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
#define RX_FRAG(f, s) RX_HDR(1, 10, 8, FL_TO | (f) | ((s) << MCTP_HDR_SEQ_SHIFT))
@@ -547,7 +438,7 @@ struct mctp_route_input_sk_keys_test {
static void mctp_test_route_input_sk_keys(struct kunit *test)
{
const struct mctp_route_input_sk_keys_test *params;
- struct mctp_test_route *rt;
+ struct mctp_test_pktqueue tpq;
struct sk_buff *skb, *skb2;
struct mctp_test_dev *dev;
struct mctp_sk_key *key;
@@ -555,6 +446,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
struct mctp_sock *msk;
struct socket *sock;
unsigned long flags;
+ struct mctp_dst dst;
unsigned int net;
int rc;
u8 c;
@@ -565,8 +457,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
net = READ_ONCE(dev->mdev->net);
- rt = mctp_test_create_route(&init_net, dev->mdev, 8, 68);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+ mctp_test_dst_setup(test, &dst, dev, &tpq, 68);
rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
KUNIT_ASSERT_EQ(test, rc, 0);
@@ -592,7 +483,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
mctp_test_skb_set_dev(skb, dev);
- rc = mctp_route_input(&rt->rt, skb);
+ rc = mctp_dst_input(&dst, skb);
/* (potentially) receive message */
skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
@@ -606,7 +497,7 @@ static void mctp_test_route_input_sk_keys(struct kunit *test)
skb_free_datagram(sock->sk, skb2);
mctp_key_unref(key);
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
static const struct mctp_route_input_sk_keys_test mctp_route_input_sk_keys_tests[] = {
@@ -681,7 +572,8 @@ KUNIT_ARRAY_PARAM(mctp_route_input_sk_keys, mctp_route_input_sk_keys_tests,
struct test_net {
unsigned int netid;
struct mctp_test_dev *dev;
- struct mctp_test_route *rt;
+ struct mctp_test_pktqueue tpq;
+ struct mctp_dst dst;
struct socket *sock;
struct sk_buff *skb;
struct mctp_sk_key *key;
@@ -699,18 +591,20 @@ mctp_test_route_input_multiple_nets_bind_init(struct kunit *test,
t->msg.data = t->netid;
- __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+ __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock,
+ t->netid);
t->skb = mctp_test_create_skb_data(&hdr, &t->msg);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, t->skb);
mctp_test_skb_set_dev(t->skb, t->dev);
+ mctp_test_pktqueue_init(&t->tpq);
}
static void
mctp_test_route_input_multiple_nets_bind_fini(struct kunit *test,
struct test_net *t)
{
- __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+ __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock);
}
/* Test that skbs from different nets (otherwise identical) get routed to their
@@ -731,9 +625,9 @@ static void mctp_test_route_input_multiple_nets_bind(struct kunit *test)
mctp_test_route_input_multiple_nets_bind_init(test, &t1);
mctp_test_route_input_multiple_nets_bind_init(test, &t2);
- rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ rc = mctp_dst_input(&t1.dst, t1.skb);
KUNIT_ASSERT_EQ(test, rc, 0);
- rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ rc = mctp_dst_input(&t2.dst, t2.skb);
KUNIT_ASSERT_EQ(test, rc, 0);
rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
@@ -767,7 +661,8 @@ mctp_test_route_input_multiple_nets_key_init(struct kunit *test,
t->msg.data = t->netid;
- __mctp_route_test_init(test, &t->dev, &t->rt, &t->sock, t->netid);
+ __mctp_route_test_init(test, &t->dev, &t->dst, &t->tpq, &t->sock,
+ t->netid);
msk = container_of(t->sock->sk, struct mctp_sock, sk);
@@ -790,7 +685,7 @@ mctp_test_route_input_multiple_nets_key_fini(struct kunit *test,
struct test_net *t)
{
mctp_key_unref(t->key);
- __mctp_route_test_fini(test, t->dev, t->rt, t->sock);
+ __mctp_route_test_fini(test, t->dev, &t->dst, &t->tpq, t->sock);
}
/* test that skbs from different nets (otherwise identical) get routed to their
@@ -812,9 +707,9 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test)
mctp_test_route_input_multiple_nets_key_init(test, &t1);
mctp_test_route_input_multiple_nets_key_init(test, &t2);
- rc = mctp_route_input(&t1.rt->rt, t1.skb);
+ rc = mctp_dst_input(&t1.dst, t1.skb);
KUNIT_ASSERT_EQ(test, rc, 0);
- rc = mctp_route_input(&t2.rt->rt, t2.skb);
+ rc = mctp_dst_input(&t2.dst, t2.skb);
KUNIT_ASSERT_EQ(test, rc, 0);
rx_skb1 = skb_recv_datagram(t1.sock->sk, MSG_DONTWAIT, &rc);
@@ -843,13 +738,14 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test)
static void mctp_test_route_input_sk_fail_single(struct kunit *test)
{
const struct mctp_hdr hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO);
- struct mctp_test_route *rt;
+ struct mctp_test_pktqueue tpq;
struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct socket *sock;
struct sk_buff *skb;
int rc;
- __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
/* No rcvbuf space, so delivery should fail. __sock_set_rcvbuf will
* clamp the minimum to SOCK_MIN_RCVBUF, so we open-code this.
@@ -865,14 +761,14 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test)
mctp_test_skb_set_dev(skb, dev);
/* do route input, which should fail */
- rc = mctp_route_input(&rt->rt, skb);
+ rc = mctp_dst_input(&dst, skb);
KUNIT_EXPECT_NE(test, rc, 0);
/* we should hold the only reference to skb */
KUNIT_EXPECT_EQ(test, refcount_read(&skb->users), 1);
kfree_skb(skb);
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
/* Input route to socket, using a fragmented message, where sock delivery fails.
@@ -880,14 +776,15 @@ static void mctp_test_route_input_sk_fail_single(struct kunit *test)
static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
{
const struct mctp_hdr hdrs[2] = { RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1) };
- struct mctp_test_route *rt;
+ struct mctp_test_pktqueue tpq;
struct mctp_test_dev *dev;
struct sk_buff *skbs[2];
+ struct mctp_dst dst;
struct socket *sock;
unsigned int i;
int rc;
- __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
lock_sock(sock->sk);
WRITE_ONCE(sock->sk->sk_rcvbuf, 0);
@@ -904,11 +801,11 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
/* first route input should succeed, we're only queueing to the
* frag list
*/
- rc = mctp_route_input(&rt->rt, skbs[0]);
+ rc = mctp_dst_input(&dst, skbs[0]);
KUNIT_EXPECT_EQ(test, rc, 0);
/* final route input should fail to deliver to the socket */
- rc = mctp_route_input(&rt->rt, skbs[1]);
+ rc = mctp_dst_input(&dst, skbs[1]);
KUNIT_EXPECT_NE(test, rc, 0);
/* we should hold the only reference to both skbs */
@@ -918,7 +815,7 @@ static void mctp_test_route_input_sk_fail_frag(struct kunit *test)
KUNIT_EXPECT_EQ(test, refcount_read(&skbs[1]->users), 1);
kfree_skb(skbs[1]);
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
/* Input route to socket, using a fragmented message created from clones.
@@ -933,23 +830,22 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test)
RX_FRAG(FL_S, 0),
RX_FRAG(FL_E, 1),
};
- struct mctp_test_route *rt;
+ const size_t data_len = 3; /* arbitrary */
+ u8 compare[3 * ARRAY_SIZE(hdrs)];
+ u8 flat[3 * ARRAY_SIZE(hdrs)];
+ struct mctp_test_pktqueue tpq;
struct mctp_test_dev *dev;
struct sk_buff *skb[5];
struct sk_buff *rx_skb;
+ struct mctp_dst dst;
struct socket *sock;
- size_t data_len;
- u8 compare[100];
- u8 flat[100];
size_t total;
void *p;
int rc;
- /* Arbitrary length */
- data_len = 3;
total = data_len + sizeof(struct mctp_hdr);
- __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
/* Create a single skb initially with concatenated packets */
skb[0] = mctp_test_create_skb(&hdrs[0], 5 * total);
@@ -988,7 +884,7 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test)
/* Feed the fragments into MCTP core */
for (int i = 0; i < 5; i++) {
- rc = mctp_route_input(&rt->rt, skb[i]);
+ rc = mctp_dst_input(&dst, skb[i]);
KUNIT_EXPECT_EQ(test, rc, 0);
}
@@ -1026,29 +922,29 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test)
kfree_skb(skb[i]);
}
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
}
#if IS_ENABLED(CONFIG_MCTP_FLOWS)
static void mctp_test_flow_init(struct kunit *test,
struct mctp_test_dev **devp,
- struct mctp_test_route **rtp,
+ struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq,
struct socket **sock,
struct sk_buff **skbp,
unsigned int len)
{
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct sk_buff *skb;
/* we have a slightly odd routing setup here; the test route
* is for EID 8, which is our local EID. We don't do a routing
* lookup, so that's fine - all we require is a path through
- * mctp_local_output, which will call rt->output on whatever
+ * mctp_local_output, which will call dst->output on whatever
* route we provide
*/
- __mctp_route_test_init(test, &dev, &rt, sock, MCTP_NET_ANY);
+ __mctp_route_test_init(test, &dev, dst, tpq, sock, MCTP_NET_ANY);
/* Assign a single EID. ->addrs is freed on mctp netdev release */
dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
@@ -1061,42 +957,41 @@ static void mctp_test_flow_init(struct kunit *test,
skb_reserve(skb, sizeof(struct mctp_hdr) + 1);
memset(skb_put(skb, len), 0, len);
- /* take a ref for the route, we'll decrement in local output */
- refcount_inc(&rt->rt.refs);
*devp = dev;
- *rtp = rt;
*skbp = skb;
}
static void mctp_test_flow_fini(struct kunit *test,
struct mctp_test_dev *dev,
- struct mctp_test_route *rt,
+ struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq,
struct socket *sock)
{
- __mctp_route_test_fini(test, dev, rt, sock);
+ __mctp_route_test_fini(test, dev, dst, tpq, sock);
}
/* test that an outgoing skb has the correct MCTP extension data set */
static void mctp_test_packet_flow(struct kunit *test)
{
+ struct mctp_test_pktqueue tpq;
struct sk_buff *skb, *skb2;
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct mctp_flow *flow;
struct socket *sock;
- u8 dst = 8;
+ u8 dst_eid = 8;
int n, rc;
- mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 30);
+ mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 30);
- rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER);
KUNIT_ASSERT_EQ(test, rc, 0);
- n = rt->pkts.qlen;
+ n = tpq.pkts.qlen;
KUNIT_ASSERT_EQ(test, n, 1);
- skb2 = skb_dequeue(&rt->pkts);
+ skb2 = skb_dequeue(&tpq.pkts);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2);
flow = skb_ext_find(skb2, SKB_EXT_MCTP);
@@ -1105,7 +1000,7 @@ static void mctp_test_packet_flow(struct kunit *test)
KUNIT_ASSERT_PTR_EQ(test, flow->key->sk, sock->sk);
kfree_skb(skb2);
- mctp_test_flow_fini(test, dev, rt, sock);
+ mctp_test_flow_fini(test, dev, &dst, &tpq, sock);
}
/* test that outgoing skbs, after fragmentation, all have the correct MCTP
@@ -1113,26 +1008,27 @@ static void mctp_test_packet_flow(struct kunit *test)
*/
static void mctp_test_fragment_flow(struct kunit *test)
{
+ struct mctp_test_pktqueue tpq;
struct mctp_flow *flows[2];
struct sk_buff *tx_skbs[2];
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
+ struct mctp_dst dst;
struct sk_buff *skb;
struct socket *sock;
- u8 dst = 8;
+ u8 dst_eid = 8;
int n, rc;
- mctp_test_flow_init(test, &dev, &rt, &sock, &skb, 100);
+ mctp_test_flow_init(test, &dev, &dst, &tpq, &sock, &skb, 100);
- rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER);
KUNIT_ASSERT_EQ(test, rc, 0);
- n = rt->pkts.qlen;
+ n = tpq.pkts.qlen;
KUNIT_ASSERT_EQ(test, n, 2);
/* both resulting packets should have the same flow data */
- tx_skbs[0] = skb_dequeue(&rt->pkts);
- tx_skbs[1] = skb_dequeue(&rt->pkts);
+ tx_skbs[0] = skb_dequeue(&tpq.pkts);
+ tx_skbs[1] = skb_dequeue(&tpq.pkts);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[0]);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tx_skbs[1]);
@@ -1148,7 +1044,7 @@ static void mctp_test_fragment_flow(struct kunit *test)
kfree_skb(tx_skbs[0]);
kfree_skb(tx_skbs[1]);
- mctp_test_flow_fini(test, dev, rt, sock);
+ mctp_test_flow_fini(test, dev, &dst, &tpq, sock);
}
#else
@@ -1166,15 +1062,16 @@ static void mctp_test_fragment_flow(struct kunit *test)
/* Test that outgoing skbs cause a suitable tag to be created */
static void mctp_test_route_output_key_create(struct kunit *test)
{
+ const u8 dst_eid = 26, src_eid = 15;
+ struct mctp_test_pktqueue tpq;
const unsigned int netid = 50;
- const u8 dst = 26, src = 15;
- struct mctp_test_route *rt;
struct mctp_test_dev *dev;
struct mctp_sk_key *key;
struct netns_mctp *mns;
unsigned long flags;
struct socket *sock;
struct sk_buff *skb;
+ struct mctp_dst dst;
bool empty, single;
const int len = 2;
int rc;
@@ -1183,15 +1080,14 @@ static void mctp_test_route_output_key_create(struct kunit *test)
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
WRITE_ONCE(dev->mdev->net, netid);
- rt = mctp_test_create_route(&init_net, dev->mdev, dst, 68);
- KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+ mctp_test_dst_setup(test, &dst, dev, &tpq, 68);
rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
KUNIT_ASSERT_EQ(test, rc, 0);
dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL);
dev->mdev->num_addrs = 1;
- dev->mdev->addrs[0] = src;
+ dev->mdev->addrs[0] = src_eid;
skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL);
KUNIT_ASSERT_TRUE(test, skb);
@@ -1199,8 +1095,6 @@ static void mctp_test_route_output_key_create(struct kunit *test)
skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len);
memset(skb_put(skb, len), 0, len);
- refcount_inc(&rt->rt.refs);
-
mns = &sock_net(sock->sk)->mctp;
/* We assume we're starting from an empty keys list, which requires
@@ -1211,7 +1105,7 @@ static void mctp_test_route_output_key_create(struct kunit *test)
spin_unlock_irqrestore(&mns->keys_lock, flags);
KUNIT_ASSERT_TRUE(test, empty);
- rc = mctp_local_output(sock->sk, &rt->rt, skb, dst, MCTP_TAG_OWNER);
+ rc = mctp_local_output(sock->sk, &dst, skb, dst_eid, MCTP_TAG_OWNER);
KUNIT_ASSERT_EQ(test, rc, 0);
key = NULL;
@@ -1227,16 +1121,480 @@ static void mctp_test_route_output_key_create(struct kunit *test)
KUNIT_ASSERT_TRUE(test, single);
KUNIT_EXPECT_EQ(test, key->net, netid);
- KUNIT_EXPECT_EQ(test, key->local_addr, src);
- KUNIT_EXPECT_EQ(test, key->peer_addr, dst);
+ KUNIT_EXPECT_EQ(test, key->local_addr, src_eid);
+ KUNIT_EXPECT_EQ(test, key->peer_addr, dst_eid);
/* key has incoming tag, so inverse of what we sent */
KUNIT_EXPECT_FALSE(test, key->tag & MCTP_TAG_OWNER);
sock_release(sock);
- mctp_test_route_destroy(test, rt);
+ mctp_test_dst_release(&dst, &tpq);
mctp_test_destroy_dev(dev);
}
+static void mctp_test_route_extaddr_input(struct kunit *test)
+{
+ static const unsigned char haddr[] = { 0xaa, 0x55 };
+ struct mctp_test_pktqueue tpq;
+ struct mctp_skb_cb *cb, *cb2;
+ const unsigned int len = 40;
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb, *skb2;
+ struct mctp_dst dst;
+ struct mctp_hdr hdr;
+ struct socket *sock;
+ int rc;
+
+ hdr.ver = 1;
+ hdr.src = 10;
+ hdr.dest = 8;
+ hdr.flags_seq_tag = FL_S | FL_E | FL_TO;
+
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock, MCTP_NET_ANY);
+
+ skb = mctp_test_create_skb(&hdr, len);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ /* set our hardware addressing data */
+ cb = mctp_cb(skb);
+ memcpy(cb->haddr, haddr, sizeof(haddr));
+ cb->halen = sizeof(haddr);
+
+ mctp_test_skb_set_dev(skb, dev);
+
+ rc = mctp_dst_input(&dst, skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2);
+ KUNIT_ASSERT_EQ(test, skb2->len, len);
+
+ cb2 = mctp_cb(skb2);
+
+ /* Received SKB should have the hardware addressing as set above.
+	 * We're likely to have the same actual cb here (i.e., cb == cb2),
+	 * but it's the comparison that we care about.
+ */
+ KUNIT_EXPECT_EQ(test, cb2->halen, sizeof(haddr));
+ KUNIT_EXPECT_MEMEQ(test, cb2->haddr, haddr, sizeof(haddr));
+
+ kfree_skb(skb2);
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock);
+}
+
+static void mctp_test_route_gw_lookup(struct kunit *test)
+{
+ struct mctp_test_route *rt1, *rt2;
+ struct mctp_dst dst = { 0 };
+ struct mctp_test_dev *dev;
+ int rc;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ /* 8 (local) -> 10 (gateway) via 9 (direct) */
+ rt1 = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1);
+ rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2);
+
+ rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_PTR_EQ(test, dst.dev, dev->mdev);
+ KUNIT_EXPECT_EQ(test, dst.mtu, dev->ndev->mtu);
+ KUNIT_EXPECT_EQ(test, dst.nexthop, 9);
+ KUNIT_EXPECT_EQ(test, dst.halen, 0);
+
+ mctp_dst_release(&dst);
+
+ mctp_test_route_destroy(test, rt2);
+ mctp_test_route_destroy(test, rt1);
+ mctp_test_destroy_dev(dev);
+}
+
+static void mctp_test_route_gw_loop(struct kunit *test)
+{
+ struct mctp_test_route *rt1, *rt2;
+ struct mctp_dst dst = { 0 };
+ struct mctp_test_dev *dev;
+ int rc;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ /* two routes using each other as the gw */
+ rt1 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 9, 10, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1);
+ rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2);
+
+ /* this should fail, rather than infinite-loop */
+ rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst);
+ KUNIT_EXPECT_NE(test, rc, 0);
+
+ mctp_test_route_destroy(test, rt2);
+ mctp_test_route_destroy(test, rt1);
+ mctp_test_destroy_dev(dev);
+}
+
+struct mctp_route_gw_mtu_test {
+ /* working away from the local stack */
+ unsigned int dev, neigh, gw, dst;
+ unsigned int exp;
+};
+
+static void mctp_route_gw_mtu_to_desc(const struct mctp_route_gw_mtu_test *t,
+ char *desc)
+{
+ sprintf(desc, "dev %d, neigh %d, gw %d, dst %d -> %d",
+ t->dev, t->neigh, t->gw, t->dst, t->exp);
+}
+
+static const struct mctp_route_gw_mtu_test mctp_route_gw_mtu_tests[] = {
+ /* no route-specific MTUs */
+ { 68, 0, 0, 0, 68 },
+ { 100, 0, 0, 0, 100 },
+ /* one route MTU (smaller than dev mtu), others unrestricted */
+ { 100, 68, 0, 0, 68 },
+ { 100, 0, 68, 0, 68 },
+ { 100, 0, 0, 68, 68 },
+ /* smallest applied, regardless of order */
+ { 100, 99, 98, 68, 68 },
+ { 99, 100, 98, 68, 68 },
+ { 98, 99, 100, 68, 68 },
+ { 68, 98, 99, 100, 68 },
+};
+
+KUNIT_ARRAY_PARAM(mctp_route_gw_mtu, mctp_route_gw_mtu_tests,
+ mctp_route_gw_mtu_to_desc);
+
+static void mctp_test_route_gw_mtu(struct kunit *test)
+{
+ const struct mctp_route_gw_mtu_test *mtus = test->param_value;
+ struct mctp_test_route *rt1, *rt2, *rt3;
+ struct mctp_dst dst = { 0 };
+ struct mctp_test_dev *dev;
+ struct mctp_dev *mdev;
+ unsigned int netid;
+ int rc;
+
+ dev = mctp_test_create_dev();
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ dev->ndev->mtu = mtus->dev;
+ mdev = dev->mdev;
+ netid = mdev->net;
+
+ /* 8 (local) -> 11 (dst) via 10 (gw) via 9 (neigh) */
+ rt1 = mctp_test_create_route_direct(&init_net, mdev, 9, mtus->neigh);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1);
+
+ rt2 = mctp_test_create_route_gw(&init_net, netid, 10, 9, mtus->gw);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2);
+
+ rt3 = mctp_test_create_route_gw(&init_net, netid, 11, 10, mtus->dst);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt3);
+
+ rc = mctp_route_lookup(&init_net, dev->mdev->net, 11, &dst);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ KUNIT_EXPECT_EQ(test, dst.mtu, mtus->exp);
+
+ mctp_dst_release(&dst);
+
+ mctp_test_route_destroy(test, rt3);
+ mctp_test_route_destroy(test, rt2);
+ mctp_test_route_destroy(test, rt1);
+ mctp_test_destroy_dev(dev);
+}
+
+#define MCTP_TEST_LLADDR_LEN 2
+struct mctp_test_llhdr {
+ unsigned int magic;
+ unsigned char src[MCTP_TEST_LLADDR_LEN];
+ unsigned char dst[MCTP_TEST_LLADDR_LEN];
+};
+
+static const unsigned int mctp_test_llhdr_magic = 0x5c78339c;
+
+static int test_dev_header_create(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
+{
+ struct kunit *test = current->kunit_test;
+ struct mctp_test_llhdr *hdr;
+
+ hdr = skb_push(skb, sizeof(*hdr));
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hdr);
+ skb_reset_mac_header(skb);
+
+ hdr->magic = mctp_test_llhdr_magic;
+ memcpy(&hdr->src, saddr, sizeof(hdr->src));
+ memcpy(&hdr->dst, daddr, sizeof(hdr->dst));
+
+ return 0;
+}
+
+/* Test the dst_output path for a gateway-routed skb: we should have it
+ * look up the nexthop EID in the neighbour table, and call into
+ * header_ops->create to resolve that to a lladdr. Our mock header_ops->create
+ * will just set a synthetic link-layer header, which we check after transmit.
+ */
+static void mctp_test_route_gw_output(struct kunit *test)
+{
+ const unsigned char haddr_self[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x03 };
+ const unsigned char haddr_peer[MCTP_TEST_LLADDR_LEN] = { 0xaa, 0x02 };
+ const struct header_ops ops = {
+ .create = test_dev_header_create,
+ };
+ struct mctp_neigh neigh = { 0 };
+ struct mctp_test_llhdr *ll_hdr;
+ struct mctp_dst dst = { 0 };
+ struct mctp_hdr hdr = { 0 };
+ struct mctp_test_dev *dev;
+ struct sk_buff *skb;
+ unsigned char *buf;
+ int i, rc;
+
+ dev = mctp_test_create_dev_lladdr(sizeof(haddr_self), haddr_self);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+ dev->ndev->header_ops = &ops;
+
+ dst.dev = dev->mdev;
+ __mctp_dev_get(dst.dev->dev);
+ dst.mtu = 68;
+ dst.nexthop = 9;
+
+ /* simple mctp_neigh_add for the gateway (not dest!) endpoint */
+ INIT_LIST_HEAD(&neigh.list);
+ neigh.dev = dev->mdev;
+ mctp_dev_hold(dev->mdev);
+ neigh.eid = 9;
+ neigh.source = MCTP_NEIGH_STATIC;
+ memcpy(neigh.ha, haddr_peer, sizeof(haddr_peer));
+ list_add_rcu(&neigh.list, &init_net.mctp.neighbours);
+
+ hdr.ver = 1;
+ hdr.src = 8;
+ hdr.dest = 10;
+ hdr.flags_seq_tag = FL_S | FL_E | FL_TO;
+
+ /* construct enough for a future link-layer header, the provided
+ * mctp header, and 4 bytes of data
+ */
+ skb = alloc_skb(sizeof(*ll_hdr) + sizeof(hdr) + 4, GFP_KERNEL);
+ skb->dev = dev->ndev;
+ __mctp_cb(skb);
+
+ skb_reserve(skb, sizeof(*ll_hdr));
+
+ memcpy(skb_put(skb, sizeof(hdr)), &hdr, sizeof(hdr));
+ buf = skb_put(skb, 4);
+ for (i = 0; i < 4; i++)
+ buf[i] = i;
+
+ /* extra ref over the dev_xmit */
+ skb_get(skb);
+
+ rc = mctp_dst_output(&dst, skb);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+
+ mctp_dst_release(&dst);
+ list_del_rcu(&neigh.list);
+ mctp_dev_put(dev->mdev);
+
+ /* check that we have our header created with the correct neighbour */
+ ll_hdr = (void *)skb_mac_header(skb);
+ KUNIT_EXPECT_EQ(test, ll_hdr->magic, mctp_test_llhdr_magic);
+ KUNIT_EXPECT_MEMEQ(test, ll_hdr->src, haddr_self, sizeof(haddr_self));
+ KUNIT_EXPECT_MEMEQ(test, ll_hdr->dst, haddr_peer, sizeof(haddr_peer));
+ kfree_skb(skb);
+}
+
+struct mctp_bind_lookup_test {
+ /* header of incoming message */
+ struct mctp_hdr hdr;
+ u8 ty;
+ /* mctp network of incoming interface (smctp_network) */
+ unsigned int net;
+
+ /* expected socket, matches .name in lookup_binds, NULL for dropped */
+ const char *expect;
+};
+
+/* Single-packet TO-set message */
+#define LK(src, dst) RX_HDR(1, (src), (dst), FL_S | FL_E | FL_TO)
+
+/* Input message test cases for bind lookup tests.
+ *
+ * 10 and 11 are local EIDs.
+ * 20 and 21 are remote EIDs.
+ */
+static const struct mctp_bind_lookup_test mctp_bind_lookup_tests[] = {
+	/* both local-eid and remote-eid binds; the remote eid is preferred */
+ { .hdr = LK(20, 10), .ty = 1, .net = 1, .expect = "remote20" },
+
+ { .hdr = LK(20, 255), .ty = 1, .net = 1, .expect = "remote20" },
+ { .hdr = LK(20, 0), .ty = 1, .net = 1, .expect = "remote20" },
+ { .hdr = LK(0, 255), .ty = 1, .net = 1, .expect = "any" },
+ { .hdr = LK(0, 11), .ty = 1, .net = 1, .expect = "any" },
+ { .hdr = LK(0, 0), .ty = 1, .net = 1, .expect = "any" },
+ { .hdr = LK(0, 10), .ty = 1, .net = 1, .expect = "local10" },
+ { .hdr = LK(21, 10), .ty = 1, .net = 1, .expect = "local10" },
+ { .hdr = LK(21, 11), .ty = 1, .net = 1, .expect = "remote21local11" },
+
+	/* both src and dest set to eid=99: unusual, but currently accepted
+	 * by the MCTP stack.
+ */
+ { .hdr = LK(99, 99), .ty = 1, .net = 1, .expect = "any" },
+
+ /* unbound smctp_type */
+ { .hdr = LK(20, 10), .ty = 3, .net = 1, .expect = NULL },
+
+ /* smctp_network tests */
+
+ { .hdr = LK(0, 0), .ty = 1, .net = 7, .expect = "any" },
+ { .hdr = LK(21, 10), .ty = 1, .net = 2, .expect = "any" },
+
+ /* remote EID 20 matches, but MCTP_NET_ANY in "remote20" resolved
+ * to net=1, so lookup doesn't match "remote20"
+ */
+ { .hdr = LK(20, 10), .ty = 1, .net = 3, .expect = "any" },
+
+ { .hdr = LK(21, 10), .ty = 1, .net = 3, .expect = "remote21net3" },
+ { .hdr = LK(21, 10), .ty = 1, .net = 4, .expect = "remote21net4" },
+ { .hdr = LK(21, 10), .ty = 1, .net = 5, .expect = "remote21net5" },
+
+ { .hdr = LK(21, 10), .ty = 1, .net = 5, .expect = "remote21net5" },
+
+ { .hdr = LK(99, 10), .ty = 1, .net = 8, .expect = "local10net8" },
+
+ { .hdr = LK(99, 10), .ty = 1, .net = 9, .expect = "anynet9" },
+ { .hdr = LK(0, 0), .ty = 1, .net = 9, .expect = "anynet9" },
+ { .hdr = LK(99, 99), .ty = 1, .net = 9, .expect = "anynet9" },
+ { .hdr = LK(20, 10), .ty = 1, .net = 9, .expect = "anynet9" },
+};
+
+/* Binds to create during the lookup tests */
+static const struct mctp_test_bind_setup lookup_binds[] = {
+ /* any address and net, type 1 */
+ { .name = "any", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = MCTP_NET_ANY, .bind_type = 1, },
+ /* local eid 10, net 1 (resolved from MCTP_NET_ANY) */
+ { .name = "local10", .bind_addr = 10,
+ .bind_net = MCTP_NET_ANY, .bind_type = 1, },
+ /* local eid 10, net 8 */
+ { .name = "local10net8", .bind_addr = 10,
+ .bind_net = 8, .bind_type = 1, },
+ /* any EID, net 9 */
+ { .name = "anynet9", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = 9, .bind_type = 1, },
+
+ /* remote eid 20, net 1, any local eid */
+ { .name = "remote20", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = MCTP_NET_ANY, .bind_type = 1,
+ .have_peer = true, .peer_addr = 20, .peer_net = MCTP_NET_ANY, },
+
+ /* remote eid 21, net 1, local eid 11 */
+ { .name = "remote21local11", .bind_addr = 11,
+ .bind_net = MCTP_NET_ANY, .bind_type = 1,
+ .have_peer = true, .peer_addr = 21, .peer_net = MCTP_NET_ANY, },
+
+ /* remote eid 21, specific net=3 for connect() */
+ { .name = "remote21net3", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = MCTP_NET_ANY, .bind_type = 1,
+ .have_peer = true, .peer_addr = 21, .peer_net = 3, },
+
+ /* remote eid 21, net 4 for bind, specific net=4 for connect() */
+ { .name = "remote21net4", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = 4, .bind_type = 1,
+ .have_peer = true, .peer_addr = 21, .peer_net = 4, },
+
+ /* remote eid 21, net 5 for bind, specific net=5 for connect() */
+ { .name = "remote21net5", .bind_addr = MCTP_ADDR_ANY,
+ .bind_net = 5, .bind_type = 1,
+ .have_peer = true, .peer_addr = 21, .peer_net = 5, },
+};
+
+static void mctp_bind_lookup_desc(const struct mctp_bind_lookup_test *t,
+ char *desc)
+{
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+ "{src %d dst %d ty %d net %d expect %s}",
+ t->hdr.src, t->hdr.dest, t->ty, t->net, t->expect);
+}
+
+KUNIT_ARRAY_PARAM(mctp_bind_lookup, mctp_bind_lookup_tests,
+ mctp_bind_lookup_desc);
+
+static void mctp_test_bind_lookup(struct kunit *test)
+{
+ const struct mctp_bind_lookup_test *rx;
+ struct socket *socks[ARRAY_SIZE(lookup_binds)];
+ struct sk_buff *skb_pkt = NULL, *skb_sock = NULL;
+ struct socket *sock_ty0, *sock_expect = NULL;
+ struct mctp_test_pktqueue tpq;
+ struct mctp_test_dev *dev;
+ struct mctp_dst dst;
+ int rc;
+
+ rx = test->param_value;
+
+ __mctp_route_test_init(test, &dev, &dst, &tpq, &sock_ty0, rx->net);
+ /* Create all binds */
+ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++) {
+ mctp_test_bind_run(test, &lookup_binds[i],
+ &rc, &socks[i]);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* Record the expected receive socket */
+ if (rx->expect &&
+ strcmp(rx->expect, lookup_binds[i].name) == 0) {
+ KUNIT_ASSERT_NULL(test, sock_expect);
+ sock_expect = socks[i];
+ }
+ }
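+ /* we should have found a matching bind exactly when the case expects
+ * delivery
+ */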
+ KUNIT_ASSERT_EQ(test, !!sock_expect, !!rx->expect);
+
+ /* Create test message */
+ skb_pkt = mctp_test_create_skb_data(&rx->hdr, &rx->ty);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb_pkt);
+ mctp_test_skb_set_dev(skb_pkt, dev);
+ mctp_test_pktqueue_init(&tpq);
+
+ rc = mctp_dst_input(&dst, skb_pkt);
+ if (rx->expect) {
+ /* Check that the message is received on the expected socket */
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ skb_sock = skb_recv_datagram(sock_expect->sk,
+ MSG_DONTWAIT, &rc);
+ if (!skb_sock) {
+ /* Find which socket received it instead */
+ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++) {
+ skb_sock = skb_recv_datagram(socks[i]->sk,
+ MSG_DONTWAIT, &rc);
+ if (skb_sock) {
+ KUNIT_FAIL(test,
+ "received on incorrect socket '%s', expect '%s'",
+ lookup_binds[i].name,
+ rx->expect);
+ goto cleanup;
+ }
+ }
+ KUNIT_FAIL(test, "no message received");
+ }
+ } else {
+ KUNIT_EXPECT_NE(test, rc, 0);
+ }
+
+cleanup:
+ kfree_skb(skb_sock);
+ kfree_skb(skb_pkt);
+
+ /* Drop all binds */
+ for (size_t i = 0; i < ARRAY_SIZE(lookup_binds); i++)
+ sock_release(socks[i]);
+
+ __mctp_route_test_fini(test, dev, &dst, &tpq, sock_ty0);
+}
+
static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params),
KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params),
@@ -1253,11 +1611,17 @@ static struct kunit_case mctp_test_cases[] = {
KUNIT_CASE(mctp_test_fragment_flow),
KUNIT_CASE(mctp_test_route_output_key_create),
KUNIT_CASE(mctp_test_route_input_cloned_frag),
+ KUNIT_CASE(mctp_test_route_extaddr_input),
+ KUNIT_CASE(mctp_test_route_gw_lookup),
+ KUNIT_CASE(mctp_test_route_gw_loop),
+ KUNIT_CASE_PARAM(mctp_test_route_gw_mtu, mctp_route_gw_mtu_gen_params),
+ KUNIT_CASE(mctp_test_route_gw_output),
+ KUNIT_CASE_PARAM(mctp_test_bind_lookup, mctp_bind_lookup_gen_params),
{}
};
static struct kunit_suite mctp_test_suite = {
- .name = "mctp",
+ .name = "mctp-route",
.test_cases = mctp_test_cases,
};
diff --git a/net/mctp/test/sock-test.c b/net/mctp/test/sock-test.c
new file mode 100644
index 000000000000..b0942deb5019
--- /dev/null
+++ b/net/mctp/test/sock-test.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <kunit/static_stub.h>
+#include <kunit/test.h>
+
+#include <linux/socket.h>
+#include <linux/spinlock.h>
+
+#include "utils.h"
+
+static const u8 dev_default_lladdr[] = { 0x01, 0x02 };
+
+/* helper for simple sock setup: a single device, with dev_default_lladdr as
+ * its hardware address, assigned local EID 8, and a route to EID 9
+ */
+static void __mctp_sock_test_init(struct kunit *test,
+ struct mctp_test_dev **devp,
+ struct mctp_test_route **rtp,
+ struct socket **sockp)
+{
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct socket *sock;
+ unsigned long flags;
+ u8 *addrs;
+ int rc;
+
+ dev = mctp_test_create_dev_lladdr(sizeof(dev_default_lladdr),
+ dev_default_lladdr);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ addrs = kmalloc(1, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addrs);
+ addrs[0] = 8;
+
+ spin_lock_irqsave(&dev->mdev->addrs_lock, flags);
+ dev->mdev->num_addrs = 1;
+ swap(addrs, dev->mdev->addrs);
+ spin_unlock_irqrestore(&dev->mdev->addrs_lock, flags);
+
+ kfree(addrs);
+
+ rt = mctp_test_create_route_direct(dev_net(dev->ndev), dev->mdev, 9, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt);
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ *devp = dev;
+ *rtp = rt;
+ *sockp = sock;
+}
+
+static void __mctp_sock_test_fini(struct kunit *test,
+ struct mctp_test_dev *dev,
+ struct mctp_test_route *rt,
+ struct socket *sock)
+{
+ sock_release(sock);
+ mctp_test_route_destroy(test, rt);
+ mctp_test_destroy_dev(dev);
+}
+
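+/* expectations and results shared between mctp_test_sock_sendmsg_extaddr()
+ * and the stubbed local-output handler, passed via test->priv
+ */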
+struct mctp_test_sock_local_output_config {
+ struct mctp_test_dev *dev;
+ size_t halen;
+ u8 haddr[MAX_ADDR_LEN];
+ bool invoked;
+ int rc;
+};
+
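+/* static stub for mctp_local_output(): checks that the dst carries the
+ * expected device and hardware address, then consumes the skb rather than
+ * transmitting it
+ */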
+static int mctp_test_sock_local_output(struct sock *sk,
+ struct mctp_dst *dst,
+ struct sk_buff *skb,
+ mctp_eid_t daddr, u8 req_tag)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct mctp_test_sock_local_output_config *cfg = test->priv;
+
+ KUNIT_EXPECT_PTR_EQ(test, dst->dev, cfg->dev->mdev);
+ KUNIT_EXPECT_EQ(test, dst->halen, cfg->halen);
+ KUNIT_EXPECT_MEMEQ(test, dst->haddr, cfg->haddr, dst->halen);
+
+ cfg->invoked = true;
+
+ kfree_skb(skb);
+
+ return cfg->rc;
+}
+
+static void mctp_test_sock_sendmsg_extaddr(struct kunit *test)
+{
+ struct sockaddr_mctp_ext addr = {
+ .smctp_base = {
+ .smctp_family = AF_MCTP,
+ .smctp_tag = MCTP_TAG_OWNER,
+ .smctp_network = MCTP_NET_ANY,
+ },
+ };
+ struct mctp_test_sock_local_output_config cfg = { 0 };
+ u8 haddr[] = { 0xaa, 0x01 };
+ u8 buf[4] = { 0, 1, 2, 3 };
+ struct mctp_test_route *rt;
+ struct msghdr msg = { 0 };
+ struct mctp_test_dev *dev;
+ struct mctp_sock *msk;
+ struct socket *sock;
+ ssize_t send_len;
+ struct kvec vec = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf),
+ };
+
+ __mctp_sock_test_init(test, &dev, &rt, &sock);
+
+ /* Expect to see the dst configured with the addressing data we
+ * provide in the struct sockaddr_mctp_ext
+ */
+ cfg.dev = dev;
+ cfg.halen = sizeof(haddr);
+ memcpy(cfg.haddr, haddr, sizeof(haddr));
+
+ test->priv = &cfg;
+
+ kunit_activate_static_stub(test, mctp_local_output,
+ mctp_test_sock_local_output);
+
+ /* enable and configure direct addressing */
+ msk = container_of(sock->sk, struct mctp_sock, sk);
+ msk->addr_ext = true;
+
+ addr.smctp_ifindex = dev->ndev->ifindex;
+ addr.smctp_halen = sizeof(haddr);
+ memcpy(addr.smctp_haddr, haddr, sizeof(haddr));
+
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &vec, 1, sizeof(buf));
+ send_len = mctp_sendmsg(sock, &msg, sizeof(buf));
+ KUNIT_EXPECT_EQ(test, send_len, sizeof(buf));
+ KUNIT_EXPECT_TRUE(test, cfg.invoked);
+
+ __mctp_sock_test_fini(test, dev, rt, sock);
+}
+
+static void mctp_test_sock_recvmsg_extaddr(struct kunit *test)
+{
+ struct sockaddr_mctp_ext recv_addr = { 0 };
+ u8 rcv_buf[1], rcv_data[] = { 0, 1 };
+ u8 haddr[] = { 0xaa, 0x02 };
+ struct mctp_test_route *rt;
+ struct mctp_test_dev *dev;
+ struct mctp_skb_cb *cb;
+ struct mctp_sock *msk;
+ struct sk_buff *skb;
+ struct mctp_hdr hdr;
+ struct socket *sock;
+ struct msghdr msg;
+ ssize_t recv_len;
+ int rc;
+ struct kvec vec = {
+ .iov_base = rcv_buf,
+ .iov_len = sizeof(rcv_buf),
+ };
+
+ __mctp_sock_test_init(test, &dev, &rt, &sock);
+
+ /* enable extended addressing on recv */
+ msk = container_of(sock->sk, struct mctp_sock, sk);
+ msk->addr_ext = true;
+
+ /* base incoming header, using a null-EID dest */
+ hdr.ver = 1;
+ hdr.dest = 0;
+ hdr.src = 9;
+ hdr.flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+ MCTP_HDR_FLAG_TO;
+
+ skb = mctp_test_create_skb_data(&hdr, &rcv_data);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb);
+
+ mctp_test_skb_set_dev(skb, dev);
+
+ /* set incoming extended address data */
+ cb = mctp_cb(skb);
+ cb->halen = sizeof(haddr);
+ cb->ifindex = dev->ndev->ifindex;
+ memcpy(cb->haddr, haddr, sizeof(haddr));
+
+ /* Deliver to socket. The route input path pulls the network header,
+ * leaving skb data starting at the type byte. recvmsg() will consume
+ * the type byte for addr.smctp_type.
+ */
+ skb_pull(skb, sizeof(hdr));
+ rc = sock_queue_rcv_skb(sock->sk, skb);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ msg.msg_name = &recv_addr;
+ msg.msg_namelen = sizeof(recv_addr);
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(rcv_buf));
+
+ recv_len = mctp_recvmsg(sock, &msg, sizeof(rcv_buf),
+ MSG_DONTWAIT | MSG_TRUNC);
+
+ KUNIT_EXPECT_EQ(test, recv_len, sizeof(rcv_buf));
+
+ /* expect our extended address to be populated from hdr and cb */
+ KUNIT_EXPECT_EQ(test, msg.msg_namelen, sizeof(recv_addr));
+ KUNIT_EXPECT_EQ(test, recv_addr.smctp_base.smctp_family, AF_MCTP);
+ KUNIT_EXPECT_EQ(test, recv_addr.smctp_ifindex, dev->ndev->ifindex);
+ KUNIT_EXPECT_EQ(test, recv_addr.smctp_halen, sizeof(haddr));
+ KUNIT_EXPECT_MEMEQ(test, recv_addr.smctp_haddr, haddr, sizeof(haddr));
+
+ __mctp_sock_test_fini(test, dev, rt, sock);
+}
+
+static const struct mctp_test_bind_setup bind_addrany_netdefault_type1 = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = MCTP_NET_ANY, .bind_type = 1,
+};
+
+static const struct mctp_test_bind_setup bind_addrany_net2_type1 = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = 2, .bind_type = 1,
+};
+
+/* 1 is default net */
+static const struct mctp_test_bind_setup bind_addr8_net1_type1 = {
+ .bind_addr = 8, .bind_net = 1, .bind_type = 1,
+};
+
+static const struct mctp_test_bind_setup bind_addrany_net1_type1 = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = 1, .bind_type = 1,
+};
+
+/* 2 is an arbitrary net */
+static const struct mctp_test_bind_setup bind_addr8_net2_type1 = {
+ .bind_addr = 8, .bind_net = 2, .bind_type = 1,
+};
+
+static const struct mctp_test_bind_setup bind_addr8_netdefault_type1 = {
+ .bind_addr = 8, .bind_net = MCTP_NET_ANY, .bind_type = 1,
+};
+
+static const struct mctp_test_bind_setup bind_addrany_net2_type2 = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = 2, .bind_type = 2,
+};
+
+static const struct mctp_test_bind_setup bind_addrany_net2_type1_peer9 = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = 2, .bind_type = 1,
+ .have_peer = true, .peer_addr = 9, .peer_net = 2,
+};
+
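+/* a pair of bind setups to apply in sequence, and the expected errno from
+ * the second bind
+ */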
+struct mctp_bind_pair_test {
+ const struct mctp_test_bind_setup *bind1;
+ const struct mctp_test_bind_setup *bind2;
+ int error;
+};
+
+/* Pairs of binds and whether they will conflict */
+static const struct mctp_bind_pair_test mctp_bind_pair_tests[] = {
+ /* Both ADDR_ANY, conflict */
+ { &bind_addrany_netdefault_type1, &bind_addrany_netdefault_type1,
+ EADDRINUSE },
+ /* Same specific EID, conflict */
+ { &bind_addr8_netdefault_type1, &bind_addr8_netdefault_type1,
+ EADDRINUSE },
+ /* ADDR_ANY vs specific EID, OK */
+ { &bind_addrany_netdefault_type1, &bind_addr8_netdefault_type1, 0 },
+ /* ADDR_ANY different types, OK */
+ { &bind_addrany_net2_type2, &bind_addrany_net2_type1, 0 },
+ /* ADDR_ANY different nets, OK */
+ { &bind_addrany_net2_type1, &bind_addrany_netdefault_type1, 0 },
+
+ /* specific EID, NET_ANY (resolves to default)
+ * vs specific EID, explicit default net 1, conflict
+ */
+ { &bind_addr8_netdefault_type1, &bind_addr8_net1_type1, EADDRINUSE },
+
+ /* specific EID, net 1 vs specific EID, net 2, ok */
+ { &bind_addr8_net1_type1, &bind_addr8_net2_type1, 0 },
+
+ /* ADDR_ANY, NET_ANY (doesn't resolve to default)
+ * vs ADDR_ANY, explicit default net 1, OK
+ */
+ { &bind_addrany_netdefault_type1, &bind_addrany_net1_type1, 0 },
+
+ /* specific remote peer doesn't conflict with any-peer bind */
+ { &bind_addrany_net2_type1_peer9, &bind_addrany_net2_type1, 0 },
+
+ /* bind() NET_ANY is allowed with a connect() net */
+ { &bind_addrany_net2_type1_peer9, &bind_addrany_netdefault_type1, 0 },
+};
+
+static void mctp_bind_pair_desc(const struct mctp_bind_pair_test *t, char *desc)
+{
+ char peer1[25] = {0}, peer2[25] = {0};
+
+ if (t->bind1->have_peer)
+ snprintf(peer1, sizeof(peer1), ", peer %d net %d",
+ t->bind1->peer_addr, t->bind1->peer_net);
+ if (t->bind2->have_peer)
+ snprintf(peer2, sizeof(peer2), ", peer %d net %d",
+ t->bind2->peer_addr, t->bind2->peer_net);
+
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE,
+ "{bind(addr %d, type %d, net %d%s)} {bind(addr %d, type %d, net %d%s)} -> error %d",
+ t->bind1->bind_addr, t->bind1->bind_type,
+ t->bind1->bind_net, peer1,
+ t->bind2->bind_addr, t->bind2->bind_type,
+ t->bind2->bind_net, peer2, t->error);
+}
+
+KUNIT_ARRAY_PARAM(mctp_bind_pair, mctp_bind_pair_tests, mctp_bind_pair_desc);
+
+static void mctp_test_bind_invalid(struct kunit *test)
+{
+ struct socket *sock;
+ int rc;
+
+ /* bind() fails if the bind() vs connect() networks mismatch. */
+ const struct mctp_test_bind_setup bind_connect_net_mismatch = {
+ .bind_addr = MCTP_ADDR_ANY, .bind_net = 1, .bind_type = 1,
+ .have_peer = true, .peer_addr = 9, .peer_net = 2,
+ };
+ mctp_test_bind_run(test, &bind_connect_net_mismatch, &rc, &sock);
+ KUNIT_EXPECT_EQ(test, -rc, EINVAL);
+ sock_release(sock);
+}
+
+static int
+mctp_test_bind_conflicts_inner(struct kunit *test,
+ const struct mctp_test_bind_setup *bind1,
+ const struct mctp_test_bind_setup *bind2)
+{
+ struct socket *sock1 = NULL, *sock2 = NULL, *sock3 = NULL;
+ int bind_errno;
+
+ /* Bind to first address, always succeeds */
+ mctp_test_bind_run(test, bind1, &bind_errno, &sock1);
+ KUNIT_EXPECT_EQ(test, bind_errno, 0);
+
+ /* A second identical bind always fails */
+ mctp_test_bind_run(test, bind1, &bind_errno, &sock2);
+ KUNIT_EXPECT_EQ(test, -bind_errno, EADDRINUSE);
+
+ /* A different bind; its result is returned to the caller */
+ mctp_test_bind_run(test, bind2, &bind_errno, &sock3);
+
+ if (sock1)
+ sock_release(sock1);
+ if (sock2)
+ sock_release(sock2);
+ if (sock3)
+ sock_release(sock3);
+
+ return bind_errno;
+}
+
+static void mctp_test_bind_conflicts(struct kunit *test)
+{
+ const struct mctp_bind_pair_test *pair;
+ int bind_errno;
+
+ pair = test->param_value;
+
+ bind_errno =
+ mctp_test_bind_conflicts_inner(test, pair->bind1, pair->bind2);
+ KUNIT_EXPECT_EQ(test, -bind_errno, pair->error);
+
+ /* with the binds swapped, the second bind should give the same result */
+ bind_errno =
+ mctp_test_bind_conflicts_inner(test, pair->bind2, pair->bind1);
+ KUNIT_EXPECT_EQ(test, -bind_errno, pair->error);
+}
+
+static void mctp_test_assumptions(struct kunit *test)
+{
+ /* check assumption of default net from bind_addr8_net1_type1 */
+ KUNIT_ASSERT_EQ(test, mctp_default_net(&init_net), 1);
+}
+
+static struct kunit_case mctp_test_cases[] = {
+ KUNIT_CASE(mctp_test_assumptions),
+ KUNIT_CASE(mctp_test_sock_sendmsg_extaddr),
+ KUNIT_CASE(mctp_test_sock_recvmsg_extaddr),
+ KUNIT_CASE_PARAM(mctp_test_bind_conflicts, mctp_bind_pair_gen_params),
+ KUNIT_CASE(mctp_test_bind_invalid),
+ {}
+};
+
+static struct kunit_suite mctp_test_suite = {
+ .name = "mctp-sock",
+ .test_cases = mctp_test_cases,
+};
+
+kunit_test_suite(mctp_test_suite);
diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c
index 565763eb0211..953d41902771 100644
--- a/net/mctp/test/utils.c
+++ b/net/mctp/test/utils.c
@@ -26,19 +26,22 @@ static void mctp_test_dev_setup(struct net_device *ndev)
ndev->type = ARPHRD_MCTP;
ndev->mtu = MCTP_DEV_TEST_MTU;
ndev->hard_header_len = 0;
- ndev->addr_len = 0;
ndev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
ndev->flags = IFF_NOARP;
ndev->netdev_ops = &mctp_test_netdev_ops;
ndev->needs_free_netdev = true;
}
-struct mctp_test_dev *mctp_test_create_dev(void)
+static struct mctp_test_dev *__mctp_test_create_dev(unsigned short lladdr_len,
+ const unsigned char *lladdr)
{
struct mctp_test_dev *dev;
struct net_device *ndev;
int rc;
+ if (WARN_ON(lladdr_len > MAX_ADDR_LEN))
+ return NULL;
+
ndev = alloc_netdev(sizeof(*dev), "mctptest%d", NET_NAME_ENUM,
mctp_test_dev_setup);
if (!ndev)
@@ -46,6 +49,8 @@ struct mctp_test_dev *mctp_test_create_dev(void)
dev = netdev_priv(ndev);
dev->ndev = ndev;
+ ndev->addr_len = lladdr_len;
+ dev_addr_set(ndev, lladdr);
rc = register_netdev(ndev);
if (rc) {
@@ -61,8 +66,231 @@ struct mctp_test_dev *mctp_test_create_dev(void)
return dev;
}
+struct mctp_test_dev *mctp_test_create_dev(void)
+{
+ return __mctp_test_create_dev(0, NULL);
+}
+
+struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len,
+ const unsigned char *lladdr)
+{
+ return __mctp_test_create_dev(lladdr_len, lladdr);
+}
+
void mctp_test_destroy_dev(struct mctp_test_dev *dev)
{
mctp_dev_put(dev->mdev);
unregister_netdev(dev->ndev);
}
+
+static const unsigned int test_pktqueue_magic = 0x5f713aef;
+
+void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq)
+{
+ tpq->magic = test_pktqueue_magic;
+ skb_queue_head_init(&tpq->pkts);
+}
+
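+/* dst output stub: rather than transmitting, queue outgoing skbs on the
+ * test's pktqueue (test->priv) so test cases can inspect them
+ */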
+static int mctp_test_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
+{
+ struct kunit *test = current->kunit_test;
+ struct mctp_test_pktqueue *tpq = test->priv;
+
+ KUNIT_ASSERT_EQ(test, tpq->magic, test_pktqueue_magic);
+
+ skb_queue_tail(&tpq->pkts, skb);
+
+ return 0;
+}
+
+/* local version of mctp_route_alloc() */
+static struct mctp_test_route *mctp_route_test_alloc(void)
+{
+ struct mctp_test_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->rt.list);
+ refcount_set(&rt->rt.refs, 1);
+ rt->rt.output = mctp_test_dst_output;
+
+ return rt;
+}
+
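+/* create a direct route to a single EID via dev; remove with
+ * mctp_test_route_destroy()
+ */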
+struct mctp_test_route *mctp_test_create_route_direct(struct net *net,
+ struct mctp_dev *dev,
+ mctp_eid_t eid,
+ unsigned int mtu)
+{
+ struct mctp_test_route *rt;
+
+ rt = mctp_route_test_alloc();
+ if (!rt)
+ return NULL;
+
+ rt->rt.min = eid;
+ rt->rt.max = eid;
+ rt->rt.mtu = mtu;
+ rt->rt.type = RTN_UNSPEC;
+ rt->rt.dst_type = MCTP_ROUTE_DIRECT;
+ if (dev)
+ mctp_dev_hold(dev);
+ rt->rt.dev = dev;
+
+ list_add_rcu(&rt->rt.list, &net->mctp.routes);
+
+ return rt;
+}
+
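+/* create a gateway route to a single EID, via gateway (netid, gw); remove
+ * with mctp_test_route_destroy()
+ */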
+struct mctp_test_route *mctp_test_create_route_gw(struct net *net,
+ unsigned int netid,
+ mctp_eid_t eid,
+ mctp_eid_t gw,
+ unsigned int mtu)
+{
+ struct mctp_test_route *rt;
+
+ rt = mctp_route_test_alloc();
+ if (!rt)
+ return NULL;
+
+ rt->rt.min = eid;
+ rt->rt.max = eid;
+ rt->rt.mtu = mtu;
+ rt->rt.type = RTN_UNSPEC;
+ rt->rt.dst_type = MCTP_ROUTE_GATEWAY;
+ rt->rt.gateway.eid = gw;
+ rt->rt.gateway.net = netid;
+
+ list_add_rcu(&rt->rt.list, &net->mctp.routes);
+
+ return rt;
+}
+
+/* Convenience function for our test dst; release with mctp_test_dst_release()
+ */
+void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst,
+ struct mctp_test_dev *dev,
+ struct mctp_test_pktqueue *tpq, unsigned int mtu)
+{
+ KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev);
+
+ memset(dst, 0, sizeof(*dst));
+
+ dst->dev = dev->mdev;
+ __mctp_dev_get(dst->dev->dev);
+ dst->mtu = mtu;
+ dst->output = mctp_test_dst_output;
+ mctp_test_pktqueue_init(tpq);
+ test->priv = tpq;
+}
+
+void mctp_test_dst_release(struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq)
+{
+ mctp_dst_release(dst);
+ skb_queue_purge(&tpq->pkts);
+}
+
+void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt)
+{
+ unsigned int refs;
+
+ rtnl_lock();
+ list_del_rcu(&rt->rt.list);
+ rtnl_unlock();
+
+ if (rt->rt.dst_type == MCTP_ROUTE_DIRECT && rt->rt.dev)
+ mctp_dev_put(rt->rt.dev);
+
+ refs = refcount_read(&rt->rt.refs);
+ KUNIT_ASSERT_EQ_MSG(test, refs, 1, "route ref imbalance");
+
+ kfree_rcu(&rt->rt, rcu);
+}
+
+void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev)
+{
+ struct mctp_skb_cb *cb;
+
+ cb = mctp_cb(skb);
+ cb->net = READ_ONCE(dev->mdev->net);
+ skb->dev = dev->ndev;
+}
+
+struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
+ unsigned int data_len)
+{
+ size_t hdr_len = sizeof(*hdr);
+ struct sk_buff *skb;
+ unsigned int i;
+ u8 *buf;
+
+ skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ __mctp_cb(skb);
+ memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
+
+ buf = skb_put(skb, data_len);
+ for (i = 0; i < data_len; i++)
+ buf[i] = i & 0xff;
+
+ return skb;
+}
+
+struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
+ const void *data, size_t data_len)
+{
+ size_t hdr_len = sizeof(*hdr);
+ struct sk_buff *skb;
+
+ skb = alloc_skb(hdr_len + data_len, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ __mctp_cb(skb);
+ memcpy(skb_put(skb, hdr_len), hdr, hdr_len);
+ memcpy(skb_put(skb, data_len), data, data_len);
+
+ return skb;
+}
+
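+/* create a socket, optionally connect() to the peer described in setup, then
+ * bind(); the bind() result is returned in *ret_bind_errno
+ */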
+void mctp_test_bind_run(struct kunit *test,
+ const struct mctp_test_bind_setup *setup,
+ int *ret_bind_errno, struct socket **sock)
+{
+ struct sockaddr_mctp addr;
+ int rc;
+
+ *ret_bind_errno = -EIO;
+
+ rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, sock);
+ KUNIT_ASSERT_EQ(test, rc, 0);
+
+ /* connect() if requested */
+ if (setup->have_peer) {
+ memset(&addr, 0x0, sizeof(addr));
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = setup->peer_net;
+ addr.smctp_addr.s_addr = setup->peer_addr;
+ /* connect() type must match bind() type */
+ addr.smctp_type = setup->bind_type;
+ rc = kernel_connect(*sock, (struct sockaddr *)&addr,
+ sizeof(addr), 0);
+ KUNIT_EXPECT_EQ(test, rc, 0);
+ }
+
+ /* bind() */
+ memset(&addr, 0x0, sizeof(addr));
+ addr.smctp_family = AF_MCTP;
+ addr.smctp_network = setup->bind_net;
+ addr.smctp_addr.s_addr = setup->bind_addr;
+ addr.smctp_type = setup->bind_type;
+
+ *ret_bind_errno =
+ kernel_bind(*sock, (struct sockaddr *)&addr, sizeof(addr));
+}
diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h
index df6aa1c03440..06bdb6cb5eff 100644
--- a/net/mctp/test/utils.h
+++ b/net/mctp/test/utils.h
@@ -3,6 +3,11 @@
#ifndef __NET_MCTP_TEST_UTILS_H
#define __NET_MCTP_TEST_UTILS_H
+#include <uapi/linux/netdevice.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+
#include <kunit/test.h>
#define MCTP_DEV_TEST_MTU 68
@@ -10,11 +15,67 @@
struct mctp_test_dev {
struct net_device *ndev;
struct mctp_dev *mdev;
+
+ unsigned short lladdr_len;
+ unsigned char lladdr[MAX_ADDR_LEN];
};
struct mctp_test_dev;
+struct mctp_test_route {
+ struct mctp_route rt;
+};
+
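+/* skbs captured by the test dst output handler; magic is a sanity check that
+ * test->priv points at an initialised pktqueue
+ */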
+struct mctp_test_pktqueue {
+ unsigned int magic;
+ struct sk_buff_head pkts;
+};
+
+struct mctp_test_bind_setup {
+ mctp_eid_t bind_addr;
+ int bind_net;
+ u8 bind_type;
+
+ bool have_peer;
+ mctp_eid_t peer_addr;
+ int peer_net;
+
+ /* optional name. Used for comparison in "lookup" tests */
+ const char *name;
+};
+
struct mctp_test_dev *mctp_test_create_dev(void);
+struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len,
+ const unsigned char *lladdr);
void mctp_test_destroy_dev(struct mctp_test_dev *dev);
+struct mctp_test_route *mctp_test_create_route_direct(struct net *net,
+ struct mctp_dev *dev,
+ mctp_eid_t eid,
+ unsigned int mtu);
+struct mctp_test_route *mctp_test_create_route_gw(struct net *net,
+ unsigned int netid,
+ mctp_eid_t eid,
+ mctp_eid_t gw,
+ unsigned int mtu);
+void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst,
+ struct mctp_test_dev *dev,
+ struct mctp_test_pktqueue *tpq, unsigned int mtu);
+void mctp_test_dst_release(struct mctp_dst *dst,
+ struct mctp_test_pktqueue *tpq);
+void mctp_test_pktqueue_init(struct mctp_test_pktqueue *tpq);
+void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt);
+void mctp_test_skb_set_dev(struct sk_buff *skb, struct mctp_test_dev *dev);
+struct sk_buff *mctp_test_create_skb(const struct mctp_hdr *hdr,
+ unsigned int data_len);
+struct sk_buff *__mctp_test_create_skb_data(const struct mctp_hdr *hdr,
+ const void *data, size_t data_len);
+
+#define mctp_test_create_skb_data(h, d) \
+ __mctp_test_create_skb_data(h, d, sizeof(*d))
+
+void mctp_test_bind_run(struct kunit *test,
+ const struct mctp_test_bind_setup *setup,
+ int *ret_bind_errno, struct socket **sock);
+
#endif /* __NET_MCTP_TEST_UTILS_H */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index d536c97144e9..25c88cba5c48 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
if (index < net->mpls.platform_labels) {
struct mpls_route __rcu **platform_label =
- rcu_dereference(net->mpls.platform_label);
- rt = rcu_dereference(platform_label[index]);
+ rcu_dereference_rtnl(net->mpls.platform_label);
+ rt = rcu_dereference_rtnl(platform_label[index]);
}
return rt;
}
@@ -706,7 +706,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
} else {
unsigned int flags;
- flags = dev_get_flags(dev);
+ flags = netif_get_flags(dev);
if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
nh->nh_flags |= RTNH_F_LINKDOWN;
}
@@ -1616,14 +1616,14 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
return notifier_from_errno(err);
break;
case NETDEV_UP:
- flags = dev_get_flags(dev);
+ flags = netif_get_flags(dev);
if (flags & (IFF_RUNNING | IFF_LOWER_UP))
mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
else
mpls_ifup(dev, RTNH_F_DEAD);
break;
case NETDEV_CHANGE:
- flags = dev_get_flags(dev);
+ flags = netif_get_flags(dev);
if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
} else {
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index d9290c5bb6c7..fed40dae5583 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -533,9 +533,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback;
if (timeouts == to_max || (timeouts < to_max && expired)) {
- MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP);
subflow->mpc_drop = 1;
- mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ mptcp_early_fallback(mptcp_sk(subflow->conn), subflow,
+ MPTCP_MIB_MPCAPABLEACTIVEDROP);
}
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 0c24545f0e8d..cf879c188ca2 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -80,6 +80,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
+ SNMP_MIB_ITEM("MPCapableDataFallback", MPTCP_MIB_MPCAPABLEDATAFALLBACK),
+ SNMP_MIB_ITEM("MD5SigFallback", MPTCP_MIB_MD5SIGFALLBACK),
+ SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
+ SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
+ SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 250c6b77977e..309bac6fea32 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -81,6 +81,13 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
+ MPTCP_MIB_MPCAPABLEDATAFALLBACK, /* Missing DSS/MPC+data on first
+ * established packet
+ */
+ MPTCP_MIB_MD5SIGFALLBACK, /* Conflicting TCP option enabled */
+ MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
+ MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
+ MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 421ced031289..70c0ab0ecf90 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -978,8 +978,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
- pr_fallback(msk);
- mptcp_do_fallback(ssk);
+ if (!mptcp_try_fallback(ssk, MPTCP_MIB_MPCAPABLEDATAFALLBACK)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED);
+ goto reset;
+ }
return false;
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index feb01747d7d8..420d416e2603 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -765,8 +765,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
pr_debug("fail_seq=%llu\n", fail_seq);
- if (!READ_ONCE(msk->allow_infinite_fallback))
+ /* After accepting the fail, we can't create any other subflows */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
return;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map\n");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index edf14c2c2062..9a287b75c1b3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/atomic.h>
+#include <net/aligned_data.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -67,6 +68,26 @@ static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
return &inet_stream_ops;
}
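+/* Try to fall back to plain TCP: fails if infinite fallback has already been
+ * disallowed on this msk. On success, further subflow joins are disallowed
+ * and the fb_mib fallback counter is incremented.
+ */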
+bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib)
+{
+ struct net *net = sock_net((struct sock *)msk);
+
+ if (__mptcp_check_fallback(msk))
+ return true;
+
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+
+ msk->allow_subflows = false;
+ set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
+ __MPTCP_INC_STATS(net, fb_mib);
+ spin_unlock_bh(&msk->fallback_lock);
+ return true;
+}
+
static int __mptcp_socket_create(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -560,11 +581,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
{
- if (READ_ONCE(msk->allow_infinite_fallback)) {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_DSSCORRUPTIONFALLBACK);
- mptcp_do_fallback(ssk);
- } else {
+ if (!mptcp_try_fallback(ssk, MPTCP_MIB_DSSCORRUPTIONFALLBACK)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
mptcp_subflow_reset(ssk);
}
@@ -792,7 +809,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
{
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}
@@ -803,6 +820,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_state != TCP_ESTABLISHED)
return false;
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
+
/* attach to msk socket only after we are sure we will deal with it
* at close time
*/
@@ -811,7 +836,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
- mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
__mptcp_propagate_sndbuf(sk, ssk);
return true;
@@ -1136,10 +1160,13 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
mpext->infinite_map = 1;
mpext->data_len = 0;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
+ if (!mptcp_try_fallback(ssk, MPTCP_MIB_INFINITEMAPTX)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED);
+ mptcp_subflow_reset(ssk);
+ return;
+ }
+
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
- pr_fallback(msk);
- mptcp_do_fallback(ssk);
}
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -1376,7 +1403,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* - estimate the faster flow linger time
* - use the above to estimate the amount of byte transferred
* by the faster flow
- * - check that the amount of queued data is greter than the above,
+ * - check that the amount of queued data is greater than the above,
* otherwise do not use the picked, slower, subflow
* We select the subflow with the shorter estimated time to flush
* the queued mem, which basically ensure the above. We just need
@@ -2543,9 +2570,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
static void __mptcp_retrans(struct sock *sk)
{
+ struct mptcp_sendmsg_info info = { .data_lock_held = true, };
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
- struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
struct sock *ssk;
int ret, err;
@@ -2590,6 +2617,18 @@ static void __mptcp_retrans(struct sock *sk)
info.sent = 0;
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
dfrag->already_sent;
+
+ /* make the whole retrans decision, xmit and disallow-fallback
+ * sequence atomic
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ if (__mptcp_check_fallback(msk)) {
+ spin_unlock_bh(&msk->fallback_lock);
+ release_sock(ssk);
+ return;
+ }
+
while (info.sent < info.limit) {
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
@@ -2603,8 +2642,9 @@ static void __mptcp_retrans(struct sock *sk)
len = max(copied, len);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
+ msk->allow_infinite_fallback = false;
}
+ spin_unlock_bh(&msk->fallback_lock);
release_sock(ssk);
}
@@ -2730,7 +2770,8 @@ static void __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
- WRITE_ONCE(msk->allow_infinite_fallback, true);
+ msk->allow_infinite_fallback = true;
+ msk->allow_subflows = true;
msk->recovery = false;
msk->subflow_id = 1;
msk->last_data_sent = tcp_jiffies32;
@@ -2738,6 +2779,7 @@ static void __mptcp_init_sock(struct sock *sk)
msk->last_ack_recv = tcp_jiffies32;
mptcp_pm_data_init(msk);
+ spin_lock_init(&msk->fallback_lock);
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3117,7 +3159,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
* subflow
*/
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
+
+ /* The first subflow is already in TCP_CLOSE status, the following
+ * can't overlap with a fallback anymore
+ */
+ spin_lock_bh(&msk->fallback_lock);
+ msk->allow_subflows = true;
+ msk->allow_infinite_fallback = true;
WRITE_ONCE(msk->flags, 0);
+ spin_unlock_bh(&msk->fallback_lock);
+
msk->cb_flags = 0;
msk->recovery = false;
WRITE_ONCE(msk->can_ack, false);
@@ -3503,7 +3554,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
write_lock_bh(&sk->sk_callback_lock);
rcu_assign_pointer(sk->sk_wq, &parent->wq);
sk_set_socket(sk, parent);
- sk->sk_uid = SOCK_INODE(parent)->i_uid;
+ WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -3524,7 +3575,13 @@ bool mptcp_finish_join(struct sock *ssk)
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_subflows) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
mptcp_subflow_joined(msk, ssk);
+ spin_unlock_bh(&msk->fallback_lock);
mptcp_propagate_sndbuf(parent, ssk);
return true;
}
@@ -3648,16 +3705,15 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
- mptcp_subflow_early_fallback(msk, subflow);
+ mptcp_early_fallback(msk, subflow, MPTCP_MIB_MD5SIGFALLBACK);
#endif
if (subflow->request_mptcp) {
- if (mptcp_active_should_disable(sk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
- mptcp_subflow_early_fallback(msk, subflow);
- } else if (mptcp_token_new_connect(ssk) < 0) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
- }
+ if (mptcp_active_should_disable(sk))
+ mptcp_early_fallback(msk, subflow,
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ else if (mptcp_token_new_connect(ssk) < 0)
+ mptcp_early_fallback(msk, subflow,
+ MPTCP_MIB_TOKENFALLBACKINIT);
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
@@ -3729,7 +3785,7 @@ static struct proto mptcp_prot = {
.stream_memory_free = mptcp_stream_memory_free,
.sockets_allocated = &mptcp_sockets_allocated,
- .memory_allocated = &tcp_memory_allocated,
+ .memory_allocated = &net_aligned_data.tcp_memory_allocated,
.per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
.memory_pressure = &tcp_memory_pressure,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3dd11dd3ba16..b15d7fab5c4b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -326,6 +326,7 @@ struct mptcp_sock {
int keepalive_cnt;
int keepalive_idle;
int keepalive_intvl;
+ int maxseg;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -346,10 +347,16 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
u8 scaling_ratio;
+ bool allow_subflows;
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+
+ spinlock_t fallback_lock; /* protects fallback,
+ * allow_infinite_fallback and
+ * allow_subflows
+ */
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -1216,17 +1223,6 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
return __mptcp_check_fallback(msk);
}
-static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
-{
- if (__mptcp_check_fallback(msk)) {
- pr_debug("TCP fallback already done (msk=%p)\n", msk);
- return;
- }
- if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback)))
- return;
- set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
-}
-
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
{
struct sock *ssk = READ_ONCE(msk->first);
@@ -1236,14 +1232,17 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
TCPF_SYN_RECV | TCPF_LISTEN));
}
-static inline void mptcp_do_fallback(struct sock *ssk)
+bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib);
+
+static inline bool mptcp_try_fallback(struct sock *ssk, int fb_mib)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
struct mptcp_sock *msk;
msk = mptcp_sk(sk);
- __mptcp_do_fallback(msk);
+ if (!__mptcp_try_fallback(msk, fb_mib))
+ return false;
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
gfp_t saved_allocation = ssk->sk_allocation;
@@ -1255,16 +1254,15 @@ static inline void mptcp_do_fallback(struct sock *ssk)
tcp_shutdown(ssk, SEND_SHUTDOWN);
ssk->sk_allocation = saved_allocation;
}
+ return true;
}
-#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
-
-static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
+static inline void mptcp_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ int fb_mib)
{
- pr_fallback(msk);
subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
+ WARN_ON_ONCE(!__mptcp_try_fallback(msk, fb_mib));
}
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 3caa0a9d3b38..2c267aff95be 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -798,6 +798,23 @@ unlock:
return ret;
}
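+/* apply a TCP-level sockopt to every subflow, stopping at the first error */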
+static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level,
+ int optname, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ int ret = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ret = tcp_setsockopt(ssk, level, optname, optval, optlen);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -859,6 +876,11 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
&msk->keepalive_cnt,
val);
break;
+ case TCP_MAXSEG:
+ msk->maxseg = val;
+ ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval,
+ optlen);
+ break;
default:
ret = -ENOPROTOOPT;
}
@@ -914,10 +936,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
lock_sock(sk);
ssk = msk->first;
- if (ssk) {
- ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
- goto out;
- }
+ if (ssk)
+ goto get;
ssk = __mptcp_nmpc_sk(msk);
if (IS_ERR(ssk)) {
@@ -925,6 +945,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
goto out;
}
+get:
ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
out:
@@ -1407,6 +1428,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat);
case TCP_IS_MPTCP:
return mptcp_put_int_option(msk, optval, optlen, 1);
+ case TCP_MAXSEG:
+ return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
+ optval, optlen);
}
return -EOPNOTSUPP;
}
@@ -1553,6 +1577,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle);
tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl);
tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt);
+ tcp_sock_set_maxseg(ssk, msk->maxseg);
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 15613d691bfe..3f1b62a9fe88 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -544,10 +544,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
- mptcp_do_fallback(sk);
- pr_fallback(msk);
+ if (!mptcp_try_fallback(sk,
+ MPTCP_MIB_MPCAPABLEACTIVEFALLBACK)) {
+ MPTCP_INC_STATS(sock_net(sk),
+ MPTCP_MIB_FALLBACKFAILED);
+ goto do_reset;
+ }
+
goto fallback;
}
@@ -1300,20 +1303,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
mptcp_schedule_work(sk);
}
-static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
unsigned long fail_tout;
+ /* we are really failing, prevent any later subflow join */
+ spin_lock_bh(&msk->fallback_lock);
+ if (!msk->allow_infinite_fallback) {
+ spin_unlock_bh(&msk->fallback_lock);
+ return false;
+ }
+ msk->allow_subflows = false;
+ spin_unlock_bh(&msk->fallback_lock);
+
/* graceful failure can happen only on the MPC subflow */
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
- return;
+ return false;
/* since the close timeout take precedence on the fail one,
* no need to start the latter when the first is already set
*/
if (sock_flag((struct sock *)msk, SOCK_DEAD))
- return;
+ return true;
/* we don't need extreme accuracy here, use a zero fail_tout as special
* value meaning no fail timeout at all;
@@ -1325,6 +1337,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
tcp_send_ack(ssk);
mptcp_reset_tout_timer(msk, subflow->fail_tout);
+ return true;
}
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1385,17 +1398,16 @@ fallback:
(subflow->mp_join || subflow->valid_csum_seen)) {
subflow->send_mp_fail = 1;
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_subflow_fail(msk, ssk)) {
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
goto reset;
}
- mptcp_subflow_fail(msk, ssk);
WRITE_ONCE(subflow->data_avail, true);
return true;
}
- if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ if (!mptcp_try_fallback(ssk, MPTCP_MIB_DSSFALLBACK)) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
@@ -1413,8 +1425,6 @@ reset:
WRITE_ONCE(subflow->data_avail, false);
return false;
}
-
- mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1679,7 +1689,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- WRITE_ONCE(msk->allow_infinite_fallback, false);
mptcp_stop_tout_timer(sk);
return 0;
@@ -1845,14 +1854,11 @@ static void subflow_state_change(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct sock *parent = subflow->conn;
- struct mptcp_sock *msk;
__subflow_state_change(sk);
- msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_do_fallback(sk);
- pr_fallback(msk);
+ WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK));
subflow->conn_finished = 1;
mptcp_propagate_state(parent, sk, subflow, NULL);
}
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index e76c6de0c784..adee6dcabdc3 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -110,7 +110,7 @@ struct ncsi_channel_version {
u8 update; /* NCSI version update */
char alpha1; /* NCSI version alpha1 */
char alpha2; /* NCSI version alpha2 */
- u8 fw_name[12]; /* Firmware name string */
+ u8 fw_name[12 + 1]; /* Firmware name string, NUL-terminated */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
u32 mf_id; /* Manufacture ID */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 472cc68ad86f..271ec6c3929e 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -775,6 +775,7 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
+ ncv->fw_name[12] = '\0';
ncv->fw_version = ntohl(rsp->fw_version);
for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++)
ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2560416218d0..6cdc994fdc8a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -195,16 +195,6 @@ config NF_CONNTRACK_LABELS
config NF_CONNTRACK_OVS
bool
-config NF_CT_PROTO_DCCP
- bool 'DCCP protocol connection tracking support'
- depends on NETFILTER_ADVANCED
- default y
- help
- With this option enabled, the layer 3 independent connection
- tracking code will be able to do state tracking on DCCP connections.
-
- If unsure, say Y.
-
config NF_CT_PROTO_GRE
bool
@@ -516,6 +506,12 @@ config NFT_CT
This option adds the "ct" expression that you can use to match
connection tracking information such as the flow state.
+config NFT_EXTHDR_DCCP
+ bool "Netfilter nf_tables exthdr DCCP support (DEPRECATED)"
+ default n
+ help
+ This option adds support for matching on DCCP extension headers.
+
config NFT_FLOW_OFFLOAD
depends on NF_CONNTRACK && NF_FLOW_TABLE
tristate "Netfilter nf_tables hardware flow offload module"
@@ -762,6 +758,16 @@ config NETFILTER_XTABLES_COMPAT
If unsure, say N.
+config NETFILTER_XTABLES_LEGACY
+ bool "Netfilter legacy tables support"
+ depends on !PREEMPT_RT
+ help
+ Say Y here if you still require support for legacy tables. This is
+ required by the legacy tools (iptables-legacy) and is not needed if
+ you use iptables over nftables (iptables-nft).
+ Legacy support is not limited to IP; it also includes EBTABLES and
+ ARPTABLES.
+
comment "Xtables combined modules"
config NETFILTER_XT_MARK
@@ -1278,9 +1284,9 @@ config NETFILTER_XT_MATCH_CPU
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_MATCH_DCCP
- tristate '"dccp" protocol match support'
+ tristate '"dccp" protocol match support (DEPRECATED)'
depends on NETFILTER_ADVANCED
- default IP_DCCP
+ default n
help
With this option enabled, you will be able to use the iptables
`dccp' match in order to match on DCCP source/destination ports
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f0aa4d7ef499..e43e20f529f8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -12,7 +12,6 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o
-nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
ifeq ($(CONFIG_NF_CONNTRACK),m)
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 44b2ad695c15..965f3c8e5089 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -926,7 +926,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
{
/* Using mod_timer_pending will ensure the timer is not
- * modified after the final del_timer in ip_vs_conn_expire.
+ * modified after the final timer_delete in ip_vs_conn_expire.
*/
if (timer_pending(&cp->timer) &&
time_after(cp->timer.expires, jiffies))
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 014f07740369..95af252b2939 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -97,7 +97,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest)
if (!dest_dst)
return NULL;
dst = dest_dst->dst_cache;
- if (dst->obsolete &&
+ if (READ_ONCE(dst->obsolete) &&
dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
return NULL;
return dest_dst;
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 06b084844700..3e4fb9ddcd36 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -17,7 +17,7 @@ static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
.skb = skb,
};
- return bpf_prog_run(prog, &ctx);
+ return bpf_prog_run_pin_on_cpu(prog, &ctx);
}
struct bpf_nf_link {
@@ -225,7 +225,8 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (!link)
return -ENOMEM;
- bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
+ bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog,
+ attr->link_create.attach_type);
link->hook_ops.hook = nf_hook_run_bpf;
link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 201d3c4ec623..344f88295976 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -136,8 +136,8 @@ static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
}
/* return true if we need to recompute hashes (in case hash table was resized) */
-static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
- unsigned int h2, unsigned int sequence)
+static bool nf_conntrack_double_lock(unsigned int h1, unsigned int h2,
+ unsigned int sequence)
{
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
@@ -329,9 +329,6 @@ nf_ct_get_tuple(const struct sk_buff *skb,
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP:
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP:
-#endif
/* fallthrough */
return nf_ct_get_tuple_ports(skb, dataoff, tuple);
default:
@@ -616,7 +613,7 @@ static void __nf_ct_delete_from_lists(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
clean_from_lists(ct);
nf_conntrack_double_unlock(hash, reply_hash);
@@ -893,7 +890,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);
@@ -1124,6 +1121,12 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
&nf_conntrack_hash[repl_idx]);
+ /* confirmed bit must be set after hlist add, not before:
+ * loser_ct can still be visible to other cpu due to
+ * SLAB_TYPESAFE_BY_RCU.
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);
NF_CT_STAT_INC(net, clash_resolve);
return NF_ACCEPT;
@@ -1231,7 +1234,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
- } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ } while (nf_conntrack_double_lock(hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
* connections for unconfirmed conns. But packet copies and
@@ -1260,8 +1263,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* user context, else we insert an already 'dead' hash, blocking
* further use of that particular connection -JM.
*/
- ct->status |= IPS_CONFIRMED;
-
if (unlikely(nf_ct_is_dying(ct))) {
NF_CT_STAT_INC(net, insert_failed);
goto dying;
@@ -1293,7 +1294,7 @@ chaintoolong:
}
}
- /* Timer relative to confirmation time, not original
+ /* Timeout is relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
ct->timeout += nfct_time_stamp;
@@ -1301,11 +1302,21 @@ chaintoolong:
__nf_conntrack_insert_prepare(ct);
/* Since the lookup is lockless, hash insertion must be done after
- * starting the timer and setting the CONFIRMED bit. The RCU barriers
- * guarantee that no other CPU can find the conntrack before the above
- * stores are visible.
+ * setting ct->timeout. The RCU barriers guarantee that no other CPU
+ * can find the conntrack before the above stores are visible.
*/
__nf_conntrack_hash_insert(ct, hash, reply_hash);
+
+ /* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
+ * skip entries that lack this bit. This happens when a CPU is looking
+ * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
+ * or when another CPU encounters this entry right after the insertion
+ * but before the set-confirm-bit below. This bit must not be set until
+ * after __nf_conntrack_hash_insert().
+ */
+ smp_mb__before_atomic();
+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
@@ -1662,7 +1673,11 @@ __nf_conntrack_alloc(struct net *net,
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
atomic_dec(&cnet->count);
- net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
+ if (net == &init_net)
+ net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
+ else
+ net_warn_ratelimited("nf_conntrack: table full in netns %u, dropping packet\n",
+ net->ns.inum);
return ERR_PTR(-ENOMEM);
}
}
@@ -1982,11 +1997,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
return nf_conntrack_sctp_packet(ct, skb, dataoff,
ctinfo, state);
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP:
- return nf_conntrack_dccp_packet(ct, skb, dataoff,
- ctinfo, state);
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
return nf_conntrack_gre_packet(ct, skb, dataoff,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2cc0fde23344..486d52b45fe5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2036,7 +2036,6 @@ static void ctnetlink_change_mark(struct nf_conn *ct,
static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
[CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
- [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
[CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED },
};
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index f36727ed91e1..bc1d96686b9c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -100,9 +100,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
-#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
#endif
@@ -681,9 +678,6 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#if IS_ENABLED(CONFIG_IPV6)
nf_conntrack_icmpv6_init_net(net);
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- nf_conntrack_dccp_init_net(net);
-#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
nf_conntrack_sctp_init_net(net);
#endif
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
deleted file mode 100644
index ebc4f733bb2e..000000000000
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ /dev/null
@@ -1,826 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * DCCP connection tracking protocol helper
- *
- * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sysctl.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/dccp.h>
-#include <linux/slab.h>
-
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-#include <net/netfilter/nf_log.h>
-
-/* Timeouts are based on values from RFC4340:
- *
- * - REQUEST:
- *
- * 8.1.2. Client Request
- *
- * A client MAY give up on its DCCP-Requests after some time
- * (3 minutes, for example).
- *
- * - RESPOND:
- *
- * 8.1.3. Server Response
- *
- * It MAY also leave the RESPOND state for CLOSED after a timeout of
- * not less than 4MSL (8 minutes);
- *
- * - PARTOPEN:
- *
- * 8.1.5. Handshake Completion
- *
- * If the client remains in PARTOPEN for more than 4MSL (8 minutes),
- * it SHOULD reset the connection with Reset Code 2, "Aborted".
- *
- * - OPEN:
- *
- * The DCCP timestamp overflows after 11.9 hours. If the connection
- * stays idle this long the sequence number won't be recognized
- * as valid anymore.
- *
- * - CLOSEREQ/CLOSING:
- *
- * 8.3. Termination
- *
- * The retransmission timer should initially be set to go off in two
- * round-trip times and should back off to not less than once every
- * 64 seconds ...
- *
- * - TIMEWAIT:
- *
- * 4.3. States
- *
- * A server or client socket remains in this state for 2MSL (4 minutes)
- * after the connection has been torn down, ...
- */
-
-#define DCCP_MSL (2 * 60 * HZ)
-
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-static const char * const dccp_state_names[] = {
- [CT_DCCP_NONE] = "NONE",
- [CT_DCCP_REQUEST] = "REQUEST",
- [CT_DCCP_RESPOND] = "RESPOND",
- [CT_DCCP_PARTOPEN] = "PARTOPEN",
- [CT_DCCP_OPEN] = "OPEN",
- [CT_DCCP_CLOSEREQ] = "CLOSEREQ",
- [CT_DCCP_CLOSING] = "CLOSING",
- [CT_DCCP_TIMEWAIT] = "TIMEWAIT",
- [CT_DCCP_IGNORE] = "IGNORE",
- [CT_DCCP_INVALID] = "INVALID",
-};
-#endif
-
-#define sNO CT_DCCP_NONE
-#define sRQ CT_DCCP_REQUEST
-#define sRS CT_DCCP_RESPOND
-#define sPO CT_DCCP_PARTOPEN
-#define sOP CT_DCCP_OPEN
-#define sCR CT_DCCP_CLOSEREQ
-#define sCG CT_DCCP_CLOSING
-#define sTW CT_DCCP_TIMEWAIT
-#define sIG CT_DCCP_IGNORE
-#define sIV CT_DCCP_INVALID
-
-/*
- * DCCP state transition table
- *
- * The assumption is the same as for TCP tracking:
- *
- * We are the man in the middle. All the packets go through us but might
- * get lost in transit to the destination. It is assumed that the destination
- * can't receive segments we haven't seen.
- *
- * The following states exist:
- *
- * NONE: Initial state, expecting Request
- * REQUEST: Request seen, waiting for Response from server
- * RESPOND: Response from server seen, waiting for Ack from client
- * PARTOPEN: Ack after Response seen, waiting for packet other than Response,
- * Reset or Sync from server
- * OPEN: Packet other than Response, Reset or Sync seen
- * CLOSEREQ: CloseReq from server seen, expecting Close from client
- * CLOSING: Close seen, expecting Reset
- * TIMEWAIT: Reset seen
- * IGNORE: Not determinable whether packet is valid
- *
- * Some states exist only on one side of the connection: REQUEST, RESPOND,
- * PARTOPEN, CLOSEREQ. For the other side these states are equivalent to
- * the one it was in before.
- *
- * Packets are marked as ignored (sIG) if we don't know if they're valid
- * (for example a reincarnation of a connection we didn't notice is dead
- * already) and the server may send back a connection closing Reset or a
- * Response. They're also used for Sync/SyncAck packets, which we don't
- * care about.
- */
-static const u_int8_t
-dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = {
- [CT_DCCP_ROLE_CLIENT] = {
- [DCCP_PKT_REQUEST] = {
- /*
- * sNO -> sRQ Regular Request
- * sRQ -> sRQ Retransmitted Request or reincarnation
- * sRS -> sRS Retransmitted Request (apparently Response
- * got lost after we saw it) or reincarnation
- * sPO -> sIG Ignore, conntrack might be out of sync
- * sOP -> sIG Ignore, conntrack might be out of sync
- * sCR -> sIG Ignore, conntrack might be out of sync
- * sCG -> sIG Ignore, conntrack might be out of sync
- * sTW -> sRQ Reincarnation
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, */
- sRQ, sRQ, sRS, sIG, sIG, sIG, sIG, sRQ,
- },
- [DCCP_PKT_RESPONSE] = {
- /*
- * sNO -> sIV Invalid
- * sRQ -> sIG Ignore, might be response to ignored Request
- * sRS -> sIG Ignore, might be response to ignored Request
- * sPO -> sIG Ignore, might be response to ignored Request
- * sOP -> sIG Ignore, might be response to ignored Request
- * sCR -> sIG Ignore, might be response to ignored Request
- * sCG -> sIG Ignore, might be response to ignored Request
- * sTW -> sIV Invalid, reincarnation in reverse direction
- * goes through sRQ
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIV,
- },
- [DCCP_PKT_ACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
- * sPO -> sPO Retransmitted Ack for Response, remain in PARTOPEN
- * sOP -> sOP Regular ACK, remain in OPEN
- * sCR -> sCR Ack in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
- },
- [DCCP_PKT_DATA] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sIV MUST use DataAck in PARTOPEN state (8.1.5.)
- * sOP -> sOP Regular Data packet
- * sCR -> sCR Data in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sIV, sOP, sCR, sCG, sIV,
- },
- [DCCP_PKT_DATAACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sPO Ack for Response, move to PARTOPEN (8.1.5.)
- * sPO -> sPO Remain in PARTOPEN state
- * sOP -> sOP Regular DataAck packet in OPEN state
- * sCR -> sCR DataAck in CLOSEREQ MAY be processed (8.3.)
- * sCG -> sCG DataAck in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sPO, sPO, sOP, sCR, sCG, sIV
- },
- [DCCP_PKT_CLOSEREQ] = {
- /*
- * CLOSEREQ may only be sent by the server.
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV
- },
- [DCCP_PKT_CLOSE] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sCG Client-initiated close
- * sOP -> sCG Client-initiated close
- * sCR -> sCG Close in response to CloseReq (8.3.)
- * sCG -> sCG Retransmit
- * sTW -> sIV Late retransmit, already in TIME_WAIT
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCG, sCG, sCG, sIV, sIV
- },
- [DCCP_PKT_RESET] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sTW Sync received or timeout, SHOULD send Reset (8.1.1.)
- * sRS -> sTW Response received without Request
- * sPO -> sTW Timeout, SHOULD send Reset (8.1.5.)
- * sOP -> sTW Connection reset
- * sCR -> sTW Connection reset
- * sCG -> sTW Connection reset
- * sTW -> sIG Ignore (don't refresh timer)
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sTW, sTW, sTW, sTW, sTW, sTW, sIG
- },
- [DCCP_PKT_SYNC] = {
- /*
- * We currently ignore Sync packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- [DCCP_PKT_SYNCACK] = {
- /*
- * We currently ignore SyncAck packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- },
- [CT_DCCP_ROLE_SERVER] = {
- [DCCP_PKT_REQUEST] = {
- /*
- * sNO -> sIV Invalid
- * sRQ -> sIG Ignore, conntrack might be out of sync
- * sRS -> sIG Ignore, conntrack might be out of sync
- * sPO -> sIG Ignore, conntrack might be out of sync
- * sOP -> sIG Ignore, conntrack might be out of sync
- * sCR -> sIG Ignore, conntrack might be out of sync
- * sCG -> sIG Ignore, conntrack might be out of sync
- * sTW -> sRQ Reincarnation, must reverse roles
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sRQ
- },
- [DCCP_PKT_RESPONSE] = {
- /*
- * sNO -> sIV Response without Request
- * sRQ -> sRS Response to clients Request
- * sRS -> sRS Retransmitted Response (8.1.3. SHOULD NOT)
- * sPO -> sIG Response to an ignored Request or late retransmit
- * sOP -> sIG Ignore, might be response to ignored Request
- * sCR -> sIG Ignore, might be response to ignored Request
- * sCG -> sIG Ignore, might be response to ignored Request
- * sTW -> sIV Invalid, Request from client in sTW moves to sRQ
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sRS, sRS, sIG, sIG, sIG, sIG, sIV
- },
- [DCCP_PKT_ACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular Ack in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Ack in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_DATA] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular Data packet in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_DATAACK] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP Enter OPEN state (8.1.5.)
- * sOP -> sOP Regular DataAck in OPEN state
- * sCR -> sIV Waiting for Close from client
- * sCG -> sCG Data in CLOSING MAY be processed (8.3.)
- * sTW -> sIV
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sOP, sOP, sIV, sCG, sIV
- },
- [DCCP_PKT_CLOSEREQ] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP -> sCR Move directly to CLOSEREQ (8.1.5.)
- * sOP -> sCR CloseReq in OPEN state
- * sCR -> sCR Retransmit
- * sCG -> sCR Simultaneous close, client sends another Close
- * sTW -> sIV Already closed
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCR, sCR, sCR, sCR, sIV
- },
- [DCCP_PKT_CLOSE] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sIV No connection
- * sRS -> sIV No connection
- * sPO -> sOP -> sCG Move directly to CLOSING
- * sOP -> sCG Move to CLOSING
- * sCR -> sIV Close after CloseReq is invalid
- * sCG -> sCG Retransmit
- * sTW -> sIV Already closed
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIV, sIV, sCG, sCG, sIV, sCG, sIV
- },
- [DCCP_PKT_RESET] = {
- /*
- * sNO -> sIV No connection
- * sRQ -> sTW Reset in response to Request
- * sRS -> sTW Timeout, SHOULD send Reset (8.1.3.)
- * sPO -> sTW Timeout, SHOULD send Reset (8.1.3.)
- * sOP -> sTW
- * sCR -> sTW
- * sCG -> sTW
- * sTW -> sIG Ignore (don't refresh timer)
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW, sTW */
- sIV, sTW, sTW, sTW, sTW, sTW, sTW, sTW, sIG
- },
- [DCCP_PKT_SYNC] = {
- /*
- * We currently ignore Sync packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- [DCCP_PKT_SYNCACK] = {
- /*
- * We currently ignore SyncAck packets
- *
- * sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
- },
- },
-};
-
-static noinline bool
-dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- const struct dccp_hdr *dh,
- const struct nf_hook_state *hook_state)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_dccp_net *dn;
- const char *msg;
- u_int8_t state;
-
- state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
- switch (state) {
- default:
- dn = nf_dccp_pernet(net);
- if (dn->dccp_loose == 0) {
- msg = "not picking up existing connection ";
- goto out_invalid;
- }
- break;
- case CT_DCCP_REQUEST:
- break;
- case CT_DCCP_INVALID:
- msg = "invalid state transition ";
- goto out_invalid;
- }
-
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.state = CT_DCCP_NONE;
- ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
- ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
- ct->proto.dccp.handshake_seq = 0;
- return true;
-
-out_invalid:
- nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
- return false;
-}
-
-static u64 dccp_ack_seq(const struct dccp_hdr *dh)
-{
- const struct dccp_hdr_ack_bits *dhack;
-
- dhack = (void *)dh + __dccp_basic_hdr_len(dh);
- return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) +
- ntohl(dhack->dccph_ack_nr_low);
-}
-
-static bool dccp_error(const struct dccp_hdr *dh,
- struct sk_buff *skb, unsigned int dataoff,
- const struct nf_hook_state *state)
-{
- static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST |
- 1 << DCCP_PKT_RESPONSE |
- 1 << DCCP_PKT_CLOSEREQ |
- 1 << DCCP_PKT_CLOSE |
- 1 << DCCP_PKT_RESET |
- 1 << DCCP_PKT_SYNC |
- 1 << DCCP_PKT_SYNCACK;
- unsigned int dccp_len = skb->len - dataoff;
- unsigned int cscov;
- const char *msg;
- u8 type;
-
- BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG);
-
- if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
- dh->dccph_doff * 4 > dccp_len) {
- msg = "nf_ct_dccp: truncated/malformed packet ";
- goto out_invalid;
- }
-
- cscov = dccp_len;
- if (dh->dccph_cscov) {
- cscov = (dh->dccph_cscov - 1) * 4;
- if (cscov > dccp_len) {
- msg = "nf_ct_dccp: bad checksum coverage ";
- goto out_invalid;
- }
- }
-
- if (state->hook == NF_INET_PRE_ROUTING &&
- state->net->ct.sysctl_checksum &&
- nf_checksum_partial(skb, state->hook, dataoff, cscov,
- IPPROTO_DCCP, state->pf)) {
- msg = "nf_ct_dccp: bad checksum ";
- goto out_invalid;
- }
-
- type = dh->dccph_type;
- if (type >= DCCP_PKT_INVALID) {
- msg = "nf_ct_dccp: reserved packet type ";
- goto out_invalid;
- }
-
- if (test_bit(type, &require_seq48) && !dh->dccph_x) {
- msg = "nf_ct_dccp: type lacks 48bit sequence numbers";
- goto out_invalid;
- }
-
- return false;
-out_invalid:
- nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
- return true;
-}
-
-struct nf_conntrack_dccp_buf {
- struct dccp_hdr dh; /* generic header part */
- struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */
- union { /* depends on header type */
- struct dccp_hdr_ack_bits ack;
- struct dccp_hdr_request req;
- struct dccp_hdr_response response;
- struct dccp_hdr_reset rst;
- } u;
-};
-
-static struct dccp_hdr *
-dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh,
- struct nf_conntrack_dccp_buf *buf)
-{
- unsigned int hdrlen = __dccp_hdr_len(dh);
-
- if (hdrlen > sizeof(*buf))
- return NULL;
-
- return skb_header_pointer(skb, offset, hdrlen, buf);
-}
-
-int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- struct nf_conntrack_dccp_buf _dh;
- u_int8_t type, old_state, new_state;
- enum ct_dccp_roles role;
- unsigned int *timeouts;
- struct dccp_hdr *dh;
-
- dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
- if (!dh)
- return -NF_ACCEPT;
-
- if (dccp_error(dh, skb, dataoff, state))
- return -NF_ACCEPT;
-
- /* pull again, including possible 48 bit sequences and subtype header */
- dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
- if (!dh)
- return -NF_ACCEPT;
-
- type = dh->dccph_type;
- if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
- return -NF_ACCEPT;
-
- if (type == DCCP_PKT_RESET &&
- !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- /* Tear down connection immediately if only reply is a RESET */
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_ACCEPT;
- }
-
- spin_lock_bh(&ct->lock);
-
- role = ct->proto.dccp.role[dir];
- old_state = ct->proto.dccp.state;
- new_state = dccp_state_table[role][type][old_state];
-
- switch (new_state) {
- case CT_DCCP_REQUEST:
- if (old_state == CT_DCCP_TIMEWAIT &&
- role == CT_DCCP_ROLE_SERVER) {
- /* Reincarnation in the reverse direction: reopen and
- * reverse client/server roles. */
- ct->proto.dccp.role[dir] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_SERVER;
- }
- break;
- case CT_DCCP_RESPOND:
- if (old_state == CT_DCCP_REQUEST)
- ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
- break;
- case CT_DCCP_PARTOPEN:
- if (old_state == CT_DCCP_RESPOND &&
- type == DCCP_PKT_ACK &&
- dccp_ack_seq(dh) == ct->proto.dccp.handshake_seq)
- set_bit(IPS_ASSURED_BIT, &ct->status);
- break;
- case CT_DCCP_IGNORE:
- /*
- * Connection tracking might be out of sync, so we ignore
- * packets that might establish a new connection and resync
- * if the server responds with a valid Response.
- */
- if (ct->proto.dccp.last_dir == !dir &&
- ct->proto.dccp.last_pkt == DCCP_PKT_REQUEST &&
- type == DCCP_PKT_RESPONSE) {
- ct->proto.dccp.role[!dir] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[dir] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.handshake_seq = dccp_hdr_seq(dh);
- new_state = CT_DCCP_RESPOND;
- break;
- }
- ct->proto.dccp.last_dir = dir;
- ct->proto.dccp.last_pkt = type;
-
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
- return NF_ACCEPT;
- case CT_DCCP_INVALID:
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
- return -NF_ACCEPT;
- }
-
- ct->proto.dccp.last_dir = dir;
- ct->proto.dccp.last_pkt = type;
- ct->proto.dccp.state = new_state;
- spin_unlock_bh(&ct->lock);
-
- if (new_state != old_state)
- nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
-
- timeouts = nf_ct_timeout_lookup(ct);
- if (!timeouts)
- timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
- nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
-
- return NF_ACCEPT;
-}
-
-static bool dccp_can_early_drop(const struct nf_conn *ct)
-{
- switch (ct->proto.dccp.state) {
- case CT_DCCP_CLOSEREQ:
- case CT_DCCP_CLOSING:
- case CT_DCCP_TIMEWAIT:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
-static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
-{
- seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- struct nf_conn *ct, bool destroy)
-{
- struct nlattr *nest_parms;
-
- spin_lock_bh(&ct->lock);
- nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP);
- if (!nest_parms)
- goto nla_put_failure;
- if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state))
- goto nla_put_failure;
-
- if (destroy)
- goto skip_state;
-
- if (nla_put_u8(skb, CTA_PROTOINFO_DCCP_ROLE,
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]) ||
- nla_put_be64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
- cpu_to_be64(ct->proto.dccp.handshake_seq),
- CTA_PROTOINFO_DCCP_PAD))
- goto nla_put_failure;
-skip_state:
- nla_nest_end(skb, nest_parms);
- spin_unlock_bh(&ct->lock);
-
- return 0;
-
-nla_put_failure:
- spin_unlock_bh(&ct->lock);
- return -1;
-}
-
-static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
- [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
- [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
- [CTA_PROTOINFO_DCCP_PAD] = { .type = NLA_UNSPEC },
-};
-
-#define DCCP_NLATTR_SIZE ( \
- NLA_ALIGN(NLA_HDRLEN + 1) + \
- NLA_ALIGN(NLA_HDRLEN + 1) + \
- NLA_ALIGN(NLA_HDRLEN + sizeof(u64)) + \
- NLA_ALIGN(NLA_HDRLEN + 0))
-
-static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
-{
- struct nlattr *attr = cda[CTA_PROTOINFO_DCCP];
- struct nlattr *tb[CTA_PROTOINFO_DCCP_MAX + 1];
- int err;
-
- if (!attr)
- return 0;
-
- err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_DCCP_MAX, attr,
- dccp_nla_policy, NULL);
- if (err < 0)
- return err;
-
- if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
- !tb[CTA_PROTOINFO_DCCP_ROLE] ||
- nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
- nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
- return -EINVAL;
- }
-
- spin_lock_bh(&ct->lock);
- ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
- if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
- } else {
- ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
- ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
- }
- if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
- ct->proto.dccp.handshake_seq =
- be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
- }
- spin_unlock_bh(&ct->lock);
- return 0;
-}
-#endif
-
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
- struct net *net, void *data)
-{
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
- unsigned int *timeouts = data;
- int i;
-
- if (!timeouts)
- timeouts = dn->dccp_timeout;
-
- /* set default DCCP timeouts. */
- for (i=0; i<CT_DCCP_MAX; i++)
- timeouts[i] = dn->dccp_timeout[i];
-
- /* there's a 1:1 mapping between attributes and protocol states. */
- for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
- if (tb[i]) {
- timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
- }
- }
-
- timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST];
- return 0;
-}
-
-static int
-dccp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
- const unsigned int *timeouts = data;
- int i;
-
- for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i<CTA_TIMEOUT_DCCP_MAX+1; i++) {
- if (nla_put_be32(skb, i, htonl(timeouts[i] / HZ)))
- goto nla_put_failure;
- }
- return 0;
-
-nla_put_failure:
- return -ENOSPC;
-}
-
-static const struct nla_policy
-dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
- [CTA_TIMEOUT_DCCP_REQUEST] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_RESPOND] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_PARTOPEN] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_OPEN] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_CLOSEREQ] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_CLOSING] = { .type = NLA_U32 },
- [CTA_TIMEOUT_DCCP_TIMEWAIT] = { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-
-void nf_conntrack_dccp_init_net(struct net *net)
-{
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
-
- /* default values */
- dn->dccp_loose = 1;
- dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
-
- /* timeouts[0] is unused, make it same as SYN_SENT so
- * ->timeouts[0] contains 'new' timeout, like udp or icmp.
- */
- dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
- .l4proto = IPPROTO_DCCP,
- .can_early_drop = dccp_can_early_drop,
-#ifdef CONFIG_NF_CONNTRACK_PROCFS
- .print_conntrack = dccp_print_conntrack,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_size = DCCP_NLATTR_SIZE,
- .to_nlattr = dccp_to_nlattr,
- .from_nlattr = nlattr_to_dccp,
- .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
- .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
- .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
- .nla_policy = nf_ct_port_nla_policy,
-#endif
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- .ctnl_timeout = {
- .nlattr_to_obj = dccp_timeout_nlattr_to_obj,
- .obj_to_nlattr = dccp_timeout_obj_to_nlattr,
- .nlattr_max = CTA_TIMEOUT_DCCP_MAX,
- .obj_size = sizeof(unsigned int) * CT_DCCP_MAX,
- .nla_policy = dccp_timeout_nla_policy,
- },
-#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-};
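With the whole helper removed, a short illustrative reminder (sketch, not restored code) of how the deleted tracker consumed the state table documented above:

	/* Transitions were a plain three-way table lookup:
	 *   new_state = dccp_state_table[role][dh->dccph_type][old_state];
	 * e.g. a client Request seen in CT_DCCP_NONE moved to CT_DCCP_REQUEST,
	 * while the same packet seen in CT_DCCP_OPEN mapped to CT_DCCP_IGNORE.
	 */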
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 6c4cff10357d..9b8b10a85233 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -14,6 +14,7 @@
#include <linux/sysctl.h>
#endif
+#include <net/netfilter/nf_log.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -67,11 +68,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
ntohs(tuple->dst.u.udp.port));
break;
- case IPPROTO_DCCP:
- seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.dccp.port),
- ntohs(tuple->dst.u.dccp.port));
- break;
case IPPROTO_SCTP:
seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.sctp.port),
@@ -279,7 +275,6 @@ static const char* l4proto_name(u16 proto)
case IPPROTO_ICMP: return "icmp";
case IPPROTO_TCP: return "tcp";
case IPPROTO_UDP: return "udp";
- case IPPROTO_DCCP: return "dccp";
case IPPROTO_GRE: return "gre";
case IPPROTO_SCTP: return "sctp";
case IPPROTO_UDPLITE: return "udplite";
@@ -561,6 +556,29 @@ nf_conntrack_hash_sysctl(const struct ctl_table *table, int write,
return ret;
}
+static int
+nf_conntrack_log_invalid_sysctl(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, i;
+
+ ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ if (*(u8 *)table->data == 0)
+ return ret;
+
+ /* Load nf_log_syslog only if no logger is currently registered */
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ if (nf_log_is_registered(i))
+ return ret;
+ }
+ request_module("%s", "nf_log_syslog");
+
+ return ret;
+}
+
static struct ctl_table_header *nf_ct_netfilter_header;
enum nf_ct_sysctl_index {
@@ -612,16 +630,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING,
- NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT,
- NF_SYSCTL_CT_PROTO_DCCP_LOOSE,
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
@@ -667,7 +675,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
+ .proc_handler = nf_conntrack_log_invalid_sysctl,
},
[NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",
@@ -895,58 +903,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
- .procname = "nf_conntrack_dccp_timeout_request",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = {
- .procname = "nf_conntrack_dccp_timeout_respond",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = {
- .procname = "nf_conntrack_dccp_timeout_partopen",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = {
- .procname = "nf_conntrack_dccp_timeout_open",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = {
- .procname = "nf_conntrack_dccp_timeout_closereq",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = {
- .procname = "nf_conntrack_dccp_timeout_closing",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = {
- .procname = "nf_conntrack_dccp_timeout_timewait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
- .procname = "nf_conntrack_dccp_loose",
- .maxlen = sizeof(u8),
- .mode = 0644,
- .proc_handler = proc_dou8vec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = {
.procname = "nf_conntrack_gre_timeout",
@@ -1032,29 +988,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
#endif
}
-static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net,
- struct ctl_table *table)
-{
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- struct nf_dccp_net *dn = nf_dccp_pernet(net);
-
-#define XASSIGN(XNAME, dn) \
- table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \
- &(dn)->dccp_timeout[CT_DCCP_ ## XNAME]
-
- XASSIGN(REQUEST, dn);
- XASSIGN(RESPOND, dn);
- XASSIGN(PARTOPEN, dn);
- XASSIGN(OPEN, dn);
- XASSIGN(CLOSEREQ, dn);
- XASSIGN(CLOSING, dn);
- XASSIGN(TIMEWAIT, dn);
-#undef XASSIGN
-
- table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose;
-#endif
-}
-
static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
struct ctl_table *table)
{
@@ -1100,7 +1033,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
nf_conntrack_standalone_init_tcp_sysctl(net, table);
nf_conntrack_standalone_init_sctp_sysctl(net, table);
- nf_conntrack_standalone_init_dccp_sysctl(net, table);
nf_conntrack_standalone_init_gre_sysctl(net, table);
/* Don't allow non-init_net ns to alter global sysctls */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 6dd0de33eebd..74cef8bf554c 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -125,6 +125,32 @@ void nf_log_unregister(struct nf_logger *logger)
}
EXPORT_SYMBOL(nf_log_unregister);
+/**
+ * nf_log_is_registered - Check if any logger is registered for a given
+ * protocol family.
+ *
+ * @pf: Protocol family
+ *
+ * Returns: true if at least one logger is active for @pf, false otherwise.
+ */
+bool nf_log_is_registered(u_int8_t pf)
+{
+ int i;
+
+ if (pf >= NFPROTO_NUMPROTO) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ for (i = 0; i < NF_LOG_TYPE_MAX; i++) {
+ if (rcu_access_pointer(loggers[pf][i]))
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(nf_log_is_registered);
+
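A hedged caller sketch for the new helper; the wrapper name is invented, but the loop mirrors the nf_conntrack_log_invalid sysctl handler added earlier in this series (assumes linux/kmod.h for request_module()):

	static void load_default_logger_if_needed(void)	/* hypothetical */
	{
		int pf;

		/* pull in nf_log_syslog only when no logger could consume logs */
		for (pf = 0; pf < NFPROTO_NUMPROTO; pf++) {
			if (nf_log_is_registered(pf))
				return;
		}
		request_module("%s", "nf_log_syslog");
	}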
int nf_log_bind_pf(struct net *net, u_int8_t pf,
const struct nf_logger *logger)
{
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index f391cd267922..78a61dac4ade 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -69,7 +69,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_dport = t->dst.u.all;
}
@@ -81,7 +80,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_sport = t->src.u.all;
}
@@ -102,7 +100,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_dport = t->dst.u.all;
}
@@ -114,7 +111,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_sport = t->src.u.all;
}
@@ -432,7 +428,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
- case IPPROTO_DCCP:
case IPPROTO_SCTP:
if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
@@ -632,7 +627,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
- case IPPROTO_DCCP:
if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.all;
else
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index dc450cc81222..b14a434b9561 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -180,46 +180,6 @@ tcp_manip_pkt(struct sk_buff *skb,
}
static bool
-dccp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- struct dccp_hdr *hdr;
- __be16 *portptr, oldport, newport;
- int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
- if (skb->len >= hdroff + sizeof(struct dccp_hdr))
- hdrsize = sizeof(struct dccp_hdr);
-
- if (skb_ensure_writable(skb, hdroff + hdrsize))
- return false;
-
- hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- newport = tuple->src.u.dccp.port;
- portptr = &hdr->dccph_sport;
- } else {
- newport = tuple->dst.u.dccp.port;
- portptr = &hdr->dccph_dport;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- false);
-#endif
- return true;
-}
-
-static bool
icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
@@ -338,9 +298,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb,
case IPPROTO_ICMPV6:
return icmpv6_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
- case IPPROTO_DCCP:
- return dccp_manip_pkt(skb, iphdroff, hdroff,
- tuple, maniptype);
case IPPROTO_GRE:
return gre_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 24c71ecb2179..13d0ed9d1895 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1153,9 +1153,9 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -1165,7 +1165,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
NFTA_TABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELTABLE) {
+ if (event == NFT_MSG_DELTABLE ||
+ event == NFT_MSG_DESTROYTABLE) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -2016,9 +2017,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -2028,7 +2029,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
NFTA_CHAIN_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELCHAIN && !hook_list) {
+ if (!hook_list &&
+ (event == NFT_MSG_DELCHAIN ||
+ event == NFT_MSG_DESTROYCHAIN)) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -4039,7 +4042,7 @@ void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
/* can only be used if rule is no longer visible to dumps */
static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -4845,9 +4848,10 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
u32 seq = ctx->seq;
int i;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
- NFNETLINK_V0, nft_base_seq(ctx->net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, ctx->family, NFNETLINK_V0,
+ nft_base_seq(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -4859,7 +4863,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
NFTA_SET_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELSET) {
+ if (event == NFT_MSG_DELSET ||
+ event == NFT_MSG_DESTROYSET) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -5859,7 +5864,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase)
{
- lockdep_commit_lock_is_held(ctx->net);
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(ctx->net));
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
@@ -8338,25 +8343,26 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
{
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
+ nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be64(skb, NFTA_OBJ_HANDLE, cpu_to_be64(obj->handle),
NFTA_OBJ_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELOBJ) {
+ if (event == NFT_MSG_DELOBJ ||
+ event == NFT_MSG_DESTROYOBJ) {
nlmsg_end(skb, nlh);
return 0;
}
- if (nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
- nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
+ if (nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset))
goto nla_put_failure;
@@ -8889,11 +8895,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
list_for_each_entry(hook, &flowtable_hook->list, list) {
list_for_each_entry(ops, &hook->ops_list, list) {
- ops->pf = NFPROTO_NETDEV;
- ops->hooknum = flowtable_hook->num;
- ops->priority = flowtable_hook->priority;
- ops->priv = &flowtable->data;
- ops->hook = flowtable->data.type->hook;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable_hook->num;
+ ops->priority = flowtable_hook->priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
}
}
@@ -9382,9 +9389,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
struct nft_hook *hook;
struct nlmsghdr *nlh;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
- NFNETLINK_V0, nft_base_seq(net));
+ nlh = nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
+ flags, family, NFNETLINK_V0, nft_base_seq(net));
if (!nlh)
goto nla_put_failure;
@@ -9394,7 +9401,9 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
NFTA_FLOWTABLE_PAD))
goto nla_put_failure;
- if (event == NFT_MSG_DELFLOWTABLE && !hook_list) {
+ if (!hook_list &&
+ (event == NFT_MSG_DELFLOWTABLE ||
+ event == NFT_MSG_DESTROYFLOWTABLE)) {
nlmsg_end(skb, nlh);
return 0;
}
@@ -9686,64 +9695,6 @@ struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
}
EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu);
-static void
-nf_tables_device_notify(const struct nft_table *table, int attr,
- const char *name, const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- struct net *net = dev_net(dev);
- struct nlmsghdr *nlh;
- struct sk_buff *skb;
- u16 flags = 0;
-
- if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV))
- return;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- goto err;
-
- event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV;
- event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family,
- NFNETLINK_V0, nft_base_seq(net));
- if (!nlh)
- goto err;
-
- if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) ||
- nla_put_string(skb, attr, name) ||
- nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) ||
- nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
- goto err;
-
- nlmsg_end(skb, nlh);
- nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV,
- nlmsg_report(nlh), GFP_KERNEL);
- return;
-err:
- if (skb)
- kfree_skb(skb);
- nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS);
-}
-
-void
-nf_tables_chain_device_notify(const struct nft_chain *chain,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN,
- chain->name, hook, dev, event);
-}
-
-static void
-nf_tables_flowtable_device_notify(const struct nft_flowtable *ft,
- const struct nft_hook *hook,
- const struct net_device *dev, int event)
-{
- nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE,
- ft->name, hook, dev, event);
-}
-
static int nft_flowtable_event(unsigned long event, struct net_device *dev,
struct nft_flowtable *flowtable, bool changename)
{
@@ -9777,12 +9728,13 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
if (!ops)
return 1;
- ops->pf = NFPROTO_NETDEV;
- ops->hooknum = flowtable->hooknum;
- ops->priority = flowtable->data.priority;
- ops->priv = &flowtable->data;
- ops->hook = flowtable->data.type->hook;
- ops->dev = dev;
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable->hooknum;
+ ops->priority = flowtable->data.priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ ops->hook_ops_type = NF_HOOK_OP_NFT_FT;
+ ops->dev = dev;
if (nft_register_flowtable_ops(dev_net(dev),
flowtable, ops)) {
kfree(ops);
@@ -9791,7 +9743,6 @@ static int nft_flowtable_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_flowtable_device_notify(flowtable, hook, dev, event);
break;
}
return 0;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index ae3fe87195ab..a88abae5a9de 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -127,6 +127,9 @@ static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
return -1;
+ /* Kernel implementation detail, withhold this from userspace for now */
+ status &= ~IPS_NAT_CLASH;
+
if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
return -1;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ac77fc21632d..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -86,7 +86,6 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
[NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
- [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES,
};
static struct nfnl_net *nfnl_pernet(struct net *net)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index eab4f476b47f..38d75484e531 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -461,11 +461,6 @@ static int cttimeout_default_get(struct sk_buff *skb,
case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(info->net)->timeouts;
break;
- case IPPROTO_DCCP:
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- timeouts = nf_dccp_pernet(info->net)->dccp_timeout;
-#endif
- break;
case IPPROTO_ICMPV6:
timeouts = &nf_icmpv6_pernet(info->net)->timeout;
break;
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index ade8ee1988b1..92d869317cba 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -109,13 +109,30 @@ cancel_nest:
return -EMSGSIZE;
}
+static int nfnl_hook_put_nft_info_desc(struct sk_buff *nlskb, const char *tname,
+ const char *name, u8 family)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
+ if (!nest ||
+ nla_put_string(nlskb, NFNLA_CHAIN_TABLE, tname) ||
+ nla_put_string(nlskb, NFNLA_CHAIN_NAME, name) ||
+ nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, family)) {
+ nla_nest_cancel(nlskb, nest);
+ return -EMSGSIZE;
+ }
+ nla_nest_end(nlskb, nest);
+ return 0;
+}
+
static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
const struct nfnl_dump_hook_data *ctx,
unsigned int seq,
struct nft_chain *chain)
{
struct net *net = sock_net(nlskb->sk);
- struct nlattr *nest, *nest2;
+ struct nlattr *nest;
int ret = 0;
if (WARN_ON_ONCE(!chain))
@@ -128,29 +145,47 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
if (!nest)
return -EMSGSIZE;
- nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
- if (!nest2)
- goto cancel_nest;
+ ret = nfnl_hook_put_nft_info_desc(nlskb, chain->table->name,
+ chain->name, chain->table->family);
+ if (ret) {
+ nla_nest_cancel(nlskb, nest);
+ return ret;
+ }
- ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
- if (ret)
- goto cancel_nest;
+ nla_nest_end(nlskb, nest);
+ return 0;
+}
- ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
- if (ret)
- goto cancel_nest;
+static int nfnl_hook_put_nft_ft_info(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ unsigned int seq,
+ struct nf_flowtable *nf_ft)
+{
+ struct nft_flowtable *ft =
+ container_of(nf_ft, struct nft_flowtable, data);
+ struct net *net = sock_net(nlskb->sk);
+ struct nlattr *nest;
+ int ret = 0;
- ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
- if (ret)
- goto cancel_nest;
+ if (WARN_ON_ONCE(!nf_ft))
+ return 0;
- nla_nest_end(nlskb, nest2);
- nla_nest_end(nlskb, nest);
- return ret;
+ if (!nft_is_active(net, ft))
+ return 0;
-cancel_nest:
- nla_nest_cancel(nlskb, nest);
- return -EMSGSIZE;
+ nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFT_FLOWTABLE);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = nfnl_hook_put_nft_info_desc(nlskb, ft->table->name,
+ ft->name, ft->table->family);
+ if (ret) {
+ nla_nest_cancel(nlskb, nest);
+ return ret;
+ }
+
+ nla_nest_end(nlskb, nest);
+ return 0;
}
static int nfnl_hook_dump_one(struct sk_buff *nlskb,
@@ -220,6 +255,9 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb,
case NF_HOOK_OP_BPF:
ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv);
break;
+ case NF_HOOK_OP_NFT_FT:
+ ret = nfnl_hook_put_nft_ft_info(nlskb, ctx, seq, ops->priv);
+ break;
case NF_HOOK_OP_UNDEFINED:
break;
default:
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 846d48ba8965..b16185e9a6dd 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -363,8 +363,6 @@ static int nft_netdev_event(unsigned long event, struct net_device *dev,
list_add_tail_rcu(&ops->list, &hook->ops_list);
break;
}
- nf_tables_chain_device_notify(&basechain->chain,
- hook, dev, event);
break;
}
return 0;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 88922e0e8e83..7807d8129664 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -44,9 +44,9 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
return 0;
}
-static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
- const struct nft_expr *expr,
- struct nft_regs *regs)
+struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
+ const struct nft_expr *expr,
+ struct nft_regs *regs)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set_ext *ext;
@@ -91,8 +91,8 @@ void nft_dynset_eval(const struct nft_expr *expr,
return;
}
- if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
- expr, regs, &ext)) {
+ ext = set->ops->update(set, &regs->data[priv->sreg_key], expr, regs);
+ if (ext) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index c74012c99125..7eedf4e3ae9c 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -407,6 +407,7 @@ err:
regs->verdict.code = NFT_BREAK;
}
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -482,6 +483,7 @@ static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
err:
*dest = 0;
}
+#endif
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
@@ -634,6 +636,7 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -649,6 +652,7 @@ static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
return 0;
}
+#endif
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
@@ -779,6 +783,7 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
+#ifdef CONFIG_NFT_EXTHDR_DCCP
static const struct nft_expr_ops nft_exthdr_dccp_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@@ -787,6 +792,7 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = {
.dump = nft_exthdr_dump,
.reduce = nft_exthdr_reduce,
};
+#endif
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
@@ -822,10 +828,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
+#ifdef CONFIG_NFT_EXTHDR_DCCP
case NFT_EXTHDR_OP_DCCP:
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_dccp_ops;
break;
+#endif
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 63ef832b8aa7..40c602ffbcba 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -25,32 +25,33 @@ struct nft_lookup {
};
#ifdef CONFIG_MITIGATION_RETPOLINE
-bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
if (set->ops == &nft_set_hash_fast_type.ops)
- return nft_hash_lookup_fast(net, set, key, ext);
+ return nft_hash_lookup_fast(net, set, key);
if (set->ops == &nft_set_hash_type.ops)
- return nft_hash_lookup(net, set, key, ext);
+ return nft_hash_lookup(net, set, key);
if (set->ops == &nft_set_rhash_type.ops)
- return nft_rhash_lookup(net, set, key, ext);
+ return nft_rhash_lookup(net, set, key);
if (set->ops == &nft_set_bitmap_type.ops)
- return nft_bitmap_lookup(net, set, key, ext);
+ return nft_bitmap_lookup(net, set, key);
if (set->ops == &nft_set_pipapo_type.ops)
- return nft_pipapo_lookup(net, set, key, ext);
+ return nft_pipapo_lookup(net, set, key);
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
if (set->ops == &nft_set_pipapo_avx2_type.ops)
- return nft_pipapo_avx2_lookup(net, set, key, ext);
+ return nft_pipapo_avx2_lookup(net, set, key);
#endif
if (set->ops == &nft_set_rbtree_type.ops)
- return nft_rbtree_lookup(net, set, key, ext);
+ return nft_rbtree_lookup(net, set, key);
WARN_ON_ONCE(1);
- return set->ops->lookup(net, set, key, ext);
+ return set->ops->lookup(net, set, key);
}
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
#endif
@@ -61,12 +62,12 @@ void nft_lookup_eval(const struct nft_expr *expr,
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
- const struct nft_set_ext *ext = NULL;
const struct net *net = nft_net(pkt);
+ const struct nft_set_ext *ext;
bool found;
- found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
- priv->invert;
+ ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
+ found = !!ext ^ priv->invert;
if (!found) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
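The same calling-convention change repeats across the set backends below; a before/after sketch of the caller side, illustrative only:

	/* before: boolean result plus out-parameter */
	const struct nft_set_ext *ext = NULL;
	bool found = nft_set_do_lookup(net, set, key, &ext);

	/* after: the extension pointer is the result, NULL means no match */
	const struct nft_set_ext *ext = nft_set_do_lookup(net, set, key);
	bool found = ext != NULL;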
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 09da7a3f9f96..8ee66a86c3bc 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -111,10 +111,9 @@ void nft_objref_map_eval(const struct nft_expr *expr,
struct net *net = nft_net(pkt);
const struct nft_set_ext *ext;
struct nft_object *obj;
- bool found;
- found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
- if (!found) {
+ ext = nft_set_do_lookup(net, set, &regs->data[priv->sreg]);
+ if (!ext) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 12390d2e994f..c24c922f895d 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -75,16 +75,21 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
}
INDIRECT_CALLABLE_SCOPE
-bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
const struct nft_bitmap *priv = nft_set_priv(set);
+ static const struct nft_set_ext found;
u8 genmask = nft_genmask_cur(net);
u32 idx, off;
nft_bitmap_location(set, key, &idx, &off);
- return nft_bitmap_active(priv->bitmap, idx, off, genmask);
+ if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ return &found;
+
+ return NULL;
}
static struct nft_bitmap_elem *
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index abb0c8ec6371..266d0c637225 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -81,8 +81,9 @@ static const struct rhashtable_params nft_rhash_params = {
};
INDIRECT_CALLABLE_SCOPE
-bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_rhash *priv = nft_set_priv(set);
const struct nft_rhash_elem *he;
@@ -95,9 +96,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
if (he != NULL)
- *ext = &he->ext;
+ return &he->ext;
- return !!he;
+ return NULL;
}
static struct nft_elem_priv *
@@ -120,14 +121,9 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
return ERR_PTR(-ENOENT);
}
-static bool nft_rhash_update(struct nft_set *set, const u32 *key,
- struct nft_elem_priv *
- (*new)(struct nft_set *,
- const struct nft_expr *,
- struct nft_regs *regs),
- const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_set_ext **ext)
+static const struct nft_set_ext *
+nft_rhash_update(struct nft_set *set, const u32 *key,
+ const struct nft_expr *expr, struct nft_regs *regs)
{
struct nft_rhash *priv = nft_set_priv(set);
struct nft_rhash_elem *he, *prev;
@@ -143,7 +139,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
if (he != NULL)
goto out;
- elem_priv = new(set, expr, regs);
+ elem_priv = nft_dynset_new(set, expr, regs);
if (!elem_priv)
goto err1;
@@ -161,14 +157,13 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
}
out:
- *ext = &he->ext;
- return true;
+ return &he->ext;
err2:
nft_set_elem_destroy(set, &he->priv, true);
atomic_dec(&set->nelems);
err1:
- return false;
+ return NULL;
}
static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
@@ -507,8 +502,9 @@ struct nft_hash_elem {
};
INDIRECT_CALLABLE_SCOPE
-bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -519,12 +515,10 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
- nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
- return true;
- }
+ nft_set_elem_active(&he->ext, genmask))
+ return &he->ext;
}
- return false;
+ return NULL;
}
static struct nft_elem_priv *
@@ -547,9 +541,9 @@ nft_hash_get(const struct net *net, const struct nft_set *set,
}
INDIRECT_CALLABLE_SCOPE
-bool nft_hash_lookup_fast(const struct net *net,
- const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -562,12 +556,10 @@ bool nft_hash_lookup_fast(const struct net *net,
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
if (k1 == k2 &&
- nft_set_elem_active(&he->ext, genmask)) {
- *ext = &he->ext;
- return true;
- }
+ nft_set_elem_active(&he->ext, genmask))
+ return &he->ext;
}
- return false;
+ return NULL;
}
static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index c5855069bdab..1a19649c2851 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -397,34 +397,36 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
}
/**
- * nft_pipapo_lookup() - Lookup function
- * @net: Network namespace
- * @set: nftables API set representation
- * @key: nftables API element representation containing key data
- * @ext: nftables API extension pointer, filled with matching reference
+ * pipapo_get() - Get matching element reference given key data
+ * @m: storage containing the set elements
+ * @data: Key data to be matched against existing elements
+ * @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* For more details, see DOC: Theory of Operation.
*
- * Return: true on match, false otherwise.
+ * This is the main lookup function. It matches key data against either
+ * the working match set or the uncommitted copy, depending on what the
+ * caller passed to us.
+ * nft_pipapo_get (lookup from userspace/control plane) and nft_pipapo_lookup
+ * (datapath lookup) pass the active copy.
+ * The insertion path will pass the uncommitted working copy.
+ *
+ * Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
*/
-bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
- struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
- u8 genmask = nft_genmask_cur(net);
- const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
- const u8 *rp = (const u8 *)key;
bool map_index;
int i;
local_bh_disable();
- m = rcu_dereference(priv->match);
-
- if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
+ if (unlikely(!raw_cpu_ptr(m->scratch)))
goto out;
scratch = *raw_cpu_ptr(m->scratch);
@@ -444,12 +446,12 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
* packet bytes value, then AND bucket value
*/
if (likely(f->bb == 8))
- pipapo_and_field_buckets_8bit(f, res_map, rp);
+ pipapo_and_field_buckets_8bit(f, res_map, data);
else
- pipapo_and_field_buckets_4bit(f, res_map, rp);
+ pipapo_and_field_buckets_4bit(f, res_map, data);
NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4;
- rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
+ data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
/* Now populate the bitmap for the next field, unless this is
* the last field, in which case return the matched 'ext'
@@ -465,13 +467,15 @@ next_match:
scratch->map_index = map_index;
local_bh_enable();
- return false;
+ return NULL;
}
if (last) {
- *ext = &f->mt[b].e->ext;
- if (unlikely(nft_set_elem_expired(*ext) ||
- !nft_set_elem_active(*ext, genmask)))
+ struct nft_pipapo_elem *e;
+
+ e = f->mt[b].e;
+ if (unlikely(__nft_set_elem_expired(&e->ext, tstamp) ||
+ !nft_set_elem_active(&e->ext, genmask)))
goto next_match;
/* Last field: we're just returning the key without
@@ -481,8 +485,7 @@ next_match:
*/
scratch->map_index = map_index;
local_bh_enable();
-
- return true;
+ return e;
}
/* Swap bitmap indices: res_map is the initial bitmap for the
@@ -492,112 +495,38 @@ next_match:
map_index = !map_index;
swap(res_map, fill_map);
- rp += NFT_PIPAPO_GROUPS_PADDING(f);
+ data += NFT_PIPAPO_GROUPS_PADDING(f);
}
out:
local_bh_enable();
- return false;
+ return NULL;
}
/**
- * pipapo_get() - Get matching element reference given key data
+ * nft_pipapo_lookup() - Dataplane frontend for main lookup function
* @net: Network namespace
* @set: nftables API set representation
- * @m: storage containing active/existing elements
- * @data: Key data to be matched against existing elements
- * @genmask: If set, check that element is active in given genmask
- * @tstamp: timestamp to check for expired elements
- * @gfp: the type of memory to allocate (see kmalloc).
+ * @key: pointer to nft registers containing key data
*
- * This is essentially the same as the lookup function, except that it matches
- * key data against the uncommitted copy and doesn't use preallocated maps for
- * bitmap results.
+ * This function is called from the data path. It will search for
+ * an element matching the given key in the current active copy.
*
- * Return: pointer to &struct nft_pipapo_elem on match, error pointer otherwise.
+ * Return: nftables API extension pointer or NULL if no match.
*/
-static struct nft_pipapo_elem *pipapo_get(const struct net *net,
- const struct nft_set *set,
- const struct nft_pipapo_match *m,
- const u8 *data, u8 genmask,
- u64 tstamp, gfp_t gfp)
+const struct nft_set_ext *
+nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
- struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
- unsigned long *res_map, *fill_map = NULL;
- const struct nft_pipapo_field *f;
- int i;
-
- if (m->bsize_max == 0)
- return ret;
-
- res_map = kmalloc_array(m->bsize_max, sizeof(*res_map), gfp);
- if (!res_map) {
- ret = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- fill_map = kcalloc(m->bsize_max, sizeof(*res_map), gfp);
- if (!fill_map) {
- ret = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- pipapo_resmap_init(m, res_map);
-
- nft_pipapo_for_each_field(f, i, m) {
- bool last = i == m->field_count - 1;
- int b;
-
- /* For each bit group: select lookup table bucket depending on
- * packet bytes value, then AND bucket value
- */
- if (f->bb == 8)
- pipapo_and_field_buckets_8bit(f, res_map, data);
- else if (f->bb == 4)
- pipapo_and_field_buckets_4bit(f, res_map, data);
- else
- BUG();
-
- data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f);
-
- /* Now populate the bitmap for the next field, unless this is
- * the last field, in which case return the matched 'ext'
- * pointer if any.
- *
- * Now res_map contains the matching bitmap, and fill_map is the
- * bitmap for the next field.
- */
-next_match:
- b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
- last);
- if (b < 0)
- goto out;
-
- if (last) {
- if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
- goto next_match;
- if ((genmask &&
- !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
- goto next_match;
-
- ret = f->mt[b].e;
- goto out;
- }
-
- data += NFT_PIPAPO_GROUPS_PADDING(f);
+ struct nft_pipapo *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_cur(net);
+ const struct nft_pipapo_match *m;
+ const struct nft_pipapo_elem *e;
- /* Swap bitmap indices: fill_map will be the initial bitmap for
- * the next field (i.e. the new res_map), and res_map is
- * guaranteed to be all-zeroes at this point, ready to be filled
- * according to the next mapping table.
- */
- swap(res_map, fill_map);
- }
+ m = rcu_dereference(priv->match);
+ e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
-out:
- kfree(fill_map);
- kfree(res_map);
- return ret;
+ return e ? &e->ext : NULL;
}
/**
@@ -606,6 +535,11 @@ out:
* @set: nftables API set representation
* @elem: nftables API element representation containing key data
* @flags: Unused
+ *
+ * This function is called from the control plane path under
+ * RCU read lock.
+ *
+ * Return: set element private pointer or ERR_PTR(-ENOENT).
*/
static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
@@ -615,11 +549,10 @@ nft_pipapo_get(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = rcu_dereference(priv->match);
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net), get_jiffies_64(),
- GFP_ATOMIC);
- if (IS_ERR(e))
- return ERR_CAST(e);
+ e = pipapo_get(m, (const u8 *)elem->key.val.data,
+ nft_genmask_cur(net), get_jiffies_64());
+ if (!e)
+ return ERR_PTR(-ENOENT);
return &e->priv;
}
@@ -1219,7 +1152,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c
mem = s;
mem -= s->align_off;
- kfree(mem);
+ kvfree(mem);
}
/**
@@ -1240,10 +1173,9 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
void *scratch_aligned;
u32 align_off;
#endif
- scratch = kzalloc_node(struct_size(scratch, map,
- bsize_max * 2) +
- NFT_PIPAPO_ALIGN_HEADROOM,
- GFP_KERNEL_ACCOUNT, cpu_to_node(i));
+ scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
+ NFT_PIPAPO_ALIGN_HEADROOM,
+ GFP_KERNEL_ACCOUNT, cpu_to_node(i));
if (!scratch) {
/* On failure, there's no need to undo previous
* allocations: this means that some scratch maps have
@@ -1345,8 +1277,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL);
- if (!IS_ERR(dup)) {
+ dup = pipapo_get(m, start, genmask, tstamp);
+ if (dup) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1365,15 +1297,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
return -ENOTEMPTY;
}
- if (PTR_ERR(dup) == -ENOENT) {
- /* Look for partially overlapping entries */
- dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp,
- GFP_KERNEL);
- }
-
- if (PTR_ERR(dup) != -ENOENT) {
- if (IS_ERR(dup))
- return PTR_ERR(dup);
+ /* Look for partially overlapping entries */
+ dup = pipapo_get(m, end, nft_genmask_next(net), tstamp);
+ if (dup) {
*elem_priv = &dup->priv;
return -ENOTEMPTY;
}
@@ -1914,9 +1840,9 @@ nft_pipapo_deactivate(const struct net *net, const struct nft_set *set,
if (!m)
return NULL;
- e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data,
- nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL);
- if (IS_ERR(e))
+ e = pipapo_get(m, (const u8 *)elem->key.val.data,
+ nft_genmask_next(net), nft_net_tstamp(net));
+ if (!e)
return NULL;
nft_set_elem_change_active(net, set, &e->ext);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index be7c16c79f71..db5d367e43c4 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1137,7 +1137,6 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
* @net: Network namespace
* @set: nftables API set representation
* @key: nftables API element representation containing key data
- * @ext: nftables API extension pointer, filled with matching reference
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
*
@@ -1146,8 +1145,9 @@ static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, uns
*
* Return: true on match, false otherwise.
*/
-bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_scratch *scratch;
@@ -1155,17 +1155,18 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
+ const struct nft_set_ext *ext;
unsigned long *res, *fill;
bool map_index;
- int i, ret = 0;
+ int i;
local_bh_disable();
if (unlikely(!irq_fpu_usable())) {
- bool fallback_res = nft_pipapo_lookup(net, set, key, ext);
+ ext = nft_pipapo_lookup(net, set, key);
local_bh_enable();
- return fallback_res;
+ return ext;
}
m = rcu_dereference(priv->match);
@@ -1182,7 +1183,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
if (unlikely(!scratch)) {
kernel_fpu_end();
local_bh_enable();
- return false;
+ return NULL;
}
map_index = scratch->map_index;
@@ -1197,6 +1198,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
next_match:
nft_pipapo_for_each_field(f, i, m) {
bool last = i == m->field_count - 1, first = !i;
+ int ret = 0;
#define NFT_SET_PIPAPO_AVX2_LOOKUP(b, n) \
(ret = nft_pipapo_avx2_lookup_##b##b_##n(res, fill, f, \
@@ -1244,10 +1246,10 @@ next_match:
goto out;
if (last) {
- *ext = &f->mt[ret].e->ext;
- if (unlikely(nft_set_elem_expired(*ext) ||
- !nft_set_elem_active(*ext, genmask))) {
- ret = 0;
+ ext = &f->mt[ret].e->ext;
+ if (unlikely(nft_set_elem_expired(ext) ||
+ !nft_set_elem_active(ext, genmask))) {
+ ext = NULL;
goto next_match;
}
@@ -1264,5 +1266,5 @@ out:
kernel_fpu_end();
local_bh_enable();
- return ret >= 0;
+ return ext;
}
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 2e8ef16ff191..938a257c069e 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -52,9 +52,9 @@ static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
return nft_set_elem_expired(&rbe->ext);
}
-static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext,
- unsigned int seq)
+static const struct nft_set_ext *
+__nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, unsigned int seq)
{
struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
@@ -65,7 +65,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(priv->root.rb_node);
while (parent != NULL) {
if (read_seqcount_retry(&priv->count, seq))
- return false;
+ return NULL;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
@@ -87,50 +87,48 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
}
if (nft_rbtree_elem_expired(rbe))
- return false;
+ return NULL;
if (nft_rbtree_interval_end(rbe)) {
if (nft_set_is_anonymous(set))
- return false;
+ return NULL;
parent = rcu_dereference_raw(parent->rb_left);
interval = NULL;
continue;
}
- *ext = &rbe->ext;
- return true;
+ return &rbe->ext;
}
}
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_elem_expired(interval) &&
- nft_rbtree_interval_start(interval)) {
- *ext = &interval->ext;
- return true;
- }
+ nft_rbtree_interval_start(interval))
+ return &interval->ext;
- return false;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE
-bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+const struct nft_set_ext *
+nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
- bool ret;
+ const struct nft_set_ext *ext;
- ret = __nft_rbtree_lookup(net, set, key, ext, seq);
- if (ret || !read_seqcount_retry(&priv->count, seq))
- return ret;
+ ext = __nft_rbtree_lookup(net, set, key, seq);
+ if (ext || !read_seqcount_retry(&priv->count, seq))
+ return ext;
read_lock_bh(&priv->lock);
seq = read_seqcount_begin(&priv->count);
- ret = __nft_rbtree_lookup(net, set, key, ext, seq);
+ ext = __nft_rbtree_lookup(net, set, key, seq);
read_unlock_bh(&priv->lock);
- return ret;
+ return ext;
}
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 709840612f0d..90b7630421c4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1317,12 +1317,13 @@ void xt_compat_unlock(u_int8_t af)
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
-DEFINE_PER_CPU(seqcount_t, xt_recseq);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
-
struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);
+#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
+DEFINE_PER_CPU(seqcount_t, xt_recseq);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
@@ -1514,6 +1515,7 @@ void *xt_unregister_table(struct xt_table *table)
return private;
}
EXPORT_SYMBOL_GPL(xt_unregister_table);
+#endif
#ifdef CONFIG_PROC_FS
static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1897,6 +1899,7 @@ void xt_proto_fini(struct net *net, u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_proto_fini);
+#ifdef CONFIG_NETFILTER_XTABLES_LEGACY
/**
* xt_percpu_counter_alloc - allocate x_tables rule counter
*
@@ -1951,6 +1954,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
free_percpu((void __percpu *)pcnt);
}
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+#endif
static int __net_init xt_net_init(struct net *net)
{
@@ -1983,8 +1987,10 @@ static int __init xt_init(void)
unsigned int i;
int rv;
- for_each_possible_cpu(i) {
- seqcount_init(&per_cpu(xt_recseq, i));
+ if (IS_ENABLED(CONFIG_NETFILTER_XTABLES_LEGACY)) {
+ for_each_possible_cpu(i) {
+ seqcount_init(&per_cpu(xt_recseq, i));
+ }
}
xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 7c6bf1c16813..0ca1cdfc4095 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -38,8 +38,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
- pr_info_ratelimited("accounting object `%s' does not exists\n",
- info->name);
+ pr_info_ratelimited("accounting object `%.*s' does not exist\n",
+ NFACCT_NAME_MAX, info->name);
return -ENOENT;
}
info->nfacct = nfacct;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e8972a857e51..5949855fa29e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -387,7 +387,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
WARN_ON(skb->sk != NULL);
skb->sk = sk;
skb->destructor = netlink_skb_destructor;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk_mem_charge(sk, skb->truesize);
}
@@ -1212,41 +1211,48 @@ struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk)
{
+ DECLARE_WAITQUEUE(wait, current);
struct netlink_sock *nlk;
+ unsigned int rmem;
nlk = nlk_sk(sk);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
- DECLARE_WAITQUEUE(wait, current);
- if (!*timeo) {
- if (!ssk || netlink_is_kernel(ssk))
- netlink_overrun(sk);
- sock_put(sk);
- kfree_skb(skb);
- return -EAGAIN;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&nlk->wait, &wait);
+ if ((rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) &&
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
+ netlink_skb_set_owner_r(skb, sk);
+ return 0;
+ }
- if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
- !sock_flag(sk, SOCK_DEAD))
- *timeo = schedule_timeout(*timeo);
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&nlk->wait, &wait);
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
+ netlink_overrun(sk);
sock_put(sk);
+ kfree_skb(skb);
+ return -EAGAIN;
+ }
- if (signal_pending(current)) {
- kfree_skb(skb);
- return sock_intr_errno(*timeo);
- }
- return 1;
+ __set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&nlk->wait, &wait);
+ rmem = atomic_read(&sk->sk_rmem_alloc);
+
+ if (((rmem && rmem + skb->truesize > READ_ONCE(sk->sk_rcvbuf)) ||
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
+ !sock_flag(sk, SOCK_DEAD))
+ *timeo = schedule_timeout(*timeo);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&nlk->wait, &wait);
+ sock_put(sk);
+
+ if (signal_pending(current)) {
+ kfree_skb(skb);
+ return sock_intr_errno(*timeo);
}
- netlink_skb_set_owner_r(skb, sk);
- return 0;
+
+ return 1;
}
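The rewritten attach path above reserves receive-buffer space up front and rolls the charge back if the socket is over its limit. A minimal sketch of that reserve-then-undo pattern, with names taken from the hunk above (the NETLINK_S_CONGESTED check and the wait path are omitted here):

	unsigned int rmem;

	/* provisionally charge the skb against the receive buffer */
	rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
	if (rmem == skb->truesize || rmem < READ_ONCE(sk->sk_rcvbuf)) {
		netlink_skb_set_owner_r(skb, sk);	/* charge kept, skb now owned by sk */
		return 0;
	}
	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);	/* over limit: undo the charge */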
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
@@ -1307,6 +1313,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
ret = -ECONNREFUSED;
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
netlink_skb_set_owner_r(skb, sk);
NETLINK_CB(skb).sk = ssk;
netlink_deliver_tap_kernel(sk, ssk, skb);
@@ -1383,13 +1390,19 @@ EXPORT_SYMBOL_GPL(netlink_strict_get_check);
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
struct netlink_sock *nlk = nlk_sk(sk);
+ unsigned int rmem, rcvbuf;
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+
+ if ((rmem == skb->truesize || rmem <= rcvbuf) &&
!test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
- return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
+ return rmem > (rcvbuf >> 1);
}
+
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
return -1;
}
@@ -2245,6 +2258,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ unsigned int rmem, rcvbuf;
size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
@@ -2258,9 +2272,6 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
goto errout_skb;
}
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
- goto errout_skb;
-
/* NLMSG_GOODSIZE is small to avoid high order allocations being
* required, but it makes sense to _attempt_ a 32KiB allocation
* to reduce number of system calls on dump operations, if user
@@ -2283,6 +2294,13 @@ static int netlink_dump(struct sock *sk, bool lock_taken)
if (!skb)
goto errout_skb;
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ rmem = atomic_add_return(skb->truesize, &sk->sk_rmem_alloc);
+ if (rmem != skb->truesize && rmem >= rcvbuf) {
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ goto errout_skb;
+ }
+
/* Trim skb to allocated size. User is expected to provide buffer as
* large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at
* netlink_recvmsg())). dump will pack as many smaller messages as
@@ -2455,7 +2473,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
unsigned int flags = 0;
size_t tlvlen;
- /* Error messages get the original request appened, unless the user
+ /* Error messages get the original request appended, unless the user
* requests to cap the error message, and get extra error data if
* requested.
*/
@@ -2869,8 +2887,7 @@ static const struct rhashtable_params netlink_rhashtable_params = {
};
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
-BTF_ID_LIST(btf_netlink_sock_id)
-BTF_ID(struct, netlink_sock)
+BTF_ID_LIST_SINGLE(btf_netlink_sock_id, struct, netlink_sock)
static const struct bpf_iter_seq_info netlink_seq_info = {
.seq_ops = &netlink_seq_ops,
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index ed1508a9e093..aab107727f18 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
nu->tty = tty;
- tty->disc_data = nu;
skb_queue_head_init(&nu->tx_q);
INIT_WORK(&nu->write_work, nci_uart_write_work);
spin_lock_init(&nu->rx_lock);
ret = nu->ops.open(nu);
if (ret) {
- tty->disc_data = NULL;
kfree(nu);
+ return ret;
} else if (!try_module_get(nu->owner)) {
nu->ops.close(nu);
- tty->disc_data = NULL;
kfree(nu);
return -ENOENT;
}
- return ret;
+ tty->disc_data = nu;
+
+ return 0;
}
/* ------ LDISC part ------ */
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 6a40b8d0350d..a18e2c503da6 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1192,7 +1192,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
continue;
uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]);
- if (uri == NULL || *uri == 0)
+ if (*uri == 0)
continue;
tid = local->sdreq_next_tid++;
@@ -1540,10 +1540,6 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
}
apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
- if (!apdu) {
- rc = -EINVAL;
- goto put_dev;
- }
ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
if (!ctx) {
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e7269a3eec79..2832e0794197 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -39,16 +39,14 @@
#include "flow_netlink.h"
#include "openvswitch_trace.h"
-DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
- .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
* space. Return NULL if out of key spaces.
*/
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
int level = ovs_pcpu->exec_level;
struct sw_flow_key *key = NULL;
@@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
const struct nlattr *actions,
const int actions_len)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
struct deferred_action *da;
da = action_fifo_put(fifo);
@@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
static int ovs_vport_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
struct vport *vport = data->vport;
if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
unsigned int hlen = skb_network_offset(skb);
struct ovs_frag_data *data;
- data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+ data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
data->dst = skb->_skb_refdst;
data->vport = vport;
data->cb = *OVS_CB(skb);
@@ -943,8 +941,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_USERSPACE_ATTR_PID:
- if (dp->user_features &
- OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ if (OVS_CB(skb)->upcall_pid)
+ upcall.portid = OVS_CB(skb)->upcall_pid;
+ else if (dp->user_features &
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
upcall.portid =
ovs_dp_get_upcall_portid(dp,
smp_processor_id());
@@ -1566,16 +1566,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
clone = clone_flow_key ? clone_key(key) : key;
if (clone) {
int err = 0;
-
if (actions) { /* Sample action */
if (clone_flow_key)
- __this_cpu_inc(ovs_pcpu_storage.exec_level);
+ __this_cpu_inc(ovs_pcpu_storage->exec_level);
err = do_execute_actions(dp, skb, clone,
actions, len);
if (clone_flow_key)
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
} else { /* Recirc action */
clone->recirc_id = recirc_id;
ovs_dp_process_packet(skb, clone);
@@ -1611,7 +1610,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
static void process_deferred_actions(struct datapath *dp)
{
- struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+ struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
/* Do not touch the FIFO in case there is no deferred actions. */
if (action_fifo_is_empty(fifo))
@@ -1642,7 +1641,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
{
int err, level;
- level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
+ level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
if (unlikely(level > OVS_RECURSION_LIMIT)) {
net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
ovs_dp_name(dp));
@@ -1659,6 +1658,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
process_deferred_actions(dp);
out:
- __this_cpu_dec(ovs_pcpu_storage.exec_level);
+ __this_cpu_dec(ovs_pcpu_storage->exec_level);
return err;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 6a304ae2d959..d5b6e2002bc1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p)
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
- struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+ struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
@@ -267,7 +267,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ if (OVS_CB(skb)->upcall_pid)
+ upcall.portid = OVS_CB(skb)->upcall_pid;
+ else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
upcall.portid =
ovs_dp_get_upcall_portid(dp, smp_processor_id());
else
@@ -299,7 +301,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
* avoided.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
ovs_pcpu->owner = current;
ovs_pcpu_locked = true;
}
@@ -310,7 +312,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_dp_name(dp), error);
if (ovs_pcpu_locked) {
ovs_pcpu->owner = NULL;
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
}
stats_counter = &stats->n_hit;
@@ -651,6 +653,9 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
!!(hash & OVS_PACKET_HASH_L4_BIT));
}
+ OVS_CB(packet)->upcall_pid =
+ nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0);
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -689,13 +694,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
- local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, current);
+ this_cpu_write(ovs_pcpu_storage->owner, current);
err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
- this_cpu_write(ovs_pcpu_storage.owner, NULL);
- local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+ this_cpu_write(ovs_pcpu_storage->owner, NULL);
+ local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
local_bh_enable();
rcu_read_unlock();
@@ -719,6 +724,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
+ [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};
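Both upcall sites in this patch now prefer a per-packet Netlink PID when userspace supplied OVS_PACKET_ATTR_UPCALL_PID. The selection order, condensed from the ovs_dp_process_packet and output_userspace hunks above (the final per-vport fallback is the pre-existing path and is only summarised here, not shown in this diff):

	if (OVS_CB(skb)->upcall_pid)
		/* explicit per-packet destination requested via OVS_PACKET_ATTR_UPCALL_PID */
		upcall.portid = OVS_CB(skb)->upcall_pid;
	else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
		/* per-CPU dispatch mode */
		upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
	else
		/* default: the vport's own upcall portid (unchanged path) */
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);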
static const struct genl_small_ops dp_packet_genl_ops[] = {
@@ -2744,6 +2750,28 @@ static struct drop_reason_list drop_reason_list_ovs = {
.n_reasons = ARRAY_SIZE(ovs_drop_reasons),
};
+static int __init ovs_alloc_percpu_storage(void)
+{
+ unsigned int cpu;
+
+ ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+ if (!ovs_pcpu_storage)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct ovs_pcpu_storage *ovs_pcpu;
+
+ ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+ local_lock_init(&ovs_pcpu->bh_lock);
+ }
+ return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+ free_percpu(ovs_pcpu_storage);
+}
+
static int __init dp_init(void)
{
int err;
@@ -2753,6 +2781,10 @@ static int __init dp_init(void)
pr_info("Open vSwitch switching datapath\n");
+ err = ovs_alloc_percpu_storage();
+ if (err)
+ goto error;
+
err = ovs_internal_dev_rtnl_link_register();
if (err)
goto error;
@@ -2799,6 +2831,7 @@ error_flow_exit:
error_unreg_rtnl_link:
ovs_internal_dev_rtnl_link_unregister();
error:
+ ovs_free_percpu_storage();
return err;
}
@@ -2813,6 +2846,7 @@ static void dp_cleanup(void)
ovs_vport_exit();
ovs_flow_exit();
ovs_internal_dev_rtnl_link_unregister();
+ ovs_free_percpu_storage();
}
module_init(dp_init);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 1b5348b0f559..db0c3e69d66c 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -121,6 +121,8 @@ struct datapath {
* @cutlen: The number of bytes from the packet end to be removed.
* @probability: The sampling probability that was applied to this skb; 0 means
* no sampling has occurred; U32_MAX means 100% probability.
+ * @upcall_pid: Netlink socket PID to use for sending this packet to userspace;
+ * 0 means "not set" and default per-CPU or per-vport dispatch should be used.
*/
struct ovs_skb_cb {
struct vport *input_vport;
@@ -128,6 +130,7 @@ struct ovs_skb_cb {
u16 acts_origlen;
u32 cutlen;
u32 probability;
+ u32 upcall_pid;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -220,7 +223,8 @@ struct ovs_pcpu_storage {
struct task_struct *owner;
local_lock_t bh_lock;
};
-DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
/**
* enum ovs_pkt_hash_types - hash info to include with a packet
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 8732f6e51ae5..6bbbc16ab778 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -501,6 +501,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0;
OVS_CB(skb)->probability = 0;
+ OVS_CB(skb)->upcall_pid = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3d43f3eae759..bc438d0d96a7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2785,7 +2785,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
- long timeo = 0;
+ long timeo;
mutex_lock(&po->pg_vec_lock);
@@ -2839,22 +2839,28 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
reinit_completion(&po->skb_completion);
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && skb) {
- timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ /* Note: packet_read_pending() might be slow since it
+ * reads a per-CPU variable, but we only have to call it
+ * here, when ph is NULL, to check the pending_refcnt;
+ * the fast path never needs it.
+ */
+ if (need_wait && packet_read_pending(&po->tx_ring)) {
timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
if (timeo <= 0) {
err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
goto out_put;
}
- }
- /* check for additional frames */
- continue;
+ /* check for additional frames */
+ continue;
+ } else
+ break;
}
skb = NULL;
@@ -2943,14 +2949,7 @@ tpacket_error:
}
packet_increment_head(&po->tx_ring);
len_sum += tp_len;
- } while (likely((ph != NULL) ||
- /* Note: packet_read_pending() might be slow if we have
- * to call it as it's per_cpu variable, but in fast-path
- * we already short-circuit the loop with the first
- * condition, and luckily don't have to go that path
- * anyway.
- */
- (need_wait && packet_read_pending(&po->tx_ring))));
+ } while (1);
err = len_sum;
goto out_put;
@@ -4783,7 +4782,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
READ_ONCE(po->ifindex),
packet_sock_flag(po, PACKET_SOCK_RUNNING),
atomic_read(&s->sk_rmem_alloc),
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(s)),
sock_i_ino(s));
}
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 47f69f3dbf73..6ce1dcc284d9 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -153,7 +153,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
if ((req->pdiag_show & PACKET_SHOW_INFO) &&
nla_put_u32(skb, PACKET_DIAG_UID,
- from_kuid_munged(user_ns, sock_i_uid(sk))))
+ from_kuid_munged(user_ns, sk_uid(sk))))
goto out_nlmsg_trim;
if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 53a858478e22..62527e1ebb88 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -826,6 +826,7 @@ static struct sock *pep_sock_accept(struct sock *sk,
}
/* Check for duplicate pipe handle */
+ pn_skb_get_dst_sockaddr(skb, &dst);
newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle);
if (unlikely(newsk)) {
__sock_put(newsk);
@@ -850,7 +851,6 @@ static struct sock *pep_sock_accept(struct sock *sk,
newsk->sk_destruct = pipe_destruct;
newpn = pep_sk(newsk);
- pn_skb_get_dst_sockaddr(skb, &dst);
pn_skb_get_src_sockaddr(skb, &src);
newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst);
newpn->pn_sk.dobject = pn_sockaddr_get_object(&src);
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 5ce0b3ee5def..ea4d5e6533db 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -584,7 +584,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
sk->sk_protocol, pn->sobject, pn->dobject,
pn->resource, sk->sk_state,
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk),
refcount_read(&sk->sk_refcnt), sk,
atomic_read(&sk->sk_drops));
@@ -755,7 +755,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%02X %5u %lu",
(int) (psk - pnres.sk),
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk));
}
seq_pad(seq, '\n');
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 8435a20968ef..086a13170e09 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -598,7 +598,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
}
if (addr_type & IPV6_ADDR_LINKLOCAL) {
- /* If socket is arleady bound to a link local address,
+ /* If socket is already bound to a link local address,
* the peer address must be on the same link.
*/
if (sin6->sin6_scope_id == 0 ||
diff --git a/net/rds/send.c b/net/rds/send.c
index 09a280110654..42d991bc8543 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -232,7 +232,7 @@ restart:
* If not already working on one, grab the next message.
*
* cp_xmit_rm holds a ref while we're sending this message down
- * the connction. We can use this ref while holding the
+ * the connection. We can use this ref while holding the
* send_sem.. rds_send_reset() is serialized with it.
*/
if (!rm) {
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d89bd8d0c354..91e34af3fe5d 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -105,10 +105,6 @@ int rds_tcp_accept_one(struct socket *sock)
int conn_state;
struct rds_conn_path *cp;
struct in6_addr *my_addr, *peer_addr;
- struct proto_accept_arg arg = {
- .flags = O_NONBLOCK,
- .kern = true,
- };
#if !IS_ENABLED(CONFIG_IPV6)
struct in6_addr saddr, daddr;
#endif
@@ -117,25 +113,9 @@ int rds_tcp_accept_one(struct socket *sock)
if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH;
- ret = sock_create_lite(sock->sk->sk_family,
- sock->sk->sk_type, sock->sk->sk_protocol,
- &new_sock);
+ ret = kernel_accept(sock, &new_sock, O_NONBLOCK);
if (ret)
- goto out;
-
- ret = sock->ops->accept(sock, new_sock, &arg);
- if (ret < 0)
- goto out;
-
- /* sock_create_lite() does not get a hold on the owner module so we
- * need to do it here. Note that sock_release() uses sock->ops to
- * determine if it needs to decrement the reference count. So set
- * sock->ops after calling accept() in case that fails. And there's
- * no need to do try_module_get() as the listener should have a hold
- * already.
- */
- new_sock->ops = sock->ops;
- __module_get(new_sock->ops->owner);
+ return ret;
rds_tcp_keepalive(new_sock);
if (!rds_tcp_tune(new_sock)) {
@@ -298,15 +278,15 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
sin6 = (struct sockaddr_in6 *)&ss;
sin6->sin6_family = PF_INET6;
sin6->sin6_addr = in6addr_any;
- sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
+ sin6->sin6_port = htons(RDS_TCP_PORT);
sin6->sin6_scope_id = 0;
sin6->sin6_flowinfo = 0;
addr_len = sizeof(*sin6);
} else {
sin = (struct sockaddr_in *)&ss;
sin->sin_family = PF_INET;
- sin->sin_addr.s_addr = INADDR_ANY;
- sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
+ sin->sin_addr.s_addr = htonl(INADDR_ANY);
+ sin->sin_port = htons(RDS_TCP_PORT);
addr_len = sizeof(*sin);
}
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 4d67f36dce1b..3e99181e759f 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -101,6 +101,7 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety
*/
static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m)
{
+ enum skb_drop_reason dr; /* ignored */
struct rose_sock *rose = rose_sk(sk);
int queued = 0;
@@ -162,7 +163,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
rose_frames_acked(sk, nr);
if (ns == rose->vr) {
rose_start_idletimer(sk);
- if (sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) == 0 &&
+ if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN, &dr) &&
__sock_queue_rcv_skb(sk, skb) == 0) {
rose->vr = (rose->vr + 1) % ROSE_MODULUS;
queued = 1;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 2dd6bd3a3011..b72bf8a08d48 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -497,22 +497,15 @@ void rose_rt_device_down(struct net_device *dev)
t = rose_node;
rose_node = rose_node->next;
- for (i = 0; i < t->count; i++) {
+ for (i = t->count - 1; i >= 0; i--) {
if (t->neighbour[i] != s)
continue;
t->count--;
- switch (i) {
- case 0:
- t->neighbour[0] = t->neighbour[1];
- fallthrough;
- case 1:
- t->neighbour[1] = t->neighbour[2];
- break;
- case 2:
- break;
- }
+ memmove(&t->neighbour[i], &t->neighbour[i + 1],
+ sizeof(t->neighbour[0]) *
+ (t->count - i));
}
if (t->count <= 0)
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5bd3922c310d..5b7342d43486 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -44,6 +44,7 @@ enum rxrpc_skb_mark {
RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
+ RXRPC_SKB_MARK_REJECT_CONN_ABORT, /* Reject with connection ABORT (code in skb->priority) */
};
/*
@@ -361,12 +362,15 @@ struct rxrpc_local {
struct list_head new_client_calls; /* Newly created client calls need connection */
spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
- /* Provide a kvec table sufficiently large to manage either a DATA
- * packet with a maximum set of jumbo subpackets or a PING ACK padded
- * out to 64K with zeropages for PMTUD.
- */
- struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ?
- 1 + RXRPC_MAX_NR_JUMBO : 3 + 16];
+ union {
+ /* Provide a kvec table sufficiently large to manage either a
+ * DATA packet with a maximum set of jumbo subpackets or a PING
+ * ACK padded out to 64K with zeropages for PMTUD.
+ */
+ struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ?
+ 1 + RXRPC_MAX_NR_JUMBO : 3 + 16];
+ struct bio_vec bvec[3 + 16];
+ };
};
/*
@@ -1250,6 +1254,8 @@ int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
void rxrpc_error_report(struct sock *);
bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
s32 abort_code, int err);
+bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
int rxrpc_io_thread(void *data);
void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
@@ -1380,6 +1386,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
enum rxrpc_peer_trace);
void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index a4b363b47cca..00982a030744 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -149,6 +149,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
id_in_use:
write_unlock(&rx->call_lock);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT);
rxrpc_cleanup_call(call);
_leave(" = -EBADSLT");
return -EBADSLT;
@@ -218,6 +219,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
+ rxrpc_see_call(call, rxrpc_call_see_discard);
rcu_assign_pointer(call->socket, rx);
if (rx->app_ops &&
rx->app_ops->discard_new_call) {
@@ -254,6 +256,9 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
unsigned short call_tail, conn_tail, peer_tail;
unsigned short call_count, conn_count;
+ if (!b)
+ return NULL;
+
/* #calls >= #conns >= #peers must hold true. */
call_head = smp_load_acquire(&b->call_backlog_head);
call_tail = b->call_backlog_tail;
@@ -369,8 +374,8 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
- RX_INVALID_OPERATION, -ESHUTDOWN);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
goto no_call;
}
@@ -402,6 +407,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&rx->incoming_lock);
read_unlock_irq(&local->services_lock);
+ rxrpc_assess_MTU_size(local, call->peer);
if (hlist_unhashed(&call->error_link)) {
spin_lock_irq(&call->peer->lock);
@@ -416,12 +422,12 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
unsupported_service:
read_unlock_irq(&local->services_lock);
- return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
- RX_INVALID_OPERATION, -EOPNOTSUPP);
+ return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
unsupported_security:
read_unlock_irq(&local->services_lock);
- return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
- RX_INVALID_OPERATION, -EKEYREJECTED);
+ return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
read_unlock_irq(&local->services_lock);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 15067ff7b1f2..918f41d97a2f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false, putu = false;
+ bool putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
@@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
- /* Make sure we don't get any more notifications */
+ /* Note that at this point, the call may still be on the socket
+ * receive queue, or may have been added back onto it. recvmsg()
+ * must discard released calls. The CALL_RELEASED flag should
+ * prevent further notifications.
+ */
spin_lock_irq(&rx->recvmsg_lock);
-
- if (!list_empty(&call->recvmsg_link)) {
- _debug("unlinking once-pending call %p { e=%lx f=%lx }",
- call, call->events, call->flags);
- list_del(&call->recvmsg_link);
- put = true;
- }
-
- /* list_empty() must return false in rxrpc_notify_socket() */
- call->recvmsg_link.next = NULL;
- call->recvmsg_link.prev = NULL;
-
spin_unlock_irq(&rx->recvmsg_lock);
- if (put)
- rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
@@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
+ while ((call = list_first_entry_or_null(&rx->recvmsg_q,
+ struct rxrpc_call, recvmsg_link))) {
+ list_del_init(&call->recvmsg_link);
+ rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q);
+ }
+
_leave("");
}
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 27b650d30f4d..e939ecf417c4 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -97,6 +97,20 @@ bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
return false;
}
+/*
+ * Directly produce a connection abort from a packet.
+ */
+bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, 0, sp->hdr.seq, abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_CONN_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
{
return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 0af19bcdc80a..8b5903b6e481 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -814,6 +814,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
__be32 code;
int ret, ioc;
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return; /* Never abort an abort. */
+
rxrpc_see_skb(skb, rxrpc_skb_see_reject);
iov[0].iov_base = &whdr;
@@ -826,7 +829,13 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
msg.msg_controllen = 0;
msg.msg_flags = 0;
- memset(&whdr, 0, sizeof(whdr));
+ whdr = (struct rxrpc_wire_header) {
+ .epoch = htonl(sp->hdr.epoch),
+ .cid = htonl(sp->hdr.cid),
+ .callNumber = htonl(sp->hdr.callNumber),
+ .serviceId = htons(sp->hdr.serviceId),
+ .flags = ~sp->hdr.flags & RXRPC_CLIENT_INITIATED,
+ };
switch (skb->mark) {
case RXRPC_SKB_MARK_REJECT_BUSY:
@@ -834,6 +843,9 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
size = sizeof(whdr);
ioc = 1;
break;
+ case RXRPC_SKB_MARK_REJECT_CONN_ABORT:
+ whdr.callNumber = 0;
+ fallthrough;
case RXRPC_SKB_MARK_REJECT_ABORT:
whdr.type = RXRPC_PACKET_TYPE_ABORT;
code = htonl(skb->priority);
@@ -847,14 +859,6 @@ void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
msg.msg_namelen = srx.transport_len;
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.serviceId = htons(sp->hdr.serviceId);
- whdr.flags = sp->hdr.flags;
- whdr.flags ^= RXRPC_CLIENT_INITIATED;
- whdr.flags &= RXRPC_CLIENT_INITIATED;
-
iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
ret = do_udp_sendmsg(local->socket, &msg, size);
if (ret < 0)
@@ -924,7 +928,7 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response
{
struct rxrpc_skb_priv *sp = rxrpc_skb(response);
struct scatterlist sg[16];
- struct bio_vec bvec[16];
+ struct bio_vec *bvec = conn->local->bvec;
struct msghdr msg;
size_t len = sp->resp.len;
__be32 wserial;
@@ -938,6 +942,9 @@ void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response
if (ret < 0)
goto fail;
nr_sg = ret;
+ ret = -EIO;
+ if (WARN_ON_ONCE(nr_sg > ARRAY_SIZE(conn->local->bvec)))
+ goto fail;
for (int i = 0; i < nr_sg; i++)
bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index e2f35e6c04d6..366431b0736c 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -149,8 +149,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
* assess the MTU size for the network interface through which this peer is
* reached
*/
-static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
- struct rxrpc_peer *peer)
+void rxrpc_assess_MTU_size(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
struct net *net = local->net;
struct dst_entry *dst;
@@ -277,8 +276,6 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
peer->hdrsize += sizeof(struct rxrpc_wire_header);
peer->max_data = peer->if_mtu - peer->hdrsize;
-
- rxrpc_assess_MTU_size(local, peer);
}
/*
@@ -297,6 +294,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
rxrpc_init_peer(local, peer, hash_key);
+ rxrpc_assess_MTU_size(local, peer);
}
_leave(" = %p", peer);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 86a27fb55a1c..7fa7e77f6bb9 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
if (!list_empty(&call->recvmsg_link))
return;
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_notify_released);
+ return;
+ }
rcu_read_lock();
@@ -447,6 +451,16 @@ try_again:
goto try_again;
}
+ rxrpc_see_call(call, rxrpc_call_see_recvmsg);
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ list_del_init(&call->recvmsg_link);
+ spin_unlock_irq(&rx->recvmsg_lock);
+ release_sock(&rx->sk);
+ trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
@@ -470,8 +484,13 @@ try_again:
release_sock(&rx->sk);
- if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
- BUG();
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_already_released);
+ mutex_unlock(&call->user_mutex);
+ if (!(flags & MSG_PEEK))
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
ret = rxrpc_recvmsg_user_id(call, msg, flags);
if (ret < 0)
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 078d91a6b77f..2bfbf2b2bb37 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -140,15 +140,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
- RX_INVALID_OPERATION, -EKEYREJECTED);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
return NULL;
}
if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
!rx->securities) {
- rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
- sec->no_key_abort, -EKEYREJECTED);
+ rxrpc_direct_conn_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
return NULL;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index ad914d2b2e22..6ddff028b81a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -415,6 +415,18 @@ config NET_SCH_BPF
If unsure, say N.
+config NET_SCH_DUALPI2
+ tristate "Dual Queue PI Square (DUALPI2) scheduler"
+ help
+ Say Y here if you want to use the Dual Queue Proportional Integral
+	  Controller Improved with a Square (DUALPI2) scheduling algorithm.
+ For more information, please see https://tools.ietf.org/html/rfc9332
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_dualpi2.
+
+ If unsure, say N.
+
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 904d784902d1..5078ea84e6ad 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
obj-$(CONFIG_NET_SCH_BPF) += bpf_qdisc.o
+obj-$(CONFIG_NET_SCH_DUALPI2) += sch_dualpi2.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 057e20cef375..9e468e463467 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -933,18 +933,25 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
struct tcf_idrinfo *idrinfo)
{
struct idr *idr = &idrinfo->action_idr;
+ bool mutex_taken = false;
struct tc_action *p;
- int ret;
unsigned long id = 1;
unsigned long tmp;
+ int ret;
idr_for_each_entry_ul(idr, p, tmp, id) {
+ if (tc_act_in_hw(p) && !mutex_taken) {
+ rtnl_lock();
+ mutex_taken = true;
+ }
ret = __tcf_idr_release(p, false, true);
if (ret == ACT_P_DELETED)
module_put(ops->owner);
else if (ret < 0)
return;
}
+ if (mutex_taken)
+ rtnl_unlock();
idr_destroy(&idrinfo->action_idr);
}
EXPORT_SYMBOL(tcf_idrinfo_destroy);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 0fce631e7c91..3e89927d7116 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -88,7 +88,7 @@ count:
/* using overlimits stats to count how many packets marked */
tcf_action_inc_overlimit_qstats(&ca->common);
out:
- return READ_ONCE(ca->tcf_action);
+ return parms->action;
}
static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
@@ -167,6 +167,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (err < 0)
goto release_idr;
+ nparms->action = parm->action;
+
spin_lock_bh(&ci->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
@@ -190,20 +192,20 @@ out_free:
static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
+ const struct tcf_connmark_info *ci = to_connmark(a);
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_connmark_info *ci = to_connmark(a);
+ const struct tcf_connmark_parms *parms;
struct tc_connmark opt = {
.index = ci->tcf_index,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
- struct tcf_connmark_parms *parms;
struct tcf_t t;
- spin_lock_bh(&ci->tcf_lock);
- parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));
+ rcu_read_lock();
+ parms = rcu_dereference(ci->parms);
- opt.action = ci->tcf_action;
+ opt.action = parms->action;
opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -212,12 +214,12 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_64bit(skb, TCA_CONNMARK_TM, sizeof(t), &t,
TCA_CONNMARK_PAD))
goto nla_put_failure;
- spin_unlock_bh(&ci->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&ci->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
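
A pattern repeated across the act_* hunks in this series (connmark, csum, ct, ctinfo, mpls, nat, pedit, police, skbedit) is to copy the netlink-supplied verdict into the RCU-managed parameter block at init time, so the fast path and the dump path read it under rcu_read_lock() instead of taking the per-action spinlock or touching tcf_action. A minimal sketch with hypothetical names (foo_parms, foo_act):

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo_parms {
	int action;			/* verdict copied from parm->action */
	struct rcu_head rcu;
};

struct foo_act {
	spinlock_t lock;		/* configuration-path lock only */
	struct foo_parms __rcu *parms;
};

/* Writer (init/change path): swap in new parameters under the lock. */
static void foo_change(struct foo_act *f, struct foo_parms *nparms, int verdict)
{
	struct foo_parms *oparms;

	nparms->action = verdict;
	spin_lock_bh(&f->lock);
	oparms = rcu_replace_pointer(f->parms, nparms,
				     lockdep_is_held(&f->lock));
	spin_unlock_bh(&f->lock);
	if (oparms)
		kfree_rcu(oparms, rcu);
}

/* Reader (act/dump path): lockless under RCU. */
static int foo_verdict(struct foo_act *f)
{
	int action;

	rcu_read_lock();
	action = rcu_dereference(f->parms)->action;
	rcu_read_unlock();
	return action;
}
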
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 5cc8e407e791..0939e6b2ba4d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -99,6 +99,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
goto put_chain;
}
params_new->update_flags = parm->update_flags;
+ params_new->action = parm->action;
spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -580,7 +581,7 @@ TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb,
tcf_lastuse_update(&p->tcf_tm);
tcf_action_update_bstats(&p->common, skb);
- action = READ_ONCE(p->tcf_action);
+ action = params->action;
if (unlikely(action == TC_ACT_SHOT))
goto drop;
@@ -631,9 +632,9 @@ drop:
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
int ref)
{
+ const struct tcf_csum *p = to_tcf_csum(a);
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_csum *p = to_tcf_csum(a);
- struct tcf_csum_params *params;
+ const struct tcf_csum_params *params;
struct tc_csum opt = {
.index = p->tcf_index,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
@@ -641,10 +642,9 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
};
struct tcf_t t;
- spin_lock_bh(&p->tcf_lock);
- params = rcu_dereference_protected(p->params,
- lockdep_is_held(&p->tcf_lock));
- opt.action = p->tcf_action;
+ rcu_read_lock();
+ params = rcu_dereference(p->params);
+ opt.action = params->action;
opt.update_flags = params->update_flags;
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
@@ -653,12 +653,12 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_CSUM_TM, sizeof(t), &t, TCA_CSUM_PAD))
goto nla_put_failure;
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index c02f39efc6ef..6749a4a9a9cd 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -977,7 +977,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
p = rcu_dereference_bh(c->params);
- retval = READ_ONCE(c->tcf_action);
+ retval = p->action;
commit = p->ct_action & TCA_CT_ACT_COMMIT;
clear = p->ct_action & TCA_CT_ACT_CLEAR;
tmpl = p->tmpl;
@@ -1409,6 +1409,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (err)
goto cleanup;
+ params->action = parm->action;
spin_lock_bh(&c->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params = rcu_replace_pointer(c->params, params,
@@ -1442,8 +1443,8 @@ static void tcf_ct_cleanup(struct tc_action *a)
}
static int tcf_ct_dump_key_val(struct sk_buff *skb,
- void *val, int val_type,
- void *mask, int mask_type,
+ const void *val, int val_type,
+ const void *mask, int mask_type,
int len)
{
int err;
@@ -1464,9 +1465,9 @@ static int tcf_ct_dump_key_val(struct sk_buff *skb,
return 0;
}
-static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
+static int tcf_ct_dump_nat(struct sk_buff *skb, const struct tcf_ct_params *p)
{
- struct nf_nat_range2 *range = &p->range;
+ const struct nf_nat_range2 *range = &p->range;
if (!(p->ct_action & TCA_CT_ACT_NAT))
return 0;
@@ -1504,7 +1505,8 @@ static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p)
return 0;
}
-static int tcf_ct_dump_helper(struct sk_buff *skb, struct nf_conntrack_helper *helper)
+static int tcf_ct_dump_helper(struct sk_buff *skb,
+ const struct nf_conntrack_helper *helper)
{
if (!helper)
return 0;
@@ -1521,9 +1523,8 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_ct *c = to_ct(a);
- struct tcf_ct_params *p;
-
+ const struct tcf_ct *c = to_ct(a);
+ const struct tcf_ct_params *p;
struct tc_ct opt = {
.index = c->tcf_index,
.refcnt = refcount_read(&c->tcf_refcnt) - ref,
@@ -1531,10 +1532,9 @@ static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- spin_lock_bh(&c->tcf_lock);
- p = rcu_dereference_protected(c->params,
- lockdep_is_held(&c->tcf_lock));
- opt.action = c->tcf_action;
+ rcu_read_lock();
+ p = rcu_dereference(c->params);
+ opt.action = p->action;
if (tcf_ct_dump_key_val(skb,
&p->ct_action, TCA_CT_ACTION,
@@ -1579,11 +1579,11 @@ skip_dump:
tcf_tm_dump(&t, &c->tcf_tm);
if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD))
goto nla_put_failure;
- spin_unlock_bh(&c->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&c->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 5b1241ddc758..71efe04d00b5 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -44,9 +44,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv4_change_dsfield(ip_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -57,9 +57,9 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
ipv6_change_dsfield(ipv6_hdr(skb),
INET_ECN_MASK,
newdscp);
- ca->stats_dscp_set++;
+ atomic64_inc(&ca->stats_dscp_set);
} else {
- ca->stats_dscp_error++;
+ atomic64_inc(&ca->stats_dscp_error);
}
}
break;
@@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
struct tcf_ctinfo_params *cp,
struct sk_buff *skb)
{
- ca->stats_cpmark_set++;
+ atomic64_inc(&ca->stats_cpmark_set);
skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask;
}
@@ -88,13 +88,11 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
struct tcf_ctinfo_params *cp;
struct nf_conn *ct;
int proto, wlen;
- int action;
cp = rcu_dereference_bh(ca->params);
tcf_lastuse_update(&ca->tcf_tm);
tcf_action_update_bstats(&ca->common, skb);
- action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
switch (skb_protocol(skb, true)) {
@@ -141,7 +139,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
if (thash)
nf_ct_put(ct);
out:
- return action;
+ return cp->action;
}
static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
@@ -258,6 +256,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
cp_new->mode |= CTINFO_MODE_CPMARK;
}
+ cp_new->action = actparm->action;
+
spin_lock_bh(&ci->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
cp_new = rcu_replace_pointer(ci->params, cp_new,
@@ -282,25 +282,24 @@ release_idr:
static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
- struct tcf_ctinfo *ci = to_ctinfo(a);
+ const struct tcf_ctinfo *ci = to_ctinfo(a);
+ unsigned char *b = skb_tail_pointer(skb);
+ const struct tcf_ctinfo_params *cp;
struct tc_ctinfo opt = {
.index = ci->tcf_index,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
- unsigned char *b = skb_tail_pointer(skb);
- struct tcf_ctinfo_params *cp;
struct tcf_t t;
- spin_lock_bh(&ci->tcf_lock);
- cp = rcu_dereference_protected(ci->params,
- lockdep_is_held(&ci->tcf_lock));
+ rcu_read_lock();
+ cp = rcu_dereference(ci->params);
tcf_tm_dump(&t, &ci->tcf_tm);
if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
goto nla_put_failure;
- opt.action = ci->tcf_action;
+ opt.action = cp->action;
if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
goto nla_put_failure;
@@ -323,22 +322,25 @@ static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
}
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
- ci->stats_dscp_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
- ci->stats_dscp_error, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_dscp_error),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
- ci->stats_cpmark_set, TCA_CTINFO_PAD))
+ atomic64_read(&ci->stats_cpmark_set),
+ TCA_CTINFO_PAD))
goto nla_put_failure;
- spin_unlock_bh(&ci->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&ci->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 9f86f4e666d3..6654011dcd2b 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -57,7 +57,7 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb,
struct tcf_mpls *m = to_mpls(a);
struct tcf_mpls_params *p;
__be32 new_lse;
- int ret, mac_len;
+ int mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -72,8 +72,6 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb,
mac_len = skb_network_offset(skb);
}
- ret = READ_ONCE(m->tcf_action);
-
p = rcu_dereference_bh(m->mpls_p);
switch (p->tcfm_action) {
@@ -122,7 +120,7 @@ TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
- return ret;
+ return p->action;
drop:
qstats_drop_inc(this_cpu_ptr(m->common.cpu_qstats));
@@ -296,6 +294,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
ACT_MPLS_BOS_NOT_SET);
p->tcfm_proto = nla_get_be16_default(tb[TCA_MPLS_PROTO],
htons(ETH_P_MPLS_UC));
+ p->action = parm->action;
spin_lock_bh(&m->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -330,8 +329,8 @@ static int tcf_mpls_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_mpls *m = to_mpls(a);
- struct tcf_mpls_params *p;
+ const struct tcf_mpls *m = to_mpls(a);
+ const struct tcf_mpls_params *p;
struct tc_mpls opt = {
.index = m->tcf_index,
.refcnt = refcount_read(&m->tcf_refcnt) - ref,
@@ -339,10 +338,10 @@ static int tcf_mpls_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- spin_lock_bh(&m->tcf_lock);
- opt.action = m->tcf_action;
- p = rcu_dereference_protected(m->mpls_p, lockdep_is_held(&m->tcf_lock));
+ rcu_read_lock();
+ p = rcu_dereference(m->mpls_p);
opt.m_action = p->tcfm_action;
+ opt.action = p->action;
if (nla_put(skb, TCA_MPLS_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -370,12 +369,12 @@ static int tcf_mpls_dump(struct sk_buff *skb, struct tc_action *a,
if (nla_put_64bit(skb, TCA_MPLS_TM, sizeof(t), &t, TCA_MPLS_PAD))
goto nla_put_failure;
- spin_unlock_bh(&m->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&m->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -EMSGSIZE;
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index d541f553805f..26241d80ebe0 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -91,6 +91,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
nparm->new_addr = parm->new_addr;
nparm->mask = parm->mask;
nparm->flags = parm->flags;
+ nparm->action = parm->action;
p = to_tcf_nat(*a);
@@ -130,17 +131,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
tcf_lastuse_update(&p->tcf_tm);
tcf_action_update_bstats(&p->common, skb);
- action = READ_ONCE(p->tcf_action);
-
parms = rcu_dereference_bh(p->parms);
+ action = parms->action;
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
old_addr = parms->old_addr;
new_addr = parms->new_addr;
mask = parms->mask;
egress = parms->flags & TCA_NAT_FLAG_EGRESS;
- if (unlikely(action == TC_ACT_SHOT))
- goto drop;
-
noff = skb_network_offset(skb);
if (!pskb_may_pull(skb, sizeof(*iph) + noff))
goto drop;
@@ -268,21 +268,20 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_nat *p = to_tcf_nat(a);
+ const struct tcf_nat *p = to_tcf_nat(a);
+ const struct tcf_nat_parms *parms;
struct tc_nat opt = {
.index = p->tcf_index,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
- struct tcf_nat_parms *parms;
struct tcf_t t;
- spin_lock_bh(&p->tcf_lock);
-
- opt.action = p->tcf_action;
+ rcu_read_lock();
- parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
+ parms = rcu_dereference(p->parms);
+ opt.action = parms->action;
opt.old_addr = parms->old_addr;
opt.new_addr = parms->new_addr;
opt.mask = parms->mask;
@@ -294,12 +293,12 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_NAT_TM, sizeof(t), &t, TCA_NAT_PAD))
goto nla_put_failure;
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fc0a35a7b62a..4b65901397a8 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -279,7 +279,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
p = to_pedit(*a);
-
+ nparms->action = parm->action;
spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
oparms = rcu_replace_pointer(p->parms, nparms, 1);
@@ -483,7 +483,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
bad:
tcf_action_inc_overlimit_qstats(&p->common);
done:
- return p->tcf_action;
+ return parms->action;
}
static void tcf_pedit_stats_update(struct tc_action *a, u64 bytes, u64 packets,
@@ -500,19 +500,19 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_pedit *p = to_pedit(a);
- struct tcf_pedit_parms *parms;
+ const struct tcf_pedit *p = to_pedit(a);
+ const struct tcf_pedit_parms *parms;
struct tc_pedit *opt;
struct tcf_t t;
int s;
- spin_lock_bh(&p->tcf_lock);
- parms = rcu_dereference_protected(p->parms, 1);
+ rcu_read_lock();
+ parms = rcu_dereference(p->parms);
s = struct_size(opt, keys, parms->tcfp_nkeys);
opt = kzalloc(s, GFP_ATOMIC);
if (unlikely(!opt)) {
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
return -ENOBUFS;
}
opt->nkeys = parms->tcfp_nkeys;
@@ -521,7 +521,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
opt->flags = parms->tcfp_flags;
- opt->action = p->tcf_action;
+ opt->action = parms->action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
@@ -540,13 +540,13 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &p->tcf_tm);
if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD))
goto nla_put_failure;
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
kfree(opt);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&p->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
kfree(opt);
return -1;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a214ed681142..0e1c61183379 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -198,6 +198,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
psched_ppscfg_precompute(&new->ppsrate, pps);
}
+ new->action = parm->action;
spin_lock_bh(&police->tcf_lock);
spin_lock_bh(&police->tcfp_lock);
police->tcfp_t_c = ktime_get_ns();
@@ -254,8 +255,8 @@ TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb,
tcf_lastuse_update(&police->tcf_tm);
bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
- ret = READ_ONCE(police->tcf_action);
p = rcu_dereference_bh(police->params);
+ ret = p->action;
if (p->tcfp_ewma_rate) {
struct gnet_stats_rate_est64 sample;
@@ -338,9 +339,9 @@ static void tcf_police_stats_update(struct tc_action *a,
static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
+ const struct tcf_police *police = to_police(a);
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_police *police = to_police(a);
- struct tcf_police_params *p;
+ const struct tcf_police_params *p;
struct tc_police opt = {
.index = police->tcf_index,
.refcnt = refcount_read(&police->tcf_refcnt) - ref,
@@ -348,10 +349,9 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- spin_lock_bh(&police->tcf_lock);
- opt.action = police->tcf_action;
- p = rcu_dereference_protected(police->params,
- lockdep_is_held(&police->tcf_lock));
+ rcu_read_lock();
+ p = rcu_dereference(police->params);
+ opt.action = p->action;
opt.mtu = p->tcfp_mtu;
opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
if (p->rate_present) {
@@ -392,12 +392,12 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &police->tcf_tm);
if (nla_put_64bit(skb, TCA_POLICE_TM, sizeof(t), &t, TCA_POLICE_PAD))
goto nla_put_failure;
- spin_unlock_bh(&police->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&police->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 1f1d9ce3e968..8c1d1554f657 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -43,13 +43,11 @@ TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb,
{
struct tcf_skbedit *d = to_skbedit(a);
struct tcf_skbedit_params *params;
- int action;
tcf_lastuse_update(&d->tcf_tm);
bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
params = rcu_dereference_bh(d->params);
- action = READ_ONCE(d->tcf_action);
if (params->flags & SKBEDIT_F_PRIORITY)
skb->priority = params->priority;
@@ -85,7 +83,7 @@ TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb,
}
if (params->flags & SKBEDIT_F_PTYPE)
skb->pkt_type = params->ptype;
- return action;
+ return params->action;
err:
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
@@ -262,6 +260,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
if (flags & SKBEDIT_F_MASK)
params_new->mask = *mask;
+ params_new->action = parm->action;
spin_lock_bh(&d->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
params_new = rcu_replace_pointer(d->params, params_new,
@@ -284,9 +283,9 @@ release_idr:
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
+ const struct tcf_skbedit *d = to_skbedit(a);
unsigned char *b = skb_tail_pointer(skb);
- struct tcf_skbedit *d = to_skbedit(a);
- struct tcf_skbedit_params *params;
+ const struct tcf_skbedit_params *params;
struct tc_skbedit opt = {
.index = d->tcf_index,
.refcnt = refcount_read(&d->tcf_refcnt) - ref,
@@ -295,10 +294,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
u64 pure_flags = 0;
struct tcf_t t;
- spin_lock_bh(&d->tcf_lock);
- params = rcu_dereference_protected(d->params,
- lockdep_is_held(&d->tcf_lock));
- opt.action = d->tcf_action;
+ rcu_read_lock();
+ params = rcu_dereference(d->params);
+ opt.action = params->action;
if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -333,12 +331,12 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
tcf_tm_dump(&t, &d->tcf_tm);
if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
goto nla_put_failure;
- spin_unlock_bh(&d->tcf_lock);
+ rcu_read_unlock();
return skb->len;
nla_put_failure:
- spin_unlock_bh(&d->tcf_lock);
+ rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/bpf_qdisc.c b/net/sched/bpf_qdisc.c
index 7ea8b54b2ab1..adcb618a2bfc 100644
--- a/net/sched/bpf_qdisc.c
+++ b/net/sched/bpf_qdisc.c
@@ -130,8 +130,7 @@ static int bpf_qdisc_btf_struct_access(struct bpf_verifier_log *log,
return 0;
}
-BTF_ID_LIST(bpf_qdisc_init_prologue_ids)
-BTF_ID(func, bpf_qdisc_init_prologue)
+BTF_ID_LIST_SINGLE(bpf_qdisc_init_prologue_ids, func, bpf_qdisc_init_prologue)
static int bpf_qdisc_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
@@ -161,8 +160,7 @@ static int bpf_qdisc_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
return insn - insn_buf;
}
-BTF_ID_LIST(bpf_qdisc_reset_destroy_epilogue_ids)
-BTF_ID(func, bpf_qdisc_reset_destroy_epilogue)
+BTF_ID_LIST_SINGLE(bpf_qdisc_reset_destroy_epilogue_ids, func, bpf_qdisc_reset_destroy_epilogue)
static int bpf_qdisc_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
s16 ctx_stack_off)
@@ -451,8 +449,7 @@ static struct bpf_struct_ops bpf_Qdisc_ops = {
.owner = THIS_MODULE,
};
-BTF_ID_LIST(bpf_sk_buff_dtor_ids)
-BTF_ID(func, bpf_kfree_skb)
+BTF_ID_LIST_SINGLE(bpf_sk_buff_dtor_ids, func, bpf_kfree_skb)
static int __init bpf_qdisc_kfunc_init(void)
{
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 420c66203b17..6b3d0af72c39 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -108,7 +108,7 @@ static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
struct text_match *tm = EM_TEXT_PRIV(m);
struct tcf_em_text conf;
- strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1);
+ strscpy(conf.algo, tm->config->ops->name);
conf.from_offset = tm->from_offset;
conf.to_offset = tm->to_offset;
conf.from_layer = tm->from_layer;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c5e3673aadbe..d7c767b861a4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -336,17 +336,22 @@ out:
return q;
}
-static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
+ struct netlink_ext_ack *extack)
{
unsigned long cl;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
- if (cops == NULL)
- return NULL;
+ if (cops == NULL) {
+ NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
cl = cops->find(p, classid);
- if (cl == 0)
- return NULL;
+ if (cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class not found");
+ return ERR_PTR(-ENOENT);
+ }
return cops->leaf(p, cl);
}
@@ -596,16 +601,6 @@ out:
qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
-void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
-{
- if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
- pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
- txt, qdisc->ops->id, qdisc->handle >> 16);
- qdisc->flags |= TCQ_F_WARN_NONWC;
- }
-}
-EXPORT_SYMBOL(qdisc_warn_nonwc);
-
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
@@ -780,15 +775,12 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
- bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
bool notify;
int drops;
- if (n == 0 && len == 0)
- return;
drops = max_t(int, n, 0);
rcu_read_lock();
while ((parentid = sch->parent)) {
@@ -797,17 +789,8 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
if (sch->flags & TCQ_F_NOPARENT)
break;
- /* Notify parent qdisc only if child qdisc becomes empty.
- *
- * If child was empty even before update then backlog
- * counter is screwed and we skip notification because
- * parent class is already passive.
- *
- * If the original child was offloaded then it is allowed
- * to be seem as empty, so the parent is notified anyway.
- */
- notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
- !qdisc_is_offloaded);
+ /* Notify parent qdisc only if child qdisc becomes empty. */
+ notify = !sch->q.qlen;
/* TODO: perform the search on a per txq basis */
sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
@@ -816,6 +799,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
}
cops = sch->ops->cl_ops;
if (notify && cops->qlen_notify) {
+ /* Note that qlen_notify must be idempotent as it may get called
+ * multiple times.
+ */
cl = cops->find(sch, parentid);
cops->qlen_notify(sch, cl);
}
@@ -1499,7 +1485,7 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
} else if (dev_ingress_queue(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
@@ -1510,6 +1496,8 @@ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
return -ENOENT;
}
+ if (IS_ERR(q))
+ return PTR_ERR(q);
if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
NL_SET_ERR_MSG(extack, "Invalid handle");
@@ -1611,7 +1599,9 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
return -ENOENT;
}
- q = qdisc_leaf(p, clid);
+ q = qdisc_leaf(p, clid, extack);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
} else if (dev_ingress_queue_create(dev)) {
q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
}
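
With qdisc_leaf() now reporting failures via ERR_PTR() and extack instead of a bare NULL, its callers have to distinguish "class exists but no qdisc grafted" (NULL) from a real error. A minimal sketch of the expected caller pattern, using a hypothetical wrapper:

/* Hypothetical helper: resolve the leaf qdisc of a class, propagating
 * the errno set by qdisc_leaf() while keeping NULL as "nothing grafted".
 */
static int resolve_leaf(struct Qdisc *parent, u32 classid,
			struct Qdisc **leaf, struct netlink_ext_ack *extack)
{
	struct Qdisc *q = qdisc_leaf(parent, classid, extack);

	if (IS_ERR(q))
		return PTR_ERR(q);
	*leaf = q;		/* may be NULL: class has no child qdisc */
	return 0;
}
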
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 48dd8c88903f..dbcfb948c867 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1407,7 +1407,10 @@ static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
return cake_calc_overhead(q, len, off);
/* borrowed from qdisc_pkt_len_init() */
- hdr_len = skb_transport_offset(skb);
+ if (!skb->encapsulation)
+ hdr_len = skb_transport_offset(skb);
+ else
+ hdr_len = skb_inner_transport_offset(skb);
/* + transport layer */
if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c
new file mode 100644
index 000000000000..845375ebd4ea
--- /dev/null
+++ b/net/sched/sch_dualpi2.c
@@ -0,0 +1,1175 @@
+// SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+/* Copyright (C) 2024 Nokia
+ *
+ * Author: Koen De Schepper <koen.de_schepper@nokia-bell-labs.com>
+ * Author: Olga Albisser <olga@albisser.org>
+ * Author: Henrik Steen <henrist@henrist.net>
+ * Author: Olivier Tilmans <olivier.tilmans@nokia.com>
+ * Author: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
+ *
+ * DualPI Improved with a Square (dualpi2):
+ * - Supports congestion controls that comply with the Prague requirements
+ * in RFC9331 (e.g. TCP-Prague)
+ * - Supports coupled dual-queue with PI2 as defined in RFC9332
+ * - Supports ECN L4S-identifier (IP.ECN==0b*1)
+ *
+ * note: Although DCTCP and BBRv3 can use shallow-threshold ECN marks,
+ * they do not meet the 'Prague L4S Requirements' listed in RFC 9331
+ * Section 4, so they can only be used with DualPI2 in a datacenter
+ * context.
+ *
+ * References:
+ * - RFC9332: https://datatracker.ietf.org/doc/html/rfc9332
+ * - De Schepper, Koen, et al. "PI 2: A linearized AQM for both classic and
+ * scalable TCP." in proc. ACM CoNEXT'16, 2016.
+ */
+
+#include <linux/errno.h>
+#include <linux/hrtimer.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
+#include <net/gso.h>
+#include <net/inet_ecn.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+
+/* 32b enables support for flows with windows up to ~8.6 * 1e9 packets
+ * i.e., twice the maximal snd_cwnd.
+ * MAX_PROB must be consistent with the RNG in dualpi2_roll().
+ */
+#define MAX_PROB U32_MAX
+
+/* alpha/beta values exchanged over netlink are in units of 256ns */
+#define ALPHA_BETA_SHIFT 8
+
+/* Scaled values of alpha/beta must fit in 32b to avoid overflow in later
+ * computations. Consequently (see dualpi2_scale_alpha_beta()), their
+ * netlink-provided values can use at most 31b, i.e. be at most (2^23)-1
+ * (~4MHz) as those are given in 1/256th. This enables tuning alpha/beta to
+ * control flows whose maximal RTTs range from usecs up to a few secs.
+ */
+#define ALPHA_BETA_MAX ((1U << 31) - 1)
+
+/* Internal alpha/beta are in units of 64ns.
+ * This enables using all alpha/beta values in the allowed range without loss
+ * of precision due to rounding when scaling them internally, e.g.,
+ * scale_alpha_beta(1) will not round down to 0.
+ */
+#define ALPHA_BETA_GRANULARITY 6
+
+#define ALPHA_BETA_SCALING (ALPHA_BETA_SHIFT - ALPHA_BETA_GRANULARITY)
+
+/* We express the weights (wc, wl) in %, i.e., wc + wl = 100 */
+#define MAX_WC 100
+
+struct dualpi2_sched_data {
+ struct Qdisc *l_queue; /* The L4S Low latency queue (L-queue) */
+ struct Qdisc *sch; /* The Classic queue (C-queue) */
+
+ /* Registered tc filters */
+ struct tcf_proto __rcu *tcf_filters;
+ struct tcf_block *tcf_block;
+
+ /* PI2 parameters */
+ u64 pi2_target; /* Target delay in nanoseconds */
+ u32 pi2_tupdate; /* Timer frequency in nanoseconds */
+ u32 pi2_prob; /* Base PI probability */
+ u32 pi2_alpha; /* Gain factor for the integral rate response */
+ u32 pi2_beta; /* Gain factor for the proportional response */
+ struct hrtimer pi2_timer; /* prob update timer */
+
+ /* Step AQM (L-queue only) parameters */
+ u32 step_thresh; /* Step threshold */
+ bool step_in_packets; /* Step thresh in packets (1) or time (0) */
+
+ /* C-queue starvation protection */
+ s32 c_protection_credit; /* Credit (sign indicates which queue) */
+ s32 c_protection_init; /* Reset value of the credit */
+ u8 c_protection_wc; /* C-queue weight (between 0 and MAX_WC) */
+ u8 c_protection_wl; /* L-queue weight (MAX_WC - wc) */
+
+ /* General dualQ parameters */
+ u32 memory_limit; /* Memory limit of both queues */
+ u8 coupling_factor;/* Coupling factor (k) between both queues */
+ u8 ecn_mask; /* Mask to match packets into L-queue */
+ u32 min_qlen_step; /* Minimum queue length to apply step thresh */
+ bool drop_early; /* Drop at enqueue (1) instead of dequeue (0) */
+ bool drop_overload; /* Drop (1) on overload, or overflow (0) */
+ bool split_gso; /* Split aggregated skb (1) or leave as is (0) */
+
+ /* Statistics */
+ u64 c_head_ts; /* Enqueue timestamp of the C-queue head */
+ u64 l_head_ts; /* Enqueue timestamp of the L-queue head */
+ u64 last_qdelay; /* Q delay val at the last probability update */
+ u32 packets_in_c; /* Enqueue packet counter of the C-queue */
+ u32 packets_in_l; /* Enqueue packet counter of the L-queue */
+ u32 maxq; /* Maximum queue size of the C-queue */
+ u32 ecn_mark; /* ECN mark pkt counter due to PI probability */
+ u32 step_marks; /* ECN mark pkt counter due to step AQM */
+ u32 memory_used; /* Memory used of both queues */
+ u32 max_memory_used;/* Maximum used memory */
+
+ /* Deferred drop statistics */
+ u32 deferred_drops_cnt; /* Packets dropped */
+ u32 deferred_drops_len; /* Bytes dropped */
+};
+
+struct dualpi2_skb_cb {
+ u64 ts; /* Timestamp at enqueue */
+ u8 apply_step:1, /* Can we apply the step threshold */
+ classified:2, /* Packet classification results */
+ ect:2; /* Packet ECT codepoint */
+};
+
+enum dualpi2_classification_results {
+ DUALPI2_C_CLASSIC = 0, /* C-queue */
+ DUALPI2_C_L4S = 1, /* L-queue (scale mark/classic drop) */
+ DUALPI2_C_LLLL = 2, /* L-queue (no drops/marks) */
+	__DUALPI2_C_MAX		/* Keep last */
+};
+
+static struct dualpi2_skb_cb *dualpi2_skb_cb(struct sk_buff *skb)
+{
+ qdisc_cb_private_validate(skb, sizeof(struct dualpi2_skb_cb));
+ return (struct dualpi2_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static u64 dualpi2_sojourn_time(struct sk_buff *skb, u64 reference)
+{
+ return reference - dualpi2_skb_cb(skb)->ts;
+}
+
+static u64 head_enqueue_time(struct Qdisc *q)
+{
+ struct sk_buff *skb = qdisc_peek_head(q);
+
+ return skb ? dualpi2_skb_cb(skb)->ts : 0;
+}
+
+static u32 dualpi2_scale_alpha_beta(u32 param)
+{
+ u64 tmp = ((u64)param * MAX_PROB >> ALPHA_BETA_SCALING);
+
+ do_div(tmp, NSEC_PER_SEC);
+ return tmp;
+}
+
+static u32 dualpi2_unscale_alpha_beta(u32 param)
+{
+ u64 tmp = ((u64)param * NSEC_PER_SEC << ALPHA_BETA_SCALING);
+
+ do_div(tmp, MAX_PROB);
+ return tmp;
+}
+
+static ktime_t next_pi2_timeout(struct dualpi2_sched_data *q)
+{
+ return ktime_add_ns(ktime_get_ns(), q->pi2_tupdate);
+}
+
+static bool skb_is_l4s(struct sk_buff *skb)
+{
+ return dualpi2_skb_cb(skb)->classified == DUALPI2_C_L4S;
+}
+
+static bool skb_in_l_queue(struct sk_buff *skb)
+{
+ return dualpi2_skb_cb(skb)->classified != DUALPI2_C_CLASSIC;
+}
+
+static bool skb_apply_step(struct sk_buff *skb, struct dualpi2_sched_data *q)
+{
+ return skb_is_l4s(skb) && qdisc_qlen(q->l_queue) >= q->min_qlen_step;
+}
+
+static bool dualpi2_mark(struct dualpi2_sched_data *q, struct sk_buff *skb)
+{
+ if (INET_ECN_set_ce(skb)) {
+ q->ecn_mark++;
+ return true;
+ }
+ return false;
+}
+
+static void dualpi2_reset_c_protection(struct dualpi2_sched_data *q)
+{
+ q->c_protection_credit = q->c_protection_init;
+}
+
+/* This computes the initial credit value and WRR weight for the L queue (wl)
+ * from the weight of the C queue (wc).
+ * If wl > wc, the scheduler will start with the L queue when reset.
+ */
+static void dualpi2_calculate_c_protection(struct Qdisc *sch,
+ struct dualpi2_sched_data *q, u32 wc)
+{
+ q->c_protection_wc = wc;
+ q->c_protection_wl = MAX_WC - wc;
+ q->c_protection_init = (s32)psched_mtu(qdisc_dev(sch)) *
+ ((int)q->c_protection_wc - (int)q->c_protection_wl);
+ dualpi2_reset_c_protection(q);
+}
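
To put numbers on the credit computed above (a worked example using the defaults set later in dualpi2_reset_default()): with wc = 10, wl becomes 90, and for an Ethernet device where psched_mtu() is about 1514 bytes the initial credit is 1514 * (10 - 90) = -121120. The negative value means the scheduler serves the L-queue first after a reset, as the comment above states for wl > wc.
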
+
+static bool dualpi2_roll(u32 prob)
+{
+ return get_random_u32() <= prob;
+}
+
+/* Packets in the C-queue are subject to a marking probability pC, which is the
+ * square of the internal PI probability (i.e., an overall lower mark/drop
+ * probability). If the qdisc is overloaded, ignore ECT values and only drop.
+ *
+ * Note that this marking scheme is also applied to L4S packets during overload.
+ * Return true if packet dropping is required in C queue
+ */
+static bool dualpi2_classic_marking(struct dualpi2_sched_data *q,
+ struct sk_buff *skb, u32 prob,
+ bool overload)
+{
+ if (dualpi2_roll(prob) && dualpi2_roll(prob)) {
+ if (overload || dualpi2_skb_cb(skb)->ect == INET_ECN_NOT_ECT)
+ return true;
+ dualpi2_mark(q, skb);
+ }
+ return false;
+}
+
+/* Packets in the L-queue are subject to a marking probability pL given by the
+ * internal PI probability scaled by the coupling factor.
+ *
+ * On overload (i.e., @local_l_prob is >= 100%):
+ * - if the qdisc is configured to trade losses to preserve latency (i.e.,
+ * @q->drop_overload), apply classic drops first before marking.
+ * - otherwise, preserve the "no loss" property of ECN at the cost of queueing
+ * delay, eventually resulting in taildrop behavior once sch->limit is
+ * reached.
+ * Return true if packet dropping is required in L queue
+ */
+static bool dualpi2_scalable_marking(struct dualpi2_sched_data *q,
+ struct sk_buff *skb,
+ u64 local_l_prob, u32 prob,
+ bool overload)
+{
+ if (overload) {
+ /* Apply classic drop */
+ if (!q->drop_overload ||
+ !(dualpi2_roll(prob) && dualpi2_roll(prob)))
+ goto mark;
+ return true;
+ }
+
+ /* We can safely cut the upper 32b as overload==false */
+ if (dualpi2_roll(local_l_prob)) {
+		/* Non-ECT packets could have been classified as L4S by filters. */
+ if (dualpi2_skb_cb(skb)->ect == INET_ECN_NOT_ECT)
+ return true;
+mark:
+ dualpi2_mark(q, skb);
+ }
+ return false;
+}
+
+/* Decide whether a given packet must be dropped (or marked if ECT), according
+ * to the PI2 probability.
+ *
+ * Never mark/drop if we have a standing queue of less than 2 MTUs.
+ */
+static bool must_drop(struct Qdisc *sch, struct dualpi2_sched_data *q,
+ struct sk_buff *skb)
+{
+ u64 local_l_prob;
+ bool overload;
+ u32 prob;
+
+ if (sch->qstats.backlog < 2 * psched_mtu(qdisc_dev(sch)))
+ return false;
+
+ prob = READ_ONCE(q->pi2_prob);
+ local_l_prob = (u64)prob * q->coupling_factor;
+ overload = local_l_prob > MAX_PROB;
+
+ switch (dualpi2_skb_cb(skb)->classified) {
+ case DUALPI2_C_CLASSIC:
+ return dualpi2_classic_marking(q, skb, prob, overload);
+ case DUALPI2_C_L4S:
+ return dualpi2_scalable_marking(q, skb, local_l_prob, prob,
+ overload);
+ default: /* DUALPI2_C_LLLL */
+ return false;
+ }
+}
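
A worked example of the coupling implemented by must_drop(), using the default coupling_factor k = 2: if the internal PI probability p is 10% of MAX_PROB, Classic packets are marked or dropped with probability p * p = 1% (two successful rolls), while L4S packets are marked with k * p = 20%. Overload is only declared once k * p would exceed 100%, i.e. p > 50% for k = 2, at which point the L-queue either takes classic-style squared drops (drop_overload set) or keeps marking at 100%.
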
+
+static void dualpi2_read_ect(struct sk_buff *skb)
+{
+ struct dualpi2_skb_cb *cb = dualpi2_skb_cb(skb);
+ int wlen = skb_network_offset(skb);
+
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ goto not_ecn;
+
+ cb->ect = ipv4_get_dsfield(ip_hdr(skb)) & INET_ECN_MASK;
+ break;
+ case htons(ETH_P_IPV6):
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ goto not_ecn;
+
+ cb->ect = ipv6_get_dsfield(ipv6_hdr(skb)) & INET_ECN_MASK;
+ break;
+ default:
+ goto not_ecn;
+ }
+ return;
+
+not_ecn:
+	/* Non-pullable/writable packets can only be dropped, hence they are
+	 * classified as not ECT.
+ */
+ cb->ect = INET_ECN_NOT_ECT;
+}
+
+static int dualpi2_skb_classify(struct dualpi2_sched_data *q,
+ struct sk_buff *skb)
+{
+ struct dualpi2_skb_cb *cb = dualpi2_skb_cb(skb);
+ struct tcf_result res;
+ struct tcf_proto *fl;
+ int result;
+
+ dualpi2_read_ect(skb);
+ if (cb->ect & q->ecn_mask) {
+ cb->classified = DUALPI2_C_L4S;
+ return NET_XMIT_SUCCESS;
+ }
+
+ if (TC_H_MAJ(skb->priority) == q->sch->handle &&
+ TC_H_MIN(skb->priority) < __DUALPI2_C_MAX) {
+ cb->classified = TC_H_MIN(skb->priority);
+ return NET_XMIT_SUCCESS;
+ }
+
+ fl = rcu_dereference_bh(q->tcf_filters);
+ if (!fl) {
+ cb->classified = DUALPI2_C_CLASSIC;
+ return NET_XMIT_SUCCESS;
+ }
+
+ result = tcf_classify(skb, NULL, fl, &res, false);
+ if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ }
+#endif
+ cb->classified = TC_H_MIN(res.classid) < __DUALPI2_C_MAX ?
+ TC_H_MIN(res.classid) : DUALPI2_C_CLASSIC;
+ }
+ return NET_XMIT_SUCCESS;
+}
+
+static int dualpi2_enqueue_skb(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ struct dualpi2_skb_cb *cb;
+
+ if (unlikely(qdisc_qlen(sch) >= sch->limit) ||
+ unlikely((u64)q->memory_used + skb->truesize > q->memory_limit)) {
+ qdisc_qstats_overlimit(sch);
+ if (skb_in_l_queue(skb))
+ qdisc_qstats_overlimit(q->l_queue);
+ return qdisc_drop_reason(skb, sch, to_free,
+ SKB_DROP_REASON_QDISC_OVERLIMIT);
+ }
+
+ if (q->drop_early && must_drop(sch, q, skb)) {
+ qdisc_drop_reason(skb, sch, to_free,
+ SKB_DROP_REASON_QDISC_CONGESTED);
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ }
+
+ cb = dualpi2_skb_cb(skb);
+ cb->ts = ktime_get_ns();
+ q->memory_used += skb->truesize;
+ if (q->memory_used > q->max_memory_used)
+ q->max_memory_used = q->memory_used;
+
+ if (qdisc_qlen(sch) > q->maxq)
+ q->maxq = qdisc_qlen(sch);
+
+ if (skb_in_l_queue(skb)) {
+ /* Apply step thresh if skb is L4S && L-queue len >= min_qlen */
+ dualpi2_skb_cb(skb)->apply_step = skb_apply_step(skb, q);
+
+ /* Keep the overall qdisc stats consistent */
+ ++sch->q.qlen;
+ qdisc_qstats_backlog_inc(sch, skb);
+ ++q->packets_in_l;
+ if (!q->l_head_ts)
+ q->l_head_ts = cb->ts;
+ return qdisc_enqueue_tail(skb, q->l_queue);
+ }
+ ++q->packets_in_c;
+ if (!q->c_head_ts)
+ q->c_head_ts = cb->ts;
+ return qdisc_enqueue_tail(skb, sch);
+}
+
+/* By default, dualpi2 will split GSO skbs into independent skbs and enqueue
+ * each of those individually. This yields the following benefits, at the
+ * expense of CPU usage:
+ * - Finer-grained AQM actions as the sub-packets of a burst no longer share the
+ * same fate (e.g., the random mark/drop probability is applied individually)
+ * - Improved precision of the starvation protection/WRR scheduler at dequeue,
+ * as the size of the dequeued packets will be smaller.
+ */
+static int dualpi2_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ err = dualpi2_skb_classify(q, skb);
+ if (err != NET_XMIT_SUCCESS) {
+ if (err & __NET_XMIT_BYPASS)
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ return err;
+ }
+
+ if (q->split_gso && skb_is_gso(skb)) {
+ netdev_features_t features;
+ struct sk_buff *nskb, *next;
+ int cnt, byte_len, orig_len;
+ int err;
+
+ features = netif_skb_features(skb);
+ nskb = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(nskb))
+ return qdisc_drop(skb, sch, to_free);
+
+ cnt = 1;
+ byte_len = 0;
+ orig_len = qdisc_pkt_len(skb);
+ skb_list_walk_safe(nskb, nskb, next) {
+ skb_mark_not_on_list(nskb);
+
+ /* Iterate through GSO fragments of an skb:
+ * (1) Set pkt_len from the single GSO fragments
+ * (2) Copy classified and ect values of an skb
+ * (3) Enqueue fragment & set ts in dualpi2_enqueue_skb
+ */
+ qdisc_skb_cb(nskb)->pkt_len = nskb->len;
+ dualpi2_skb_cb(nskb)->classified =
+ dualpi2_skb_cb(skb)->classified;
+ dualpi2_skb_cb(nskb)->ect = dualpi2_skb_cb(skb)->ect;
+ err = dualpi2_enqueue_skb(nskb, sch, to_free);
+
+ if (err == NET_XMIT_SUCCESS) {
+ /* Compute the backlog adjustment that needs
+ * to be propagated in the qdisc tree to reflect
+ * all new skbs successfully enqueued.
+ */
+ ++cnt;
+ byte_len += nskb->len;
+ }
+ }
+ if (cnt > 1) {
+ /* The caller will add the original skb stats to its
+ * backlog, compensate this if any nskb is enqueued.
+ */
+ --cnt;
+ byte_len -= orig_len;
+ }
+ qdisc_tree_reduce_backlog(sch, -cnt, -byte_len);
+ consume_skb(skb);
+ return err;
+ }
+ return dualpi2_enqueue_skb(skb, sch, to_free);
+}
+
+/* Select the queue from which the next packet can be dequeued, ensuring that
+ * neither queue can starve the other with a WRR scheduler.
+ *
+ * The sign of the WRR credit determines the next queue, while the size of
+ * the dequeued packet determines the magnitude of the WRR credit change. If
+ * either queue is empty, the WRR credit is kept unchanged.
+ *
+ * As the dequeued packet can be dropped later, the caller has to perform the
+ * qdisc_bstats_update() calls.
+ */
+static struct sk_buff *dequeue_packet(struct Qdisc *sch,
+ struct dualpi2_sched_data *q,
+ int *credit_change,
+ u64 now)
+{
+ struct sk_buff *skb = NULL;
+ int c_len;
+
+ *credit_change = 0;
+ c_len = qdisc_qlen(sch) - qdisc_qlen(q->l_queue);
+ if (qdisc_qlen(q->l_queue) && (!c_len || q->c_protection_credit <= 0)) {
+ skb = __qdisc_dequeue_head(&q->l_queue->q);
+ WRITE_ONCE(q->l_head_ts, head_enqueue_time(q->l_queue));
+ if (c_len)
+ *credit_change = q->c_protection_wc;
+ qdisc_qstats_backlog_dec(q->l_queue, skb);
+
+ /* Keep the global queue size consistent */
+ --sch->q.qlen;
+ q->memory_used -= skb->truesize;
+ } else if (c_len) {
+ skb = __qdisc_dequeue_head(&sch->q);
+ WRITE_ONCE(q->c_head_ts, head_enqueue_time(sch));
+ if (qdisc_qlen(q->l_queue))
+ *credit_change = ~((s32)q->c_protection_wl) + 1;
+ q->memory_used -= skb->truesize;
+ } else {
+ dualpi2_reset_c_protection(q);
+ return NULL;
+ }
+ *credit_change *= qdisc_pkt_len(skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ return skb;
+}
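
The credit arithmetic in dequeue_packet() yields a simple long-run share when both queues stay backlogged: each byte sent from the L-queue adds wc to the credit and each byte sent from the C-queue subtracts wl, so the credit hovers around zero when wc * bytes_L = wl * bytes_C. With the default wc = 10 / wl = 90 this guarantees the Classic queue roughly 10% of the link under sustained load from both queues, while leaving either queue free to use the full link when the other is idle.
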
+
+static int do_step_aqm(struct dualpi2_sched_data *q, struct sk_buff *skb,
+ u64 now)
+{
+ u64 qdelay = 0;
+
+ if (q->step_in_packets)
+ qdelay = qdisc_qlen(q->l_queue);
+ else
+ qdelay = dualpi2_sojourn_time(skb, now);
+
+ if (dualpi2_skb_cb(skb)->apply_step && qdelay > q->step_thresh) {
+ if (!dualpi2_skb_cb(skb)->ect) {
+ /* Drop this non-ECT packet */
+ return 1;
+ }
+
+ if (dualpi2_mark(q, skb))
+ ++q->step_marks;
+ }
+ qdisc_bstats_update(q->l_queue, skb);
+ return 0;
+}
+
+static void drop_and_retry(struct dualpi2_sched_data *q, struct sk_buff *skb,
+ struct Qdisc *sch, enum skb_drop_reason reason)
+{
+ ++q->deferred_drops_cnt;
+ q->deferred_drops_len += qdisc_pkt_len(skb);
+ kfree_skb_reason(skb, reason);
+ qdisc_qstats_drop(sch);
+}
+
+static struct sk_buff *dualpi2_qdisc_dequeue(struct Qdisc *sch)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+ int credit_change;
+ u64 now;
+
+ now = ktime_get_ns();
+
+ while ((skb = dequeue_packet(sch, q, &credit_change, now))) {
+ if (!q->drop_early && must_drop(sch, q, skb)) {
+ drop_and_retry(q, skb, sch,
+ SKB_DROP_REASON_QDISC_CONGESTED);
+ continue;
+ }
+
+ if (skb_in_l_queue(skb) && do_step_aqm(q, skb, now)) {
+ qdisc_qstats_drop(q->l_queue);
+ drop_and_retry(q, skb, sch,
+ SKB_DROP_REASON_DUALPI2_STEP_DROP);
+ continue;
+ }
+
+ q->c_protection_credit += credit_change;
+ qdisc_bstats_update(sch, skb);
+ break;
+ }
+
+ if (q->deferred_drops_cnt) {
+ qdisc_tree_reduce_backlog(sch, q->deferred_drops_cnt,
+ q->deferred_drops_len);
+ q->deferred_drops_cnt = 0;
+ q->deferred_drops_len = 0;
+ }
+ return skb;
+}
+
+static s64 __scale_delta(u64 diff)
+{
+ do_div(diff, 1 << ALPHA_BETA_GRANULARITY);
+ return diff;
+}
+
+static void get_queue_delays(struct dualpi2_sched_data *q, u64 *qdelay_c,
+ u64 *qdelay_l)
+{
+ u64 now, qc, ql;
+
+ now = ktime_get_ns();
+ qc = READ_ONCE(q->c_head_ts);
+ ql = READ_ONCE(q->l_head_ts);
+
+ *qdelay_c = qc ? now - qc : 0;
+ *qdelay_l = ql ? now - ql : 0;
+}
+
+static u32 calculate_probability(struct Qdisc *sch)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ u32 new_prob;
+ u64 qdelay_c;
+ u64 qdelay_l;
+ u64 qdelay;
+ s64 delta;
+
+ get_queue_delays(q, &qdelay_c, &qdelay_l);
+ qdelay = max(qdelay_l, qdelay_c);
+
+	/* Alpha and beta take at most 32b, i.e., the delay difference would
+ * overflow for queuing delay differences > ~4.2sec.
+ */
+ delta = ((s64)qdelay - (s64)q->pi2_target) * q->pi2_alpha;
+ delta += ((s64)qdelay - (s64)q->last_qdelay) * q->pi2_beta;
+ q->last_qdelay = qdelay;
+
+ /* Bound new_prob between 0 and MAX_PROB */
+ if (delta > 0) {
+ new_prob = __scale_delta(delta) + q->pi2_prob;
+ if (new_prob < q->pi2_prob)
+ new_prob = MAX_PROB;
+ } else {
+ new_prob = q->pi2_prob - __scale_delta(~delta + 1);
+ if (new_prob > q->pi2_prob)
+ new_prob = 0;
+ }
+
+ /* If we do not drop on overload, ensure we cap the L4S probability to
+ * 100% to keep window fairness when overflowing.
+ */
+ if (!q->drop_overload)
+ return min_t(u32, new_prob, MAX_PROB / q->coupling_factor);
+ return new_prob;
+}
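
Written as an equation, calculate_probability() is the standard PI2 controller on queuing delay: delta = alpha * (qdelay - target) + beta * (qdelay - qdelay_prev), with alpha and beta already scaled so that, once the __scale_delta() shift removes the extra ALPHA_BETA_GRANULARITY precision, delta is expressed in probability units of MAX_PROB. The resulting probability is saturated to [0, MAX_PROB], and capped at MAX_PROB / coupling_factor when drop_overload is disabled so the coupled L4S probability can never exceed 100%.
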
+
+static u32 get_memory_limit(struct Qdisc *sch, u32 limit)
+{
+ /* Apply rule of thumb, i.e., doubling the packet length,
+ * to further include per packet overhead in memory_limit.
+ */
+ u64 memlim = mul_u32_u32(limit, 2 * psched_mtu(qdisc_dev(sch)));
+
+ if (upper_32_bits(memlim))
+ return U32_MAX;
+ else
+ return lower_32_bits(memlim);
+}
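
As a worked example of this memory rule of thumb: with the default limit of 10000 packets (set in dualpi2_reset_default()) and an Ethernet psched_mtu() of roughly 1514 bytes, the derived memory_limit is about 10000 * 2 * 1514 ≈ 30 MB, unless a TCA_DUALPI2_MEMORY_LIMIT attribute overrides it.
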
+
+static u32 convert_us_to_nsec(u32 us)
+{
+ u64 ns = mul_u32_u32(us, NSEC_PER_USEC);
+
+ if (upper_32_bits(ns))
+ return U32_MAX;
+
+ return lower_32_bits(ns);
+}
+
+static u32 convert_ns_to_usec(u64 ns)
+{
+ do_div(ns, NSEC_PER_USEC);
+ if (upper_32_bits(ns))
+ return U32_MAX;
+
+ return lower_32_bits(ns);
+}
+
+static enum hrtimer_restart dualpi2_timer(struct hrtimer *timer)
+{
+ struct dualpi2_sched_data *q = timer_container_of(q, timer, pi2_timer);
+ struct Qdisc *sch = q->sch;
+ spinlock_t *root_lock; /* to lock qdisc for probability calculations */
+
+ rcu_read_lock();
+ root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ spin_lock(root_lock);
+
+ WRITE_ONCE(q->pi2_prob, calculate_probability(sch));
+ hrtimer_set_expires(&q->pi2_timer, next_pi2_timeout(q));
+
+ spin_unlock(root_lock);
+ rcu_read_unlock();
+ return HRTIMER_RESTART;
+}
+
+static struct netlink_range_validation dualpi2_alpha_beta_range = {
+ .min = 1,
+ .max = ALPHA_BETA_MAX,
+};
+
+static const struct nla_policy dualpi2_policy[TCA_DUALPI2_MAX + 1] = {
+ [TCA_DUALPI2_LIMIT] = NLA_POLICY_MIN(NLA_U32, 1),
+ [TCA_DUALPI2_MEMORY_LIMIT] = NLA_POLICY_MIN(NLA_U32, 1),
+ [TCA_DUALPI2_TARGET] = { .type = NLA_U32 },
+ [TCA_DUALPI2_TUPDATE] = NLA_POLICY_MIN(NLA_U32, 1),
+ [TCA_DUALPI2_ALPHA] =
+ NLA_POLICY_FULL_RANGE(NLA_U32, &dualpi2_alpha_beta_range),
+ [TCA_DUALPI2_BETA] =
+ NLA_POLICY_FULL_RANGE(NLA_U32, &dualpi2_alpha_beta_range),
+ [TCA_DUALPI2_STEP_THRESH_PKTS] = { .type = NLA_U32 },
+ [TCA_DUALPI2_STEP_THRESH_US] = { .type = NLA_U32 },
+ [TCA_DUALPI2_MIN_QLEN_STEP] = { .type = NLA_U32 },
+ [TCA_DUALPI2_COUPLING] = NLA_POLICY_MIN(NLA_U8, 1),
+ [TCA_DUALPI2_DROP_OVERLOAD] =
+ NLA_POLICY_MAX(NLA_U8, TCA_DUALPI2_DROP_OVERLOAD_MAX),
+ [TCA_DUALPI2_DROP_EARLY] =
+ NLA_POLICY_MAX(NLA_U8, TCA_DUALPI2_DROP_EARLY_MAX),
+ [TCA_DUALPI2_C_PROTECTION] =
+ NLA_POLICY_RANGE(NLA_U8, 0, MAX_WC),
+ [TCA_DUALPI2_ECN_MASK] =
+ NLA_POLICY_RANGE(NLA_U8, TC_DUALPI2_ECN_MASK_L4S_ECT,
+ TCA_DUALPI2_ECN_MASK_MAX),
+ [TCA_DUALPI2_SPLIT_GSO] =
+ NLA_POLICY_MAX(NLA_U8, TCA_DUALPI2_SPLIT_GSO_MAX),
+};
+
+static int dualpi2_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_DUALPI2_MAX + 1];
+ struct dualpi2_sched_data *q;
+ int old_backlog;
+ int old_qlen;
+ int err;
+
+ if (!opt || !nla_len(opt)) {
+ NL_SET_ERR_MSG_MOD(extack, "Dualpi2 options are required");
+ return -EINVAL;
+ }
+ err = nla_parse_nested(tb, TCA_DUALPI2_MAX, opt, dualpi2_policy,
+ extack);
+ if (err < 0)
+ return err;
+ if (tb[TCA_DUALPI2_STEP_THRESH_PKTS] && tb[TCA_DUALPI2_STEP_THRESH_US]) {
+ NL_SET_ERR_MSG_MOD(extack, "multiple step thresh attributes");
+ return -EINVAL;
+ }
+
+ q = qdisc_priv(sch);
+ sch_tree_lock(sch);
+
+ if (tb[TCA_DUALPI2_LIMIT]) {
+ u32 limit = nla_get_u32(tb[TCA_DUALPI2_LIMIT]);
+
+ WRITE_ONCE(sch->limit, limit);
+ WRITE_ONCE(q->memory_limit, get_memory_limit(sch, limit));
+ }
+
+ if (tb[TCA_DUALPI2_MEMORY_LIMIT])
+ WRITE_ONCE(q->memory_limit,
+ nla_get_u32(tb[TCA_DUALPI2_MEMORY_LIMIT]));
+
+ if (tb[TCA_DUALPI2_TARGET]) {
+ u64 target = nla_get_u32(tb[TCA_DUALPI2_TARGET]);
+
+ WRITE_ONCE(q->pi2_target, target * NSEC_PER_USEC);
+ }
+
+ if (tb[TCA_DUALPI2_TUPDATE]) {
+ u64 tupdate = nla_get_u32(tb[TCA_DUALPI2_TUPDATE]);
+
+ WRITE_ONCE(q->pi2_tupdate, convert_us_to_nsec(tupdate));
+ }
+
+ if (tb[TCA_DUALPI2_ALPHA]) {
+ u32 alpha = nla_get_u32(tb[TCA_DUALPI2_ALPHA]);
+
+ WRITE_ONCE(q->pi2_alpha, dualpi2_scale_alpha_beta(alpha));
+ }
+
+ if (tb[TCA_DUALPI2_BETA]) {
+ u32 beta = nla_get_u32(tb[TCA_DUALPI2_BETA]);
+
+ WRITE_ONCE(q->pi2_beta, dualpi2_scale_alpha_beta(beta));
+ }
+
+ if (tb[TCA_DUALPI2_STEP_THRESH_PKTS]) {
+ u32 step_th = nla_get_u32(tb[TCA_DUALPI2_STEP_THRESH_PKTS]);
+
+ WRITE_ONCE(q->step_in_packets, true);
+ WRITE_ONCE(q->step_thresh, step_th);
+ } else if (tb[TCA_DUALPI2_STEP_THRESH_US]) {
+ u32 step_th = nla_get_u32(tb[TCA_DUALPI2_STEP_THRESH_US]);
+
+ WRITE_ONCE(q->step_in_packets, false);
+ WRITE_ONCE(q->step_thresh, convert_us_to_nsec(step_th));
+ }
+
+ if (tb[TCA_DUALPI2_MIN_QLEN_STEP])
+ WRITE_ONCE(q->min_qlen_step,
+ nla_get_u32(tb[TCA_DUALPI2_MIN_QLEN_STEP]));
+
+ if (tb[TCA_DUALPI2_COUPLING]) {
+ u8 coupling = nla_get_u8(tb[TCA_DUALPI2_COUPLING]);
+
+ WRITE_ONCE(q->coupling_factor, coupling);
+ }
+
+ if (tb[TCA_DUALPI2_DROP_OVERLOAD]) {
+ u8 drop_overload = nla_get_u8(tb[TCA_DUALPI2_DROP_OVERLOAD]);
+
+ WRITE_ONCE(q->drop_overload, (bool)drop_overload);
+ }
+
+ if (tb[TCA_DUALPI2_DROP_EARLY]) {
+ u8 drop_early = nla_get_u8(tb[TCA_DUALPI2_DROP_EARLY]);
+
+ WRITE_ONCE(q->drop_early, (bool)drop_early);
+ }
+
+ if (tb[TCA_DUALPI2_C_PROTECTION]) {
+ u8 wc = nla_get_u8(tb[TCA_DUALPI2_C_PROTECTION]);
+
+ dualpi2_calculate_c_protection(sch, q, wc);
+ }
+
+ if (tb[TCA_DUALPI2_ECN_MASK]) {
+ u8 ecn_mask = nla_get_u8(tb[TCA_DUALPI2_ECN_MASK]);
+
+ WRITE_ONCE(q->ecn_mask, ecn_mask);
+ }
+
+ if (tb[TCA_DUALPI2_SPLIT_GSO]) {
+ u8 split_gso = nla_get_u8(tb[TCA_DUALPI2_SPLIT_GSO]);
+
+ WRITE_ONCE(q->split_gso, (bool)split_gso);
+ }
+
+ old_qlen = qdisc_qlen(sch);
+ old_backlog = sch->qstats.backlog;
+ while (qdisc_qlen(sch) > sch->limit ||
+ q->memory_used > q->memory_limit) {
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
+
+ q->memory_used -= skb->truesize;
+ qdisc_qstats_backlog_dec(sch, skb);
+ rtnl_qdisc_drop(skb, sch);
+ }
+ qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch),
+ old_backlog - sch->qstats.backlog);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+/* Default alpha/beta values give a 10dB stability margin with max_rtt=100ms. */
+static void dualpi2_reset_default(struct Qdisc *sch)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+
+ q->sch->limit = 10000; /* Max 125ms at 1Gbps */
+ q->memory_limit = get_memory_limit(sch, q->sch->limit);
+
+ q->pi2_target = 15 * NSEC_PER_MSEC;
+ q->pi2_tupdate = 16 * NSEC_PER_MSEC;
+ q->pi2_alpha = dualpi2_scale_alpha_beta(41); /* ~0.16 Hz * 256 */
+ q->pi2_beta = dualpi2_scale_alpha_beta(819); /* ~3.20 Hz * 256 */
+
+ q->step_thresh = 1 * NSEC_PER_MSEC;
+ q->step_in_packets = false;
+
+ dualpi2_calculate_c_protection(q->sch, q, 10); /* wc=10%, wl=90% */
+
+ q->ecn_mask = TC_DUALPI2_ECN_MASK_L4S_ECT; /* INET_ECN_ECT_1 */
+ q->min_qlen_step = 0; /* Always apply step mark in L-queue */
+ q->coupling_factor = 2; /* window fairness for equal RTTs */
+ q->drop_overload = TC_DUALPI2_DROP_OVERLOAD_DROP; /* Drop overload */
+ q->drop_early = TC_DUALPI2_DROP_EARLY_DROP_DEQUEUE; /* Drop dequeue */
+ q->split_gso = TC_DUALPI2_SPLIT_GSO_SPLIT_GSO; /* Split GSO */
+}
+
+static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 1), extack);
+ if (!q->l_queue)
+ return -ENOMEM;
+
+ err = tcf_block_get(&q->tcf_block, &q->tcf_filters, sch, extack);
+ if (err)
+ return err;
+
+ q->sch = sch;
+ dualpi2_reset_default(sch);
+ hrtimer_setup(&q->pi2_timer, dualpi2_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+
+ if (opt && nla_len(opt)) {
+ err = dualpi2_change(sch, opt, extack);
+
+ if (err)
+ return err;
+ }
+
+ hrtimer_start(&q->pi2_timer, next_pi2_timeout(q),
+ HRTIMER_MODE_ABS_PINNED);
+ return 0;
+}
+
+static int dualpi2_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts;
+ bool step_in_pkts;
+ u32 step_th;
+
+ step_in_pkts = READ_ONCE(q->step_in_packets);
+ step_th = READ_ONCE(q->step_thresh);
+
+ opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!opts)
+ goto nla_put_failure;
+
+ if (step_in_pkts &&
+ (nla_put_u32(skb, TCA_DUALPI2_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_DUALPI2_MEMORY_LIMIT,
+ READ_ONCE(q->memory_limit)) ||
+ nla_put_u32(skb, TCA_DUALPI2_TARGET,
+ convert_ns_to_usec(READ_ONCE(q->pi2_target))) ||
+ nla_put_u32(skb, TCA_DUALPI2_TUPDATE,
+ convert_ns_to_usec(READ_ONCE(q->pi2_tupdate))) ||
+ nla_put_u32(skb, TCA_DUALPI2_ALPHA,
+ dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_alpha))) ||
+ nla_put_u32(skb, TCA_DUALPI2_BETA,
+ dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_beta))) ||
+ nla_put_u32(skb, TCA_DUALPI2_STEP_THRESH_PKTS, step_th) ||
+ nla_put_u32(skb, TCA_DUALPI2_MIN_QLEN_STEP,
+ READ_ONCE(q->min_qlen_step)) ||
+ nla_put_u8(skb, TCA_DUALPI2_COUPLING,
+ READ_ONCE(q->coupling_factor)) ||
+ nla_put_u8(skb, TCA_DUALPI2_DROP_OVERLOAD,
+ READ_ONCE(q->drop_overload)) ||
+ nla_put_u8(skb, TCA_DUALPI2_DROP_EARLY,
+ READ_ONCE(q->drop_early)) ||
+ nla_put_u8(skb, TCA_DUALPI2_C_PROTECTION,
+ READ_ONCE(q->c_protection_wc)) ||
+ nla_put_u8(skb, TCA_DUALPI2_ECN_MASK, READ_ONCE(q->ecn_mask)) ||
+ nla_put_u8(skb, TCA_DUALPI2_SPLIT_GSO, READ_ONCE(q->split_gso))))
+ goto nla_put_failure;
+
+ if (!step_in_pkts &&
+ (nla_put_u32(skb, TCA_DUALPI2_LIMIT, READ_ONCE(sch->limit)) ||
+ nla_put_u32(skb, TCA_DUALPI2_MEMORY_LIMIT,
+ READ_ONCE(q->memory_limit)) ||
+ nla_put_u32(skb, TCA_DUALPI2_TARGET,
+ convert_ns_to_usec(READ_ONCE(q->pi2_target))) ||
+ nla_put_u32(skb, TCA_DUALPI2_TUPDATE,
+ convert_ns_to_usec(READ_ONCE(q->pi2_tupdate))) ||
+ nla_put_u32(skb, TCA_DUALPI2_ALPHA,
+ dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_alpha))) ||
+ nla_put_u32(skb, TCA_DUALPI2_BETA,
+ dualpi2_unscale_alpha_beta(READ_ONCE(q->pi2_beta))) ||
+ nla_put_u32(skb, TCA_DUALPI2_STEP_THRESH_US,
+ convert_ns_to_usec(step_th)) ||
+ nla_put_u32(skb, TCA_DUALPI2_MIN_QLEN_STEP,
+ READ_ONCE(q->min_qlen_step)) ||
+ nla_put_u8(skb, TCA_DUALPI2_COUPLING,
+ READ_ONCE(q->coupling_factor)) ||
+ nla_put_u8(skb, TCA_DUALPI2_DROP_OVERLOAD,
+ READ_ONCE(q->drop_overload)) ||
+ nla_put_u8(skb, TCA_DUALPI2_DROP_EARLY,
+ READ_ONCE(q->drop_early)) ||
+ nla_put_u8(skb, TCA_DUALPI2_C_PROTECTION,
+ READ_ONCE(q->c_protection_wc)) ||
+ nla_put_u8(skb, TCA_DUALPI2_ECN_MASK, READ_ONCE(q->ecn_mask)) ||
+ nla_put_u8(skb, TCA_DUALPI2_SPLIT_GSO, READ_ONCE(q->split_gso))))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -1;
+}
+
+static int dualpi2_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+ struct tc_dualpi2_xstats st = {
+ .prob = READ_ONCE(q->pi2_prob),
+ .packets_in_c = q->packets_in_c,
+ .packets_in_l = q->packets_in_l,
+ .maxq = q->maxq,
+ .ecn_mark = q->ecn_mark,
+ .credit = q->c_protection_credit,
+ .step_marks = q->step_marks,
+ .memory_used = q->memory_used,
+ .max_memory_used = q->max_memory_used,
+ .memory_limit = q->memory_limit,
+ };
+ u64 qc, ql;
+
+ get_queue_delays(q, &qc, &ql);
+ st.delay_l = convert_ns_to_usec(ql);
+ st.delay_c = convert_ns_to_usec(qc);
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+/* Reset both L-queue and C-queue, internal packet counters, PI probability,
+ * C-queue protection credit, and timestamps, while preserving the current
+ * DUALPI2 configuration.
+ */
+static void dualpi2_reset(struct Qdisc *sch)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset_queue(sch);
+ qdisc_reset_queue(q->l_queue);
+ q->c_head_ts = 0;
+ q->l_head_ts = 0;
+ q->pi2_prob = 0;
+ q->packets_in_c = 0;
+ q->packets_in_l = 0;
+ q->maxq = 0;
+ q->ecn_mark = 0;
+ q->step_marks = 0;
+ q->memory_used = 0;
+ q->max_memory_used = 0;
+ dualpi2_reset_c_protection(q);
+}
+
+static void dualpi2_destroy(struct Qdisc *sch)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+
+ q->pi2_tupdate = 0;
+ hrtimer_cancel(&q->pi2_timer);
+ if (q->l_queue)
+ qdisc_put(q->l_queue);
+ tcf_block_put(q->tcf_block);
+}
+
+static struct Qdisc *dualpi2_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ return NULL;
+}
+
+static unsigned long dualpi2_find(struct Qdisc *sch, u32 classid)
+{
+ return 0;
+}
+
+static unsigned long dualpi2_bind(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ return 0;
+}
+
+static void dualpi2_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *dualpi2_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
+{
+ struct dualpi2_sched_data *q = qdisc_priv(sch);
+
+ if (cl)
+ return NULL;
+ return q->tcf_block;
+}
+
+static void dualpi2_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ /* We statically define only 2 queues */
+ for (i = 0; i < 2; i++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, i + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+/* Minimal class support to handle tc filters */
+static const struct Qdisc_class_ops dualpi2_class_ops = {
+ .leaf = dualpi2_leaf,
+ .find = dualpi2_find,
+ .tcf_block = dualpi2_tcf_block,
+ .bind_tcf = dualpi2_bind,
+ .unbind_tcf = dualpi2_unbind,
+ .walk = dualpi2_walk,
+};
+
+static struct Qdisc_ops dualpi2_qdisc_ops __read_mostly = {
+ .id = "dualpi2",
+ .cl_ops = &dualpi2_class_ops,
+ .priv_size = sizeof(struct dualpi2_sched_data),
+ .enqueue = dualpi2_qdisc_enqueue,
+ .dequeue = dualpi2_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = dualpi2_init,
+ .destroy = dualpi2_destroy,
+ .reset = dualpi2_reset,
+ .change = dualpi2_change,
+ .dump = dualpi2_dump,
+ .dump_stats = dualpi2_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init dualpi2_module_init(void)
+{
+ return register_qdisc(&dualpi2_qdisc_ops);
+}
+
+static void __exit dualpi2_module_exit(void)
+{
+ unregister_qdisc(&dualpi2_qdisc_ops);
+}
+
+module_init(dualpi2_module_init);
+module_exit(dualpi2_module_exit);
+
+MODULE_DESCRIPTION("Dual Queue with Proportional Integral controller Improved with a Square (dualpi2) scheduler");
+MODULE_AUTHOR("Koen De Schepper <koen.de_schepper@nokia-bell-labs.com>");
+MODULE_AUTHOR("Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>");
+MODULE_AUTHOR("Olga Albisser <olga@albisser.org>");
+MODULE_AUTHOR("Henrik Steen <henrist@henrist.net>");
+MODULE_AUTHOR("Olivier Tilmans <olivier.tilmans@nokia.com>");
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("1.0");
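A minimal sketch of the probability coupling behind the module above, assuming the standard DualPI2 scheme: the PI controller computes a base probability p' (kept in q->pi2_prob), the L4S queue is marked at roughly k * p' with k = q->coupling_factor (default 2 above), and the Classic queue is dropped or marked at p' squared, the "square" in the name. The helper names below are illustrative and not part of the patch.

#include <linux/kernel.h>
#include <linux/types.h>

/* Probabilities are fixed point, scaled so that U32_MAX represents 1.0. */

/* L4S (L-queue) marking probability: p_L ~= min(k * p', 1) */
static u32 example_prob_l(u32 base_prob, u8 coupling_factor)
{
	u64 p = (u64)base_prob * coupling_factor;

	return min_t(u64, p, U32_MAX);
}

/* Classic (C-queue) drop/mark probability: p_C ~= p' * p' */
static u32 example_prob_c(u32 base_prob)
{
	return ((u64)base_prob * base_prob) >> 32;
}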
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 2c069f0181c6..037f764822b9 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
for (i = q->nbands; i < oldbands; i++) {
if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
list_del_init(&q->classes[i].alist);
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
+ qdisc_purge_queue(q->classes[i].qdisc);
}
WRITE_ONCE(q->nstrict, nstrict);
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 16afb834fe4a..1e008a228ebd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -740,6 +740,8 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
err = skb_array_produce(q, skb);
if (unlikely(err)) {
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT);
+
if (qdisc_is_percpu_stats(qdisc))
return qdisc_drop_cpu(skb, qdisc, to_free);
else
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5a7745170e84..d8fd35da32a7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -835,22 +835,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
}
}
-static unsigned int
-qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- unsigned int len;
-
- skb = sch->ops->peek(sch);
- if (unlikely(skb == NULL)) {
- qdisc_warn_nonwc("qdisc_peek_len", sch);
- return 0;
- }
- len = qdisc_pkt_len(skb);
-
- return len;
-}
-
static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 14bf71f57057..c968ea763774 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -821,7 +821,9 @@ static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_ON(!hprio->row.rb_node);
+ if (unlikely(!hprio->row.rb_node))
+ return NULL;
+
sp->root = hprio->row.rb_node;
sp->pptr = &hprio->ptr;
sp->pid = &hprio->last_ptr_id;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fdd79d3ccd8c..eafc316ae319 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -973,6 +973,41 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static const struct Qdisc_class_ops netem_class_ops;
+
+static int check_netem_in_tree(struct Qdisc *sch, bool duplicates,
+ struct netlink_ext_ack *extack)
+{
+ struct Qdisc *root, *q;
+ unsigned int i;
+
+ root = qdisc_root_sleeping(sch);
+
+ if (sch != root && root->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(root))->duplicate)
+ goto err;
+ }
+
+ if (!qdisc_dev(root))
+ return 0;
+
+ hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) {
+ if (sch != q && q->ops->cl_ops == &netem_class_ops) {
+ if (duplicates ||
+ ((struct netem_sched_data *)qdisc_priv(q))->duplicate)
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ NL_SET_ERR_MSG(extack,
+ "netem: cannot mix duplicating netems with other netems in tree");
+ return -EINVAL;
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
@@ -1031,6 +1066,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
q->gap = qopt->gap;
q->counter = 0;
q->loss = qopt->loss;
+
+ ret = check_netem_in_tree(sch, qopt->duplicate, extack);
+ if (ret)
+ goto unlock;
+
q->duplicate = qopt->duplicate;
/* for compatibility with earlier versions.
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index cc30f7a32f1a..9e2b9a490db2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i = q->bands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->queues[i]);
+ qdisc_purge_queue(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bf1282cb22eb..2255355e51d3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -412,7 +412,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
bool existing = false;
struct nlattr *tb[TCA_QFQ_MAX + 1];
struct qfq_aggregate *new_agg = NULL;
- u32 weight, lmax, inv_w;
+ u32 weight, lmax, inv_w, old_weight, old_lmax;
int err;
int delta_w;
@@ -443,12 +443,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
- if (cl != NULL &&
- lmax == cl->agg->lmax &&
- weight == cl->agg->class_weight)
- return 0; /* nothing to change */
+ if (cl != NULL) {
+ sch_tree_lock(sch);
+ old_weight = cl->agg->class_weight;
+ old_lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (lmax == old_lmax && weight == old_weight)
+ return 0; /* nothing to change */
+ }
- delta_w = weight - (cl ? cl->agg->class_weight : 0);
+ delta_w = weight - (cl ? old_weight : 0);
if (q->wsum + delta_w > QFQ_MAX_WSUM) {
NL_SET_ERR_MSG_FMT_MOD(extack,
@@ -532,9 +536,6 @@ destroy_class:
static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
{
- struct qfq_sched *q = qdisc_priv(sch);
-
- qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
qdisc_put(cl->qdisc);
kfree(cl);
@@ -555,6 +556,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
+ qfq_rm_from_agg(q, cl);
sch_tree_unlock(sch);
@@ -625,6 +627,7 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
{
struct qfq_class *cl = (struct qfq_class *)arg;
struct nlattr *nest;
+ u32 class_weight, lmax;
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
@@ -633,8 +636,13 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_QFQ_WEIGHT, cl->agg->class_weight) ||
- nla_put_u32(skb, TCA_QFQ_LMAX, cl->agg->lmax))
+
+ sch_tree_lock(sch);
+ class_weight = cl->agg->class_weight;
+ lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
+ if (nla_put_u32(skb, TCA_QFQ_WEIGHT, class_weight) ||
+ nla_put_u32(skb, TCA_QFQ_LMAX, lmax))
goto nla_put_failure;
return nla_nest_end(skb, nest);
@@ -651,8 +659,10 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
memset(&xstats, 0, sizeof(xstats));
+ sch_tree_lock(sch);
xstats.weight = cl->agg->class_weight;
xstats.lmax = cl->agg->lmax;
+ sch_tree_unlock(sch);
if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
@@ -989,7 +999,7 @@ static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
if (cl->qdisc->q.qlen == 0) /* no more packets, remove from list */
list_del_init(&cl->alist);
- else if (cl->deficit < qdisc_pkt_len(cl->qdisc->ops->peek(cl->qdisc))) {
+ else if (cl->deficit < qdisc_peek_len(cl->qdisc)) {
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
@@ -1491,6 +1501,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
common.hnode) {
+ qfq_rm_from_agg(q, cl);
qfq_destroy_class(sch, cl);
}
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 339d70b4a4c5..479c42d11083 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a8081492d671..96eb2f122973 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -310,7 +310,10 @@ drop:
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
x = q->tail->next;
slot = &q->slots[x];
- q->tail->next = slot->next;
+ if (slot->next == x)
+ q->tail = NULL; /* no more active slots */
+ else
+ q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
goto drop;
}
@@ -653,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
return -EINVAL;
}
+
+ if (ctl->perturb_period < 0 ||
+ ctl->perturb_period > INT_MAX / HZ) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period");
+ return -EINVAL;
+ }
+ perturb_period = ctl->perturb_period * HZ;
+
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
@@ -669,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt,
headdrop = q->headdrop;
maxdepth = q->maxdepth;
maxflows = q->maxflows;
- perturb_period = q->perturb_period;
quantum = q->quantum;
flags = q->flags;
/* update and validate configuration */
if (ctl->quantum)
quantum = ctl->quantum;
- perturb_period = ctl->perturb_period * HZ;
if (ctl->flows)
maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
if (ctl->divisor) {
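As a worked bound for the new check above: with HZ = 1000, ctl->perturb_period is capped at INT_MAX / HZ = 2147483647 / 1000 = 2147483 seconds (about 24.8 days), so the later perturb_period = ctl->perturb_period * HZ multiplication stays within INT_MAX instead of overflowing.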
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 14021b812329..e759e43ad27e 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -998,7 +998,7 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
[TCA_TAPRIO_TC_ENTRY_INDEX] = NLA_POLICY_MAX(NLA_U32,
- TC_QOPT_MAX_QUEUE),
+ TC_QOPT_MAX_QUEUE - 1),
[TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
[TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
TC_FP_EXPRESS,
@@ -1328,13 +1328,15 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
stab = rtnl_dereference(q->root->stab);
- oper = rtnl_dereference(q->oper_sched);
+ rcu_read_lock();
+ oper = rcu_dereference(q->oper_sched);
if (oper)
taprio_update_queue_max_sdu(q, oper, stab);
- admin = rtnl_dereference(q->admin_sched);
+ admin = rcu_dereference(q->admin_sched);
if (admin)
taprio_update_queue_max_sdu(q, admin, stab);
+ rcu_read_unlock();
break;
}
@@ -1696,19 +1698,15 @@ static int taprio_parse_tc_entry(struct Qdisc *sch,
if (err < 0)
return err;
- if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
+ if (NL_REQ_ATTR_CHECK(extack, opt, tb, TCA_TAPRIO_TC_ENTRY_INDEX)) {
NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
return -EINVAL;
}
tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
- if (tc >= TC_QOPT_MAX_QUEUE) {
- NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
- return -ERANGE;
- }
-
if (*seen_tcs & BIT(tc)) {
- NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_TAPRIO_TC_ENTRY_INDEX],
+ "Duplicate tc entry");
return -EINVAL;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index dc26b22d53c7..4c977f049670 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 0c0d2757f6f8..2dc2666988fb 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -756,7 +756,7 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
struct sock *sk2 = ep2->base.sk;
if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
- !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) ||
+ !uid_eq(sk_uid(sk2), sk_uid(sk)) ||
!sk2->sk_reuseport)
continue;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a9ed2ccab1bd..3336dcfb4515 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -261,9 +261,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6);
- return udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr,
- &fl6->daddr, tclass, ip6_dst_hoplimit(dst),
- label, sctp_sk(sk)->udp_port, t->encap_port, false);
+ udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr,
+ tclass, ip6_dst_hoplimit(dst), label,
+ sctp_sk(sk)->udp_port, t->encap_port, false, 0);
+ return 0;
}
/* Returns the dst cache entry for the given source and destination ip
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ec00ee75d59a..74bff317e205 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -177,7 +177,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
sctp_sk(sk)->type, sk->sk_state, hash,
ep->base.bind_addr.port,
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk));
sctp_seq_dump_local_addrs(seq, &ep->base);
@@ -267,7 +267,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->assoc_id,
assoc->sndbuf_used,
atomic_read(&assoc->rmem_alloc),
- from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
sock_i_ino(sk),
epb->bind_addr.port,
assoc->peer.port);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f402f90eb6b6..a5ccada55f2b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1103,7 +1103,8 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
skb_set_inner_ipproto(skb, IPPROTO_SCTP);
udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr,
fl4->daddr, dscp, ip4_dst_hoplimit(dst), df,
- sctp_sk(sk)->udp_port, t->encap_port, false, false);
+ sctp_sk(sk)->udp_port, t->encap_port, false, false,
+ 0);
return 0;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1e5739858c20..4921416434f9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8345,8 +8345,8 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
bool reuse = (sk->sk_reuse || sp->reuse);
struct sctp_bind_hashbucket *head; /* hash list */
struct net *net = sock_net(sk);
- kuid_t uid = sock_i_uid(sk);
struct sctp_bind_bucket *pp;
+ kuid_t uid = sk_uid(sk);
unsigned short snum;
int ret;
@@ -8444,7 +8444,7 @@ pp_found:
(reuse && (sk2->sk_reuse || sp2->reuse) &&
sk2->sk_state != SCTP_SS_LISTENING) ||
(sk->sk_reuseport && sk2->sk_reuseport &&
- uid_eq(uid, sock_i_uid(sk2))))
+ uid_eq(uid, sk_uid(sk2))))
continue;
if ((!sk->sk_bound_dev_if || !bound_dev_if2 ||
@@ -9492,8 +9492,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_sndbuf = sk->sk_sndbuf;
newsk->sk_rcvbuf = sk->sk_rcvbuf;
newsk->sk_lingertime = sk->sk_lingertime;
- newsk->sk_rcvtimeo = sk->sk_rcvtimeo;
- newsk->sk_sndtimeo = sk->sk_sndtimeo;
+ newsk->sk_rcvtimeo = READ_ONCE(sk->sk_rcvtimeo);
+ newsk->sk_sndtimeo = READ_ONCE(sk->sk_sndtimeo);
newsk->sk_rxhash = sk->sk_rxhash;
newinet = inet_sk(newsk);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 6946c1462793..4d258a6e8033 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -240,7 +240,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport,
void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
{
/* If we don't have a fresh route, look one up */
- if (!transport->dst || transport->dst->obsolete) {
+ if (!transport->dst || READ_ONCE(transport->dst->obsolete)) {
sctp_transport_dst_release(transport);
transport->af_specific->get_dst(transport, &transport->saddr,
&transport->fl, sk);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 3760131f1484..9311c38f7abe 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -30,6 +30,10 @@
#include <linux/splice.h>
#include <net/sock.h>
+#include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#endif
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>
@@ -360,6 +364,16 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
+ switch (sk->sk_family) {
+ case AF_INET:
+ inet_sock_destruct(sk);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ inet6_sock_destruct(sk);
+ break;
+#endif
+ }
}
static struct lock_class_key smc_key;
@@ -486,8 +500,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
{
/* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo);
+ nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo);
nsk->sk_mark = READ_ONCE(osk->sk_mark);
nsk->sk_priority = READ_ONCE(osk->sk_priority);
nsk->sk_rcvlowat = osk->sk_rcvlowat;
@@ -1585,7 +1599,7 @@ static void smc_connect_work(struct work_struct *work)
{
struct smc_sock *smc = container_of(work, struct smc_sock,
connect_work);
- long timeo = smc->sk.sk_sndtimeo;
+ long timeo = READ_ONCE(smc->sk.sk_sndtimeo);
int rc = 0;
if (!timeo)
@@ -2735,8 +2749,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock,
if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
/* wait till data arrives on the socket */
- timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
- MSEC_PER_SEC);
+ timeo = secs_to_jiffies(lsmc->sockopt_defer_accept);
if (smc_sk(nsk)->use_fallback) {
struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 78ae10d06ed2..2c9084963739 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -283,10 +283,10 @@ struct smc_connection {
};
struct smc_sock { /* smc sock container */
- struct sock sk;
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6_pinfo *pinet6;
-#endif
+ union {
+ struct sock sk;
+ struct inet_sock icsk_inet;
+ };
struct socket *clcsock; /* internal tcp socket */
void (*clcsk_state_change)(struct sock *sk);
/* original stat_change fct. */
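The union above is only sound because struct inet_sock begins with an embedded struct sock, so &smc->sk and &smc->icsk_inet.sk name the same object. A hypothetical helper (not part of the patch, assumes smc.h context) makes that assumption explicit:

#include <linux/build_bug.h>
#include <linux/stddef.h>
#include <net/inet_sock.h>

/* Valid only because struct sock is the first member of struct inet_sock,
 * so both union members alias the same socket.
 */
static inline struct inet_sock *smc_inet_example(struct smc_sock *smc)
{
	BUILD_BUG_ON(offsetof(struct inet_sock, sk) != 0);
	return &smc->icsk_inet;
}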
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 521f5df80e10..5a4db151fe95 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -688,7 +688,7 @@ out:
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type, unsigned long timeout)
{
- long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
+ long rcvtimeo = READ_ONCE(smc->clcsock->sk->sk_rcvtimeo);
struct sock *clc_sk = smc->clcsock->sk;
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
@@ -707,7 +707,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
* sizeof(struct smc_clc_msg_hdr)
*/
krflags = MSG_PEEK | MSG_WAITALL;
- clc_sk->sk_rcvtimeo = timeout;
+ WRITE_ONCE(clc_sk->sk_rcvtimeo, timeout);
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1,
sizeof(struct smc_clc_msg_hdr));
len = sock_recvmsg(smc->clcsock, &msg, krflags);
@@ -795,7 +795,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
out:
- clc_sk->sk_rcvtimeo = rcvtimeo;
+ WRITE_ONCE(clc_sk->sk_rcvtimeo, rcvtimeo);
return reason_code;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index ac07b963aede..262746e304dd 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2100,8 +2100,7 @@ int smc_uncompress_bufsize(u8 compressed)
/* try to reuse a sndbuf or rmb description slot for a certain
* buffer size; if not available, return NULL
*/
-static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- struct rw_semaphore *lock,
+static struct smc_buf_desc *smc_buf_get_slot(struct rw_semaphore *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
@@ -2442,7 +2441,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
+ buf_desc = smc_buf_get_slot(lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 6fdb2d96777a..8ed2f6689b01 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -64,7 +64,7 @@ static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown))
return 1;
- r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+ r->diag_uid = from_kuid_munged(user_ns, sk_uid(sk));
r->diag_inode = sock_i_ino(sk);
return 0;
}
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
index 3c5f64ca4115..0eb00bbefd17 100644
--- a/net/smc/smc_loopback.c
+++ b/net/smc/smc_loopback.c
@@ -251,11 +251,6 @@ static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
return 0;
}
-static int smc_lo_supports_v2(void)
-{
- return SMC_LO_V2_CAPABLE;
-}
-
static void smc_lo_get_local_gid(struct smcd_dev *smcd,
struct smcd_gid *smcd_gid)
{
@@ -288,7 +283,6 @@ static const struct smcd_ops lo_ops = {
.reset_vlan_required = NULL,
.signal_event = NULL,
.move_data = smc_lo_move_data,
- .supports_v2 = smc_lo_supports_v2,
.get_local_gid = smc_lo_get_local_gid,
.get_chid = smc_lo_get_chid,
.get_dev = smc_lo_get_dev,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index b391c2ef463f..76ad29e31d60 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -370,7 +370,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
goto out_put;
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
- strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
+ strscpy(new_pe->eth_name, eth_name);
rc = -EEXIST;
new_netdev = true;
mutex_lock(&pnettable->lock);
diff --git a/net/socket.c b/net/socket.c
index 9a0e720f0859..682969deaed3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -592,10 +592,12 @@ static int sockfs_setattr(struct mnt_idmap *idmap,
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
- if (sock->sk)
- sock->sk->sk_uid = iattr->ia_uid;
- else
+ if (sock->sk) {
+ /* Paired with READ_ONCE() in sk_uid() */
+ WRITE_ONCE(sock->sk->sk_uid, iattr->ia_uid);
+ } else {
err = -ENOENT;
+ }
}
return err;
@@ -843,6 +845,52 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
sizeof(ts_pktinfo), &ts_pktinfo);
}
+bool skb_has_tx_timestamp(struct sk_buff *skb, const struct sock *sk)
+{
+ const struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
+
+ if (serr->ee.ee_errno != ENOMSG ||
+ serr->ee.ee_origin != SO_EE_ORIGIN_TIMESTAMPING)
+ return false;
+
+ /* software time stamp available and wanted */
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && skb->tstamp)
+ return true;
+ /* hardware time stamps available and wanted */
+ return (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ skb_hwtstamps(skb)->hwtstamp;
+}
+
+int skb_get_tx_timestamp(struct sk_buff *skb, struct sock *sk,
+ struct timespec64 *ts)
+{
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
+ ktime_t hwtstamp;
+ int if_index = 0;
+
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ ktime_to_timespec64_cond(skb->tstamp, ts))
+ return SOF_TIMESTAMPING_TX_SOFTWARE;
+
+ if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) ||
+ skb_is_swtx_tstamp(skb, false))
+ return -ENOENT;
+
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+ hwtstamp = get_timestamp(sk, skb, &if_index);
+ else
+ hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ hwtstamp = ptp_convert_timestamp(&hwtstamp,
+ READ_ONCE(sk->sk_bind_phc));
+ if (!ktime_to_timespec64_cond(hwtstamp, ts))
+ return -ENOENT;
+
+ return SOF_TIMESTAMPING_TX_HARDWARE;
+}
+
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
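A sketch of how a caller might use the two helpers added above to report a TX timestamp for an skb taken from the socket's error queue; report_tx_tstamp_example() is hypothetical and not part of the patch.

#include <linux/skbuff.h>
#include <linux/time64.h>
#include <net/sock.h>

static void report_tx_tstamp_example(struct sock *sk, struct sk_buff *skb)
{
	struct timespec64 ts;
	int type;

	if (!skb_has_tx_timestamp(skb, sk))
		return;

	type = skb_get_tx_timestamp(skb, sk, &ts);	/* -ENOENT if unusable */
	if (type < 0)
		return;

	pr_info("%s tx timestamp %lld.%09ld\n",
		type == SOF_TIMESTAMPING_TX_HARDWARE ? "hardware" : "software",
		(long long)ts.tv_sec, ts.tv_nsec);
}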
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index d946bfb424c7..43b1f558b33d 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -333,7 +333,7 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
struct strparser *strp = (struct strparser *)desc->arg.data;
return __strp_recv(desc, orig_skb, orig_offset, orig_len,
- strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo);
+ strp->sk->sk_rcvbuf, READ_ONCE(strp->sk->sk_rcvtimeo));
}
static int default_read_sock_done(struct strparser *strp, int err)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 0fa244f16876..5c095cb8cb20 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -887,25 +887,16 @@ static void gss_pipe_dentry_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *gss_pipe = pdo->pdo_data;
- struct rpc_pipe *pipe = gss_pipe->pipe;
- if (pipe->dentry != NULL) {
- rpc_unlink(pipe->dentry);
- pipe->dentry = NULL;
- }
+ rpc_unlink(gss_pipe->pipe);
}
static int gss_pipe_dentry_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *p = pdo->pdo_data;
- struct dentry *dentry;
- dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- p->pipe->dentry = dentry;
- return 0;
+ return rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
}
static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
@@ -1724,7 +1715,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len);
/* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */
while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count))
- maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i], seq, p, len);
+ maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat)
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 73a90ad873fb..e82212f6b562 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1628,7 +1628,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- rqstp->rq_auth_stat = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_failed;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
if (!svcdata)
@@ -1638,6 +1638,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
svcdata->rsci = NULL;
gc = &svcdata->clcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart))
goto auth_err;
if (gc->gc_v != RPC_GSS_VERSION)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 21426c3049d3..8ca354ecfd02 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -112,47 +112,46 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
}
}
-static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
+static int rpc_setup_pipedir_sb(struct super_block *sb,
struct rpc_clnt *clnt)
{
static uint32_t clntid;
const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, dir_name);
if (dir == NULL) {
pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
- return dir;
+ return -ENOENT;
}
for (;;) {
snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
name[sizeof(name) - 1] = '\0';
- dentry = rpc_create_client_dir(dir, name, clnt);
- if (!IS_ERR(dentry))
+ err = rpc_create_client_dir(dir, name, clnt);
+ if (!err)
break;
- if (dentry == ERR_PTR(-EEXIST))
+ if (err == -EEXIST)
continue;
printk(KERN_INFO "RPC: Couldn't create pipefs entry"
- " %s/%s, error %ld\n",
- dir_name, name, PTR_ERR(dentry));
+ " %s/%s, error %d\n",
+ dir_name, name, err);
break;
}
dput(dir);
- return dentry;
+ return err;
}
static int
rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
- struct dentry *dentry;
-
clnt->pipefs_sb = pipefs_sb;
if (clnt->cl_program->pipe_dir_name != NULL) {
- dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ int err = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+ if (err && err != -ENOENT)
+ return err;
}
return 0;
}
@@ -180,16 +179,9 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
- struct dentry *dentry;
-
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = rpc_setup_pipedir_sb(sb, clnt);
- if (!dentry)
- return -ENOENT;
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- break;
+ return rpc_setup_pipedir_sb(sb, clnt);
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
break;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 98f78cd55905..0bd1df2ebb47 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -168,8 +168,9 @@ rpc_inode_setowner(struct inode *inode, void *private)
}
static void
-rpc_close_pipes(struct inode *inode)
+rpc_close_pipes(struct dentry *dentry)
{
+ struct inode *inode = dentry->d_inode;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
int need_release;
LIST_HEAD(free_list);
@@ -484,60 +485,6 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return inode;
}
-static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- struct inode *inode;
-
- d_drop(dentry);
- inode = rpc_get_inode(dir->i_sb, mode);
- if (!inode)
- goto out_err;
- inode->i_ino = iunique(dir->i_sb, 100);
- if (i_fop)
- inode->i_fop = i_fop;
- if (private)
- rpc_inode_setowner(inode, private);
- d_add(dentry, inode);
- return 0;
-out_err:
- printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
- __FILE__, __func__, dentry);
- dput(dentry);
- return -ENOMEM;
-}
-
-static int __rpc_create(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private);
- if (err)
- return err;
- fsnotify_create(dir, dentry);
- return 0;
-}
-
-static int __rpc_mkdir(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private);
- if (err)
- return err;
- inc_nlink(dir);
- fsnotify_mkdir(dir, dentry);
- return 0;
-}
-
static void
init_pipe(struct rpc_pipe *pipe)
{
@@ -574,119 +521,60 @@ struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags)
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_data);
-static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private,
- struct rpc_pipe *pipe)
+static int rpc_new_file(struct dentry *parent,
+ const char *name,
+ umode_t mode,
+ const struct file_operations *i_fop,
+ void *private)
{
- struct rpc_inode *rpci;
- int err;
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
- err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
- if (err)
- return err;
- rpci = RPC_I(d_inode(dentry));
- rpci->private = private;
- rpci->pipe = pipe;
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ inode = rpc_get_inode(dir->i_sb, S_IFREG | mode);
+ if (unlikely(!inode)) {
+ dput(dentry);
+ inode_unlock(dir);
+ return -ENOMEM;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ if (i_fop)
+ inode->i_fop = i_fop;
+ rpc_inode_setowner(inode, private);
+ d_instantiate(dentry, inode);
fsnotify_create(dir, dentry);
+ inode_unlock(dir);
return 0;
}
-static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
-{
- int ret;
-
- dget(dentry);
- ret = simple_rmdir(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_rmdir(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
-{
- int ret;
-
- dget(dentry);
- ret = simple_unlink(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_unlink(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
+static struct dentry *rpc_new_dir(struct dentry *parent,
+ const char *name,
+ umode_t mode)
{
- struct inode *inode = d_inode(dentry);
-
- rpc_close_pipes(inode);
- return __rpc_unlink(dir, dentry);
-}
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
-static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
- const char *name)
-{
- struct qstr q = QSTR(name);
- struct dentry *dentry = try_lookup_noperm(&q, parent);
- if (!dentry) {
- dentry = d_alloc(parent, &q);
- if (!dentry)
- return ERR_PTR(-ENOMEM);
- }
- if (d_really_is_negative(dentry))
+ if (IS_ERR(dentry))
return dentry;
- dput(dentry);
- return ERR_PTR(-EEXIST);
-}
-/*
- * FIXME: This probably has races.
- */
-static void __rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
- struct dentry *dentry;
- struct qstr name;
- int i;
-
- for (i = start; i < eof; i++) {
- name.name = files[i].name;
- name.len = strlen(files[i].name);
- dentry = try_lookup_noperm(&name, parent);
-
- if (dentry == NULL)
- continue;
- if (d_really_is_negative(dentry))
- goto next;
- switch (d_inode(dentry)->i_mode & S_IFMT) {
- default:
- BUG();
- case S_IFREG:
- __rpc_unlink(dir, dentry);
- break;
- case S_IFDIR:
- __rpc_rmdir(dir, dentry);
- }
-next:
+ inode = rpc_get_inode(dir->i_sb, S_IFDIR | mode);
+ if (unlikely(!inode)) {
dput(dentry);
+ inode_unlock(dir);
+ return ERR_PTR(-ENOMEM);
}
-}
-static void rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
-
- inode_lock_nested(dir, I_MUTEX_CHILD);
- __rpc_depopulate(parent, files, start, eof);
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inc_nlink(dir);
+ d_instantiate(dentry, inode);
+ fsnotify_mkdir(dir, dentry);
inode_unlock(dir);
+
+ return dentry;
}
static int rpc_populate(struct dentry *parent,
@@ -694,92 +582,39 @@ static int rpc_populate(struct dentry *parent,
int start, int eof,
void *private)
{
- struct inode *dir = d_inode(parent);
struct dentry *dentry;
int i, err;
- inode_lock(dir);
for (i = start; i < eof; i++) {
- dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_bad;
switch (files[i].mode & S_IFMT) {
default:
BUG();
case S_IFREG:
- err = __rpc_create(dir, dentry,
+ err = rpc_new_file(parent,
+ files[i].name,
files[i].mode,
files[i].i_fop,
private);
+ if (err)
+ goto out_bad;
break;
case S_IFDIR:
- err = __rpc_mkdir(dir, dentry,
- files[i].mode,
- NULL,
- private);
+ dentry = rpc_new_dir(parent,
+ files[i].name,
+ files[i].mode);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_bad;
+ }
}
- if (err != 0)
- goto out_bad;
}
- inode_unlock(dir);
return 0;
out_bad:
- __rpc_depopulate(parent, files, start, eof);
- inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
}
-static struct dentry *rpc_mkdir_populate(struct dentry *parent,
- const char *name, umode_t mode, void *private,
- int (*populate)(struct dentry *, void *), void *args_populate)
-{
- struct dentry *dentry;
- struct inode *dir = d_inode(parent);
- int error;
-
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- error = __rpc_mkdir(dir, dentry, mode, NULL, private);
- if (error != 0)
- goto out_err;
- if (populate != NULL) {
- error = populate(dentry, args_populate);
- if (error)
- goto err_rmdir;
- }
-out:
- inode_unlock(dir);
- return dentry;
-err_rmdir:
- __rpc_rmdir(dir, dentry);
-out_err:
- dentry = ERR_PTR(error);
- goto out;
-}
-
-static int rpc_rmdir_depopulate(struct dentry *dentry,
- void (*depopulate)(struct dentry *))
-{
- struct dentry *parent;
- struct inode *dir;
- int error;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- if (depopulate != NULL)
- depopulate(dentry);
- error = __rpc_rmdir(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
-}
-
/**
* rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace
* communication
@@ -799,11 +634,13 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
* The @private argument passed here will be available to all these methods
* from the file pointer, via RPC_I(file_inode(file))->private.
*/
-struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
+int rpc_mkpipe_dentry(struct dentry *parent, const char *name,
void *private, struct rpc_pipe *pipe)
{
- struct dentry *dentry;
struct inode *dir = d_inode(parent);
+ struct dentry *dentry;
+ struct inode *inode;
+ struct rpc_inode *rpci;
umode_t umode = S_IFIFO | 0600;
int err;
@@ -812,48 +649,53 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~0222;
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
- private, pipe);
- if (err)
- goto out_err;
-out:
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto failed;
+ }
+
+ inode = rpc_get_inode(dir->i_sb, umode);
+ if (unlikely(!inode)) {
+ dput(dentry);
+ inode_unlock(dir);
+ err = -ENOMEM;
+ goto failed;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inode->i_fop = &rpc_pipe_fops;
+ rpci = RPC_I(inode);
+ rpci->private = private;
+ rpci->pipe = pipe;
+ rpc_inode_setowner(inode, private);
+ d_instantiate(dentry, inode);
+ pipe->dentry = dentry;
+ fsnotify_create(dir, dentry);
inode_unlock(dir);
- return dentry;
-out_err:
- dentry = ERR_PTR(err);
- printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
- __FILE__, __func__, parent, name,
- err);
- goto out;
+ return 0;
+
+failed:
+ pr_warn("%s() failed to create pipe %pd/%s (errno = %d)\n",
+ __func__, parent, name, err);
+ return err;
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry);
/**
* rpc_unlink - remove a pipe
- * @dentry: dentry for the pipe, as returned from rpc_mkpipe
+ * @pipe: the pipe to be removed
*
* After this call, lookups will no longer find the pipe, and any
* attempts to read or write using preexisting opens of the pipe will
* return -EPIPE.
*/
-int
-rpc_unlink(struct dentry *dentry)
+void
+rpc_unlink(struct rpc_pipe *pipe)
{
- struct dentry *parent;
- struct inode *dir;
- int error = 0;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- error = __rpc_rmpipe(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
+ if (pipe->dentry) {
+ simple_recursive_removal(pipe->dentry, rpc_close_pipes);
+ pipe->dentry = NULL;
+ }
}
EXPORT_SYMBOL_GPL(rpc_unlink);
@@ -1010,31 +852,6 @@ rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
pdo->pdo_ops->destroy(dir, pdo);
}
-enum {
- RPCAUTH_info,
- RPCAUTH_EOF
-};
-
-static const struct rpc_filelist authfiles[] = {
- [RPCAUTH_info] = {
- .name = "info",
- .i_fop = &rpc_info_operations,
- .mode = S_IFREG | 0400,
- },
-};
-
-static int rpc_clntdir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- authfiles, RPCAUTH_info, RPCAUTH_EOF,
- private);
-}
-
-static void rpc_clntdir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF);
-}
-
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
* @dentry: the parent of new directory
@@ -1046,19 +863,27 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
* information about the client, together with any "pipes" that may
* later be created using rpc_mkpipe().
*/
-struct dentry *rpc_create_client_dir(struct dentry *dentry,
- const char *name,
- struct rpc_clnt *rpc_client)
+int rpc_create_client_dir(struct dentry *dentry,
+ const char *name,
+ struct rpc_clnt *rpc_client)
{
struct dentry *ret;
+ int err;
- ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
- rpc_clntdir_populate, rpc_client);
- if (!IS_ERR(ret)) {
- rpc_client->cl_pipedir_objects.pdh_dentry = ret;
- rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ ret = rpc_new_dir(dentry, name, 0555);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+ err = rpc_new_file(ret, "info", S_IFREG | 0400,
+ &rpc_info_operations, rpc_client);
+ if (err) {
+ pr_warn("%s failed to populate directory %pd\n",
+ __func__, ret);
+ simple_recursive_removal(ret, NULL);
+ return err;
}
- return ret;
+ rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+ rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ return 0;
}
/**
@@ -1073,7 +898,8 @@ int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
return 0;
rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
- return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
+ simple_recursive_removal(dentry, NULL);
+ return 0;
}
static const struct rpc_filelist cache_pipefs_files[3] = {
@@ -1094,28 +920,25 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
},
};
-static int rpc_cachedir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- cache_pipefs_files, 0, 3,
- private);
-}
-
-static void rpc_cachedir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, cache_pipefs_files, 0, 3);
-}
-
struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name,
umode_t umode, struct cache_detail *cd)
{
- return rpc_mkdir_populate(parent, name, umode, NULL,
- rpc_cachedir_populate, cd);
+ struct dentry *dentry;
+
+ dentry = rpc_new_dir(parent, name, umode);
+ if (!IS_ERR(dentry)) {
+ int error = rpc_populate(dentry, cache_pipefs_files, 0, 3, cd);
+ if (error) {
+ simple_recursive_removal(dentry, NULL);
+ return ERR_PTR(error);
+ }
+ }
+ return dentry;
}
void rpc_remove_cache_dir(struct dentry *dentry)
{
- rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate);
+ simple_recursive_removal(dentry, NULL);
}
/*
@@ -1141,7 +964,6 @@ enum {
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
RPCAUTH_nfsd,
- RPCAUTH_gssd,
RPCAUTH_RootEOF
};
@@ -1178,10 +1000,6 @@ static const struct rpc_filelist files[] = {
.name = "nfsd",
.mode = S_IFDIR | 0555,
},
- [RPCAUTH_gssd] = {
- .name = "gssd",
- .mode = S_IFDIR | 0555,
- },
};
/*
@@ -1241,13 +1059,6 @@ void rpc_put_sb_net(const struct net *net)
}
EXPORT_SYMBOL_GPL(rpc_put_sb_net);
-static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
- [0] = {
- .name = "clntXX",
- .mode = S_IFDIR | 0555,
- },
-};
-
static ssize_t
dummy_downcall(struct file *filp, const char __user *src, size_t len)
{
@@ -1276,14 +1087,6 @@ rpc_dummy_info_show(struct seq_file *m, void *v)
}
DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
-static const struct rpc_filelist gssd_dummy_info_file[] = {
- [0] = {
- .name = "info",
- .i_fop = &rpc_dummy_info_fops,
- .mode = S_IFREG | 0400,
- },
-};
-
/**
* rpc_gssd_dummy_populate - create a dummy gssd pipe
* @root: root of the rpc_pipefs filesystem
@@ -1292,69 +1095,32 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
* Create a dummy set of directories and a pipe that gssd can hold open to
* indicate that it is up and running.
*/
-static struct dentry *
+static int
rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
{
- int ret = 0;
- struct dentry *gssd_dentry;
- struct dentry *clnt_dentry = NULL;
- struct dentry *pipe_dentry = NULL;
-
- /* We should never get this far if "gssd" doesn't exist */
- gssd_dentry = try_lookup_noperm(&QSTR(files[RPCAUTH_gssd].name), root);
- if (!gssd_dentry)
- return ERR_PTR(-ENOENT);
-
- ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL);
- if (ret) {
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
+ struct dentry *gssd_dentry, *clnt_dentry;
+ int err;
- clnt_dentry = try_lookup_noperm(&QSTR(gssd_dummy_clnt_dir[0].name),
- gssd_dentry);
- if (!clnt_dentry) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(-ENOENT);
- goto out;
- }
+ gssd_dentry = rpc_new_dir(root, "gssd", 0555);
+ if (IS_ERR(gssd_dentry))
+ return -ENOENT;
- ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL);
- if (ret) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
+ clnt_dentry = rpc_new_dir(gssd_dentry, "clntXX", 0555);
+ if (IS_ERR(clnt_dentry))
+ return -ENOENT;
- pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
- if (IS_ERR(pipe_dentry)) {
- __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- }
-out:
- dput(clnt_dentry);
- dput(gssd_dentry);
- return pipe_dentry;
-}
-
-static void
-rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
-{
- struct dentry *clnt_dir = pipe_dentry->d_parent;
- struct dentry *gssd_dir = clnt_dir->d_parent;
-
- dget(pipe_dentry);
- __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
- __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
- dput(pipe_dentry);
+ err = rpc_new_file(clnt_dentry, "info", 0400,
+ &rpc_dummy_info_fops, NULL);
+ if (!err)
+ err = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
+ return err;
}
static int
rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
- struct dentry *root, *gssd_dentry;
+ struct dentry *root;
struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1363,7 +1129,7 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RPCAUTH_GSSMAGIC;
sb->s_op = &s_ops;
- sb->s_d_op = &simple_dentry_operations;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_time_gran = 1;
inode = rpc_get_inode(sb, S_IFDIR | 0555);
@@ -1373,11 +1139,9 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
- gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
- if (IS_ERR(gssd_dentry)) {
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
- return PTR_ERR(gssd_dentry);
- }
+ err = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
+ if (err)
+ return err;
dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
net->ns.inum, NET_NAME(net));
@@ -1386,18 +1150,6 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
sb);
- if (err)
- goto err_depopulate;
- mutex_unlock(&sn->pipefs_sb_lock);
- return 0;
-
-err_depopulate:
- rpc_gssd_dummy_depopulate(gssd_dentry);
- blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
- RPC_PIPEFS_UMOUNT,
- sb);
- sn->pipefs_sb = NULL;
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
mutex_unlock(&sn->pipefs_sb_lock);
return err;
}
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1b2b84feeec6..4e92e2a50168 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -27,97 +27,60 @@
struct xdr_skb_reader {
struct sk_buff *skb;
unsigned int offset;
+ bool need_checksum;
size_t count;
__wsum csum;
};
-typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
- size_t len);
-
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
- * Possibly called several times to iterate over an sk_buff and copy
- * data out of it.
+ * Possibly called several times to iterate over an sk_buff and copy data out of
+ * it.
*/
static size_t
xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
- if (len > desc->count)
- len = desc->count;
- if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
- return 0;
- desc->count -= len;
- desc->offset += len;
- return len;
-}
+ len = min(len, desc->count);
+
+ if (desc->need_checksum) {
+ __wsum csum;
+
+ csum = skb_copy_and_csum_bits(desc->skb, desc->offset, to, len);
+ desc->csum = csum_block_add(desc->csum, csum, desc->offset);
+ } else {
+ if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
+ return 0;
+ }
-/**
- * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
- * @desc: sk_buff copy helper
- * @to: copy destination
- * @len: number of bytes to copy
- *
- * Same as skb_read_bits, but calculate a checksum at the same time.
- */
-static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
-{
- unsigned int pos;
- __wsum csum2;
-
- if (len > desc->count)
- len = desc->count;
- pos = desc->offset;
- csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
- desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
-/**
- * xdr_partial_copy_from_skb - copy data out of an skb
- * @xdr: target XDR buffer
- * @base: starting offset
- * @desc: sk_buff copy helper
- * @copy_actor: virtual method for copying data
- *
- */
static ssize_t
-xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, struct xdr_skb_reader *desc)
{
- struct page **ppage = xdr->pages;
- unsigned int len, pglen = xdr->page_len;
- ssize_t copied = 0;
- size_t ret;
-
- len = xdr->head[0].iov_len;
- if (base < len) {
- len -= base;
- ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
- copied += ret;
- if (ret != len || !desc->count)
- goto out;
- base = 0;
- } else
- base -= len;
-
- if (unlikely(pglen == 0))
- goto copy_tail;
- if (unlikely(base >= pglen)) {
- base -= pglen;
- goto copy_tail;
- }
- if (base || xdr->page_base) {
- pglen -= base;
- base += xdr->page_base;
- ppage += base >> PAGE_SHIFT;
- base &= ~PAGE_MASK;
- }
- do {
+ struct page **ppage = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ unsigned int poff = xdr->page_base & ~PAGE_MASK;
+ unsigned int pglen = xdr->page_len;
+ ssize_t copied = 0;
+ size_t ret;
+
+ if (xdr->head[0].iov_len == 0)
+ return 0;
+
+ ret = xdr_skb_read_bits(desc, xdr->head[0].iov_base,
+ xdr->head[0].iov_len);
+ if (ret != xdr->head[0].iov_len || !desc->count)
+ return ret;
+ copied += ret;
+
+ while (pglen) {
+ unsigned int len = min(PAGE_SIZE - poff, pglen);
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
@@ -126,36 +89,29 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb
*ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
- copied = -ENOMEM;
- goto out;
+ return -ENOMEM;
+ return copied;
}
}
- len = PAGE_SIZE;
kaddr = kmap_atomic(*ppage);
- if (base) {
- len -= base;
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr + base, len);
- base = 0;
- } else {
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr, len);
- }
+ ret = xdr_skb_read_bits(desc, kaddr + poff, len);
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
+
copied += ret;
if (ret != len || !desc->count)
- goto out;
+ return copied;
ppage++;
- } while ((pglen -= len) != 0);
-copy_tail:
- len = xdr->tail[0].iov_len;
- if (base < len)
- copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
-out:
+ pglen -= len;
+ poff = 0;
+ }
+
+ if (xdr->tail[0].iov_len) {
+ copied += xdr_skb_read_bits(desc, xdr->tail[0].iov_base,
+ xdr->tail[0].iov_len);
+ }
+
return copied;
}
@@ -169,17 +125,22 @@ out:
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
- struct xdr_skb_reader desc;
-
- desc.skb = skb;
- desc.offset = 0;
- desc.count = skb->len - desc.offset;
+ struct xdr_skb_reader desc = {
+ .skb = skb,
+ .count = skb->len,
+ };
- if (skb_csum_unnecessary(skb))
- goto no_checksum;
+ if (skb_csum_unnecessary(skb)) {
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
+ return -1;
+ if (desc.count)
+ return -1;
+ return 0;
+ }
+ desc.need_checksum = true;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
@@ -194,14 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev, skb);
return 0;
-no_checksum:
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
}
-EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
size_t seek)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 939b6239df8a..b1fab3a69544 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -638,8 +638,6 @@ EXPORT_SYMBOL_GPL(svc_destroy);
static bool
svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
{
- unsigned long ret;
-
rqstp->rq_maxpages = svc_serv_maxpages(serv);
/* rq_pages' last entry is NULL for historical reasons. */
@@ -649,9 +647,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
if (!rqstp->rq_pages)
return false;
- ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages,
- rqstp->rq_pages);
- return ret == rqstp->rq_maxpages;
+ return true;
}
/*
@@ -755,14 +751,16 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool)
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
if (!task_is_running(rqstp->rq_task)) {
wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
+ trace_svc_pool_thread_wake(pool, rqstp->rq_task->pid);
percpu_counter_inc(&pool->sp_threads_woken);
+ } else {
+ trace_svc_pool_thread_running(pool, rqstp->rq_task->pid);
}
rcu_read_unlock();
return;
}
rcu_read_unlock();
-
+ trace_svc_pool_thread_noidle(pool, 0);
}
EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
@@ -1336,6 +1334,9 @@ svc_process_common(struct svc_rqst *rqstp)
int pr, rc;
__be32 *p;
+ /* Reset the accept_stat for the RPC */
+ rqstp->rq_accept_statp = NULL;
+
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
@@ -1375,9 +1376,8 @@ svc_process_common(struct svc_rqst *rqstp)
case SVC_OK:
break;
case SVC_GARBAGE:
- goto err_garbage_args;
- case SVC_SYSERR:
- goto err_system_err;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ goto err_bad_auth;
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
@@ -1388,7 +1388,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto sendit;
default:
pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
- goto err_system_err;
+ rqstp->rq_auth_stat = rpc_autherr_failed;
+ goto err_bad_auth;
}
if (progp == NULL)
@@ -1515,20 +1516,6 @@ err_bad_proc:
serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
-
-err_garbage_args:
- svc_printk(rqstp, "failed to decode RPC header\n");
-
- if (serv->sv_stats)
- serv->sv_stats->rpcbadfmt++;
- *rqstp->rq_accept_statp = rpc_garbage_args;
- goto sendit;
-
-err_system_err:
- if (serv->sv_stats)
- serv->sv_stats->rpcbadfmt++;
- *rqstp->rq_accept_statp = rpc_system_err;
- goto sendit;
}
/*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e1c85123b445..46c156b121db 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1197,7 +1197,7 @@ err_noclose:
* that the pages backing @xdr are unchanging.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
- rpc_fraghdr marker, unsigned int *sentp)
+ rpc_fraghdr marker, int *sentp)
{
struct msghdr msg = {
.msg_flags = MSG_SPLICE_PAGES,
@@ -1247,8 +1247,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- unsigned int sent;
- int err;
+ int sent, err;
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2ea00e354ba6..1346fdf33835 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -993,21 +993,18 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer into which to encode data
- * @pages: list of pages to decode into
- * @rqst: pointer to controlling rpc_rqst, for debugging
*
*/
-void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
- struct page **pages, struct rpc_rqst *rqst)
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf)
{
xdr_reset_scratch_buffer(xdr);
xdr->buf = buf;
- xdr->page_ptr = pages;
+ xdr->page_ptr = buf->pages;
xdr->iov = NULL;
- xdr->p = page_address(*pages);
+ xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
- xdr->rqst = rqst;
+ xdr->rqst = NULL;
}
EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7c61d47ea208..e028bf658499 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3642,7 +3642,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
nla_put_u32(skb, TIPC_NLA_SOCK_UID,
from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
- sock_i_uid(sk))) ||
+ sk_uid(sk))) ||
nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
tipc_diag_gen_cookie(sk),
TIPC_NLA_SOCK_PAD))
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 8ee0c07d00e9..ffe577bf6b51 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -704,8 +704,10 @@ static void tipc_topsrv_stop(struct net *net)
for (id = 0; srv->idr_in_use; id++) {
con = idr_find(&srv->conn_idr, id);
if (con) {
+ conn_get(con);
spin_unlock_bh(&srv->idr_lock);
tipc_conn_close(con);
+ conn_put(con);
spin_lock_bh(&srv->idr_lock);
}
}
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 108a4cc2e001..b85ab0fb3b8c 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -172,7 +172,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
struct udp_media_addr *dst, struct dst_cache *cache)
{
struct dst_entry *ndst;
- int ttl, err = 0;
+ int ttl, err;
local_bh_disable();
ndst = dst_cache_get(cache);
@@ -197,7 +197,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0, src->port,
- dst->port, false, true);
+ dst->port, false, true, 0);
#if IS_ENABLED(CONFIG_IPV6)
} else {
if (!ndst) {
@@ -217,13 +217,13 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
dst_cache_set_ip6(cache, ndst, &fl6.saddr);
}
ttl = ip6_dst_hoplimit(ndst);
- err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
- &src->ipv6, &dst->ipv6, 0, ttl, 0,
- src->port, dst->port, false);
+ udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL,
+ &src->ipv6, &dst->ipv6, 0, ttl, 0,
+ src->port, dst->port, false, 0);
#endif
}
local_bh_enable();
- return err;
+ return 0;
tx_error:
local_bh_enable();
@@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = tipc_bearer_find(net, bname);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
@@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
rtnl_lock();
b = rtnl_dereference(tn->bearer_list[bid]);
- if (!b) {
+ if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) {
rtnl_unlock();
return -EINVAL;
}
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 65b0da6fdf6a..095cf31bae0b 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -512,9 +512,8 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
if (inq < strp->stm.full_len)
return tls_strp_read_copy(strp, true);
+ tls_strp_load_anchor_with_queue(strp, inq);
if (!strp->stm.full_len) {
- tls_strp_load_anchor_with_queue(strp, inq);
-
sz = tls_rx_msg_size(strp, strp->anchor);
if (sz < 0) {
tls_strp_abort_strp(strp, sz);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fc88e34b7f33..549d1ea01a72 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -872,6 +872,19 @@ more_data:
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
delta -= msg->sg.size;
+
+ if ((s32)delta > 0) {
+ /* It indicates that we executed bpf_msg_pop_data(),
+ * causing the plaintext data size to decrease.
+ * Therefore the encrypted data size also needs to
+ * correspondingly decrease. We only need to subtract
+ * delta to calculate the new ciphertext length since
+ * ktls does not support block encryption.
+ */
+ struct sk_msg *enc = &ctx->open_rec->msg_encrypted;
+
+ sk_msg_trim(sk, enc, enc->sg.size - delta);
+ }
}
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) {
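
A worked example of the delta accounting above, in plain user-space C with made-up sizes rather than the kernel's struct sk_msg: bpf_msg_pop_data() shrinks the plaintext by some amount, and since kTLS does not support block encryption the pending encrypted copy shrinks one-for-one with it.

#include <assert.h>

int main(void)
{
	unsigned int before = 1000;	/* msg->sg.size ahead of the verdict program */
	unsigned int after  = 800;	/* msg->sg.size after bpf_msg_pop_data() ran  */
	unsigned int enc    = 1000;	/* msg_encrypted provisioned for the old size */
	unsigned int delta;

	delta = before - after;		/* mirrors "delta -= msg->sg.size" above */
	enc -= delta;			/* what sk_msg_trim(sk, enc, enc->sg.size - delta) achieves */
	assert(enc == after);
	return 0;
}
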
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2e2e9997a68e..6d7c110814ff 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -646,9 +646,6 @@ static void unix_sock_destructor(struct sock *sk)
return;
}
- if (sk->sk_peer_pid)
- pidfs_put_pid(sk->sk_peer_pid);
-
if (u->addr)
unix_release_addr(u->addr);
@@ -660,6 +657,11 @@ static void unix_sock_destructor(struct sock *sk)
#endif
}
+static unsigned int unix_skb_len(const struct sk_buff *skb)
+{
+ return skb->len - UNIXCB(skb).consumed;
+}
+
static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
@@ -694,10 +696,16 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
+ struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb && !unix_skb_len(skb))
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+#endif
unix_state_lock(skpair);
/* No more writes */
WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
+ if (skb || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
@@ -769,7 +777,6 @@ static void drop_peercred(struct unix_peercred *peercred)
swap(peercred->peer_pid, pid);
swap(peercred->peer_cred, cred);
- pidfs_put_pid(pid);
put_pid(pid);
put_cred(cred);
}
@@ -802,7 +809,6 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
spin_lock(&sk->sk_peer_lock);
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
- pidfs_get_pid(sk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
spin_unlock(&sk->sk_peer_lock);
}
@@ -923,6 +929,52 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
#define unix_show_fdinfo NULL
#endif
+static bool unix_custom_sockopt(int optname)
+{
+ switch (optname) {
+ case SO_INQ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int unix_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct unix_sock *u = unix_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ int val;
+
+ if (level != SOL_SOCKET)
+ return -EOPNOTSUPP;
+
+ if (!unix_custom_sockopt(optname))
+ return sock_setsockopt(sock, level, optname, optval, optlen);
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ if (copy_from_sockptr(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ switch (optname) {
+ case SO_INQ:
+ if (sk->sk_type != SOCK_STREAM)
+ return -EINVAL;
+
+ if (val > 1 || val < 0)
+ return -EINVAL;
+
+ WRITE_ONCE(u->recvmsg_inq, val);
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ return 0;
+}
+
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
.owner = THIS_MODULE,
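
For context on the SO_INQ/SCM_INQ plumbing added above, a minimal user-space sketch of the intended usage; the helper names are hypothetical, and it assumes uapi headers that already define SO_INQ and SCM_INQ for SOL_SOCKET (those definitions come from the uapi side of this series, not from this hunk).

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Opt in once on a connected AF_UNIX SOCK_STREAM socket; other types get -EINVAL. */
static int enable_inq(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_SOCKET, SO_INQ, &one, sizeof(one));
}

/* Each recvmsg() then carries an SCM_INQ cmsg with the bytes still queued. */
static ssize_t read_with_inq(int fd, void *buf, size_t len)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	int inq = -1;
	ssize_t n;

	n = recvmsg(fd, &msg, 0);
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_INQ)
			memcpy(&inq, CMSG_DATA(cm), sizeof(inq));

	printf("read %zd bytes, %d byte(s) still queued\n", n, inq);
	return n;
}
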
@@ -939,6 +991,7 @@ static const struct proto_ops unix_stream_ops = {
#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
+ .setsockopt = unix_setsockopt,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
.read_skb = unix_stream_read_skb,
@@ -1105,6 +1158,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
switch (sock->type) {
case SOCK_STREAM:
+ set_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
sock->ops = &unix_stream_ops;
break;
/*
@@ -1836,6 +1890,9 @@ static int unix_accept(struct socket *sock, struct socket *newsock,
skb_free_datagram(sk, skb);
wake_up_interruptible(&unix_sk(sk)->peer_wait);
+ if (tsk->sk_type == SOCK_STREAM)
+ set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+
/* attach accepted sock to socket */
unix_state_lock(tsk);
unix_update_edges(unix_sk(tsk));
@@ -1934,7 +1991,7 @@ static void unix_destruct_scm(struct sk_buff *skb)
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
+ scm.pid = UNIXCB(skb).pid;
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
@@ -1960,21 +2017,46 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
return err;
}
-/*
+static void unix_skb_to_scm(struct sk_buff *skb, struct scm_cookie *scm)
+{
+ scm_set_cred(scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
+ unix_set_secdata(scm, skb);
+}
+
+/**
+ * unix_maybe_add_creds() - Adds current task uid/gid and struct pid to skb if needed.
+ * @skb: skb to attach creds to.
+ * @sk: Sender sock.
+ * @other: Receiver sock.
+ *
* Some apps rely on write() giving SCM_CREDENTIALS
* We include credentials if source or destination socket
* asserted SOCK_PASSCRED.
+ *
+ * Context: May sleep.
+ * Return: On success zero, on error a negative error code is returned.
*/
-static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
- const struct sock *other)
+static int unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk,
+ const struct sock *other)
{
if (UNIXCB(skb).pid)
- return;
+ return 0;
+
+ if (unix_may_passcred(sk) || unix_may_passcred(other) ||
+ !other->sk_socket) {
+ struct pid *pid;
+ int err;
- if (unix_may_passcred(sk) || unix_may_passcred(other)) {
- UNIXCB(skb).pid = get_pid(task_tgid(current));
+ pid = task_tgid(current);
+ err = pidfs_register_pid(pid);
+ if (unlikely(err))
+ return err;
+
+ UNIXCB(skb).pid = get_pid(pid);
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
+
+ return 0;
}
static bool unix_skb_scm_eq(struct sk_buff *skb,
@@ -2109,6 +2191,10 @@ lookup:
goto out_sock_put;
}
+ err = unix_maybe_add_creds(skb, sk, other);
+ if (err)
+ goto out_sock_put;
+
restart:
sk_locked = 0;
unix_state_lock(other);
@@ -2217,7 +2303,6 @@ restart_locked:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
- unix_maybe_add_creds(skb, sk, other);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
@@ -2261,6 +2346,10 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
if (err < 0)
goto out;
+ err = unix_maybe_add_creds(skb, sk, other);
+ if (err)
+ goto out;
+
skb_put(skb, 1);
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
@@ -2280,11 +2369,11 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other,
goto out_unlock;
}
- unix_maybe_add_creds(skb, sk, other);
scm_stat_add(other, skb);
spin_lock(&other->sk_receive_queue.lock);
WRITE_ONCE(ousk->oob_skb, skb);
+ WRITE_ONCE(ousk->inq_len, ousk->inq_len + 1);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -2307,6 +2396,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk = sock->sk;
struct sk_buff *skb = NULL;
struct sock *other = NULL;
+ struct unix_sock *otheru;
struct scm_cookie scm;
bool fds_sent = false;
int err, sent = 0;
@@ -2330,14 +2420,16 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_namelen) {
err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out_err;
- } else {
- other = unix_peer(sk);
- if (!other) {
- err = -ENOTCONN;
- goto out_err;
- }
}
+ other = unix_peer(sk);
+ if (!other) {
+ err = -ENOTCONN;
+ goto out_err;
+ }
+
+ otheru = unix_sk(other);
+
if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto out_pipe;
@@ -2374,10 +2466,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
fds_sent = true;
+ err = unix_maybe_add_creds(skb, sk, other);
+ if (err)
+ goto out_free;
+
if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
- err = skb_splice_from_iter(skb, &msg->msg_iter, size,
- sk->sk_allocation);
+ err = skb_splice_from_iter(skb, &msg->msg_iter, size);
if (err < 0)
goto out_free;
@@ -2404,9 +2499,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out_free;
}
- unix_maybe_add_creds(skb, sk, other);
scm_stat_add(other, skb);
- skb_queue_tail(&other->sk_receive_queue, skb);
+
+ spin_lock(&other->sk_receive_queue.lock);
+ WRITE_ONCE(otheru->inq_len, otheru->inq_len + skb->len);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
+
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2516,12 +2615,10 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
&err, &timeo, last));
if (!skb) { /* implies iolock unlocked */
- unix_state_lock(sk);
/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
- (sk->sk_shutdown & RCV_SHUTDOWN))
+ (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN))
err = 0;
- unix_state_unlock(sk);
goto out;
}
@@ -2552,8 +2649,7 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
memset(&scm, 0, sizeof(scm));
- scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
- unix_set_secdata(&scm, skb);
+ unix_skb_to_scm(skb, &scm);
if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
@@ -2660,11 +2756,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
return timeo;
}
-static unsigned int unix_skb_len(const struct sk_buff *skb)
-{
- return skb->len - UNIXCB(skb).consumed;
-}
-
struct unix_stream_read_state {
int (*recv_actor)(struct sk_buff *, int, int,
struct unix_stream_read_state *);
@@ -2679,11 +2770,11 @@ struct unix_stream_read_state {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
+ struct sk_buff *oob_skb, *read_skb = NULL;
struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
int chunk = 1;
- struct sk_buff *oob_skb;
mutex_lock(&u->iolock);
unix_state_lock(sk);
@@ -2698,8 +2789,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
oob_skb = u->oob_skb;
- if (!(state->flags & MSG_PEEK))
+ if (!(state->flags & MSG_PEEK)) {
WRITE_ONCE(u->oob_skb, NULL);
+ WRITE_ONCE(u->inq_len, u->inq_len - 1);
+
+ if (oob_skb->prev != (struct sk_buff *)&sk->sk_receive_queue &&
+ !unix_skb_len(oob_skb->prev)) {
+ read_skb = oob_skb->prev;
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
+ }
+ }
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2711,6 +2810,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
mutex_unlock(&u->iolock);
+ consume_skb(read_skb);
+
if (chunk < 0)
return -EFAULT;
@@ -2773,6 +2874,7 @@ unlock:
static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
+ struct sk_buff_head *queue = &sk->sk_receive_queue;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb;
int err;
@@ -2780,60 +2882,57 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
return -ENOTCONN;
- mutex_lock(&u->iolock);
- skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
- mutex_unlock(&u->iolock);
- if (!skb)
+ err = sock_error(sk);
+ if (err)
return err;
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (unlikely(skb == READ_ONCE(u->oob_skb))) {
- bool drop = false;
-
- unix_state_lock(sk);
+ mutex_lock(&u->iolock);
+ spin_lock(&queue->lock);
- if (sock_flag(sk, SOCK_DEAD)) {
- unix_state_unlock(sk);
- kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
- return -ECONNRESET;
- }
+ skb = __skb_dequeue(queue);
+ if (!skb) {
+ spin_unlock(&queue->lock);
+ mutex_unlock(&u->iolock);
+ return -EAGAIN;
+ }
- spin_lock(&sk->sk_receive_queue.lock);
- if (likely(skb == u->oob_skb)) {
- WRITE_ONCE(u->oob_skb, NULL);
- drop = true;
- }
- spin_unlock(&sk->sk_receive_queue.lock);
+ WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
- unix_state_unlock(sk);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb == u->oob_skb) {
+ WRITE_ONCE(u->oob_skb, NULL);
+ spin_unlock(&queue->lock);
+ mutex_unlock(&u->iolock);
- if (drop) {
- kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
- return -EAGAIN;
- }
+ kfree_skb_reason(skb, SKB_DROP_REASON_UNIX_SKIP_OOB);
+ return -EAGAIN;
}
#endif
+ spin_unlock(&queue->lock);
+ mutex_unlock(&u->iolock);
+
return recv_actor(sk, skb);
}
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
- struct scm_cookie scm;
+ int noblock = state->flags & MSG_DONTWAIT;
struct socket *sock = state->socket;
+ struct msghdr *msg = state->msg;
struct sock *sk = sock->sk;
- struct unix_sock *u = unix_sk(sk);
- int copied = 0;
+ size_t size = state->size;
int flags = state->flags;
- int noblock = flags & MSG_DONTWAIT;
bool check_creds = false;
- int target;
+ struct scm_cookie scm;
+ unsigned int last_len;
+ struct unix_sock *u;
+ int copied = 0;
int err = 0;
long timeo;
+ int target;
int skip;
- size_t size = state->size;
- unsigned int last_len;
if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
err = -EINVAL;
@@ -2853,6 +2952,8 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
memset(&scm, 0, sizeof(scm));
+ u = unix_sk(sk);
+
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
*/
@@ -2938,20 +3039,17 @@ unlock:
break;
} else if (unix_may_passcred(sk)) {
/* Copy credentials */
- scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
- unix_set_secdata(&scm, skb);
+ unix_skb_to_scm(skb, &scm);
check_creds = true;
}
/* Copy address just once */
- if (state->msg && state->msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
- state->msg->msg_name);
- unix_copy_addr(state->msg, skb->sk);
+ if (msg && msg->msg_name) {
+ DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
- BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk,
- state->msg->msg_name,
- &state->msg->msg_namelen);
+ unix_copy_addr(msg, skb->sk);
+ BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, msg->msg_name,
+ &msg->msg_namelen);
sunaddr = NULL;
}
@@ -2980,7 +3078,11 @@ unlock:
if (unix_skb_len(skb))
break;
- skb_unlink(skb, &sk->sk_receive_queue);
+ spin_lock(&sk->sk_receive_queue.lock);
+ WRITE_ONCE(u->inq_len, u->inq_len - skb->len);
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ spin_unlock(&sk->sk_receive_queue.lock);
+
consume_skb(skb);
if (scm.fp)
@@ -3009,10 +3111,17 @@ unlock:
} while (size);
mutex_unlock(&u->iolock);
- if (state->msg)
- scm_recv_unix(sock, state->msg, &scm, flags);
- else
+ if (msg) {
+ scm_recv_unix(sock, msg, &scm, flags);
+
+ if (READ_ONCE(u->recvmsg_inq) || msg->msg_get_inq) {
+ msg->msg_inq = READ_ONCE(u->inq_len);
+ put_cmsg(msg, SOL_SOCKET, SCM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
+ }
+ } else {
scm_destroy(&scm);
+ }
out:
return copied ? : err;
}
@@ -3151,9 +3260,11 @@ long unix_inq_len(struct sock *sk)
if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
return -EINVAL;
+ if (sk->sk_type == SOCK_STREAM)
+ return READ_ONCE(unix_sk(sk)->inq_len);
+
spin_lock(&sk->sk_receive_queue.lock);
- if (sk->sk_type == SOCK_STREAM ||
- sk->sk_type == SOCK_SEQPACKET) {
+ if (sk->sk_type == SOCK_SEQPACKET) {
skb_queue_walk(&sk->sk_receive_queue, skb)
amount += unix_skb_len(skb);
} else {
@@ -3175,7 +3286,6 @@ EXPORT_SYMBOL_GPL(unix_outq_len);
static int unix_open_file(struct sock *sk)
{
- struct path path;
struct file *f;
int fd;
@@ -3185,27 +3295,20 @@ static int unix_open_file(struct sock *sk)
if (!smp_load_acquire(&unix_sk(sk)->addr))
return -ENOENT;
- path = unix_sk(sk)->path;
- if (!path.dentry)
+ if (!unix_sk(sk)->path.dentry)
return -ENOENT;
- path_get(&path);
-
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
- goto out;
+ return fd;
- f = dentry_open(&path, O_PATH, current_cred());
+ f = dentry_open(&unix_sk(sk)->path, O_PATH, current_cred());
if (IS_ERR(f)) {
put_unused_fd(fd);
- fd = PTR_ERR(f);
- goto out;
+ return PTR_ERR(f);
}
fd_install(fd, f);
-out:
- path_put(&path);
-
return fd;
}
@@ -3681,7 +3784,7 @@ static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
goto unlock;
}
- uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ uid = from_kuid_munged(seq_user_ns(seq), sk_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
ret = unix_prog_seq_show(prog, &meta, v, uid);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 79b182d0e62a..ca3473026151 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -106,7 +106,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
struct user_namespace *user_ns)
{
- uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+ uid_t uid = from_kuid_munged(user_ns, sk_uid(sk));
return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
}
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2e7a3034e965..ead6a3c14b87 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -407,6 +407,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
static bool vsock_use_local_transport(unsigned int remote_cid)
{
+ lockdep_assert_held(&vsock_register_mutex);
+
if (!transport_local)
return false;
@@ -464,6 +466,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
remote_flags = vsk->remote_addr.svm_flags;
+ mutex_lock(&vsock_register_mutex);
+
switch (sk->sk_type) {
case SOCK_DGRAM:
new_transport = transport_dgram;
@@ -479,12 +483,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_h2g;
break;
default:
- return -ESOCKTNOSUPPORT;
+ ret = -ESOCKTNOSUPPORT;
+ goto err;
}
if (vsk->transport) {
- if (vsk->transport == new_transport)
- return 0;
+ if (vsk->transport == new_transport) {
+ ret = 0;
+ goto err;
+ }
/* transport->release() must be called with sock lock acquired.
* This path can only be taken during vsock_connect(), where we
@@ -508,8 +515,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
/* We increase the module refcnt to prevent the transport unloading
* while there are open sockets assigned to it.
*/
- if (!new_transport || !try_module_get(new_transport->module))
- return -ENODEV;
+ if (!new_transport || !try_module_get(new_transport->module)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ /* It's safe to release the mutex after a successful try_module_get().
+ * Whichever transport `new_transport` points at, it won't go away until
+ * the last module_put() below or in vsock_deassign_transport().
+ */
+ mutex_unlock(&vsock_register_mutex);
if (sk->sk_type == SOCK_SEQPACKET) {
if (!new_transport->seqpacket_allow ||
@@ -528,12 +543,31 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->transport = new_transport;
return 0;
+err:
+ mutex_unlock(&vsock_register_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+/*
+ * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks.
+ * Otherwise we may race with module removal. Do not use on `vsk->transport`.
+ */
+static u32 vsock_registered_transport_cid(const struct vsock_transport **transport)
+{
+ u32 cid = VMADDR_CID_ANY;
+
+ mutex_lock(&vsock_register_mutex);
+ if (*transport)
+ cid = (*transport)->get_local_cid();
+ mutex_unlock(&vsock_register_mutex);
+
+ return cid;
+}
+
bool vsock_find_cid(unsigned int cid)
{
- if (transport_g2h && cid == transport_g2h->get_local_cid())
+ if (cid == vsock_registered_transport_cid(&transport_g2h))
return true;
if (transport_h2g && cid == VMADDR_CID_HOST)
@@ -994,11 +1028,6 @@ static int vsock_getname(struct socket *sock,
vm_addr = &vsk->local_addr;
}
- if (!vm_addr) {
- err = -EINVAL;
- goto out;
- }
-
/* sys_getsockname() and sys_getpeername() pass us a
* MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately
* that macro is defined in socket.c instead of .h, so we hardcode its
@@ -1389,6 +1418,28 @@ static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
vsk = vsock_sk(sk);
switch (cmd) {
+ case SIOCINQ: {
+ ssize_t n_bytes;
+
+ if (!vsk->transport) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if (sock_type_connectible(sk->sk_type) &&
+ sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
+
+ n_bytes = vsock_stream_has_data(vsk);
+ if (n_bytes < 0) {
+ ret = n_bytes;
+ break;
+ }
+ ret = put_user(n_bytes, arg);
+ break;
+ }
case SIOCOUTQ: {
ssize_t n_bytes;
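
A short user-space sketch of the SIOCINQ handling added above; the helper name is made up and the descriptor is assumed to be a connected AF_VSOCK stream or seqpacket socket (listeners get -EINVAL, as in the hunk).

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ */

static int print_unread(int fd)
{
	int unread = 0;

	if (ioctl(fd, SIOCINQ, &unread) < 0) {
		perror("SIOCINQ");
		return -1;
	}
	printf("%d byte(s) queued for read\n", unread);
	return 0;
}
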
@@ -2536,18 +2587,19 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
- u32 cid = VMADDR_CID_ANY;
int retval = 0;
+ u32 cid;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
/* To be compatible with the VMCI behavior, we prioritize the
* guest CID instead of well-know host CID (VMADDR_CID_HOST).
*/
- if (transport_g2h)
- cid = transport_g2h->get_local_cid();
- else if (transport_h2g)
- cid = transport_h2g->get_local_cid();
+ cid = vsock_registered_transport_cid(&transport_g2h);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_h2g);
+ if (cid == VMADDR_CID_ANY)
+ cid = vsock_registered_transport_cid(&transport_local);
if (put_user(cid, p) != 0)
retval = -EFAULT;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 31342ab502b4..432fcbbd14d4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -694,15 +694,26 @@ out:
static s64 hvs_stream_has_data(struct vsock_sock *vsk)
{
struct hvsock *hvs = vsk->trans;
+ bool need_refill;
s64 ret;
if (hvs->recv_data_len > 0)
- return 1;
+ return hvs->recv_data_len;
switch (hvs_channel_readable_payload(hvs->chan)) {
case 1:
- ret = 1;
- break;
+ need_refill = !hvs->recv_desc;
+ if (!need_refill)
+ return -EIO;
+
+ hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+ if (!hvs->recv_desc)
+ return -ENOBUFS;
+
+ ret = hvs_update_recv_data(hvs);
+ if (ret)
+ return ret;
+ return hvs->recv_data_len;
case 0:
vsk->peer_shutdown |= SEND_SHUTDOWN;
ret = 0;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index b370070194fa..7eccd6708d66 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -119,6 +119,8 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
u16 proto,
struct vmci_handle handle)
{
+ memset(pkt, 0, sizeof(*pkt));
+
/* We register the stream control handler as an any cid handle so we
* must always send from a source address of VMADDR_CID_ANY
*/
@@ -131,8 +133,6 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
pkt->type = type;
pkt->src_port = src->svm_port;
pkt->dst_port = dst->svm_port;
- memset(&pkt->proto, 0, sizeof(pkt->proto));
- memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));
switch (pkt->type) {
case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5c3c72df0591..a7e2931ffb2e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -239,7 +239,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
rdev->opencount--;
- if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
+ if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) {
if (WARN_ON(!rdev->scan_req->notified &&
(!rdev->int_scan_req ||
!rdev->int_scan_req->notified)))
@@ -995,6 +995,24 @@ int wiphy_register(struct wiphy *wiphy)
wiphy->max_num_akm_suites > CFG80211_MAX_NUM_AKM_SUITES)
return -EINVAL;
+ /* Allocate radio configuration space for multi-radio wiphy */
+ if (wiphy->n_radio > 0) {
+ int idx;
+
+ wiphy->radio_cfg = kcalloc(wiphy->n_radio,
+ sizeof(*wiphy->radio_cfg),
+ GFP_KERNEL);
+ if (!wiphy->radio_cfg)
+ return -ENOMEM;
+ /*
+ * Initialize wiphy radio parameters to IEEE 802.11
+ * MIB default values. RTS threshold is disabled by
+ * default with the special -1 value.
+ */
+ for (idx = 0; idx < wiphy->n_radio; idx++)
+ wiphy->radio_cfg[idx].rts_threshold = (u32)-1;
+ }
+
/* check and set up bitrates */
ieee80211_set_bitrate_flags(wiphy);
@@ -1222,6 +1240,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
void wiphy_free(struct wiphy *wiphy)
{
+ kfree(wiphy->radio_cfg);
put_device(&wiphy->dev);
}
EXPORT_SYMBOL(wiphy_free);
@@ -1555,7 +1574,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_DOWN:
wiphy_lock(&rdev->wiphy);
cfg80211_update_iface_num(rdev, wdev->iftype, -1);
- if (rdev->scan_req && rdev->scan_req->wdev == wdev) {
+ if (rdev->scan_req && rdev->scan_req->req.wdev == wdev) {
if (WARN_ON(!rdev->scan_req->notified &&
(!rdev->int_scan_req ||
!rdev->int_scan_req->notified)))
diff --git a/net/wireless/core.h b/net/wireless/core.h
index c56a35040caa..b6bd7f4d6385 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -21,6 +21,13 @@
#define WIPHY_IDX_INVALID -1
+struct cfg80211_scan_request_int {
+ struct cfg80211_scan_info info;
+ bool notified;
+ /* must be last - variable members */
+ struct cfg80211_scan_request req;
+};
+
struct cfg80211_registered_device {
const struct cfg80211_ops *ops;
struct list_head list;
@@ -70,8 +77,8 @@ struct cfg80211_registered_device {
struct rb_root bss_tree;
u32 bss_generation;
u32 bss_entries;
- struct cfg80211_scan_request *scan_req; /* protected by RTNL */
- struct cfg80211_scan_request *int_scan_req;
+ struct cfg80211_scan_request_int *scan_req; /* protected by RTNL */
+ struct cfg80211_scan_request_int *int_scan_req;
struct sk_buff *scan_msg;
struct list_head sched_scan_req_list;
time64_t suspend_at;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 05d44a443518..46394eb2086f 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -352,8 +352,25 @@ cfg80211_mlme_check_mlo_compat(const struct ieee80211_multi_link_elem *mle_a,
return -EINVAL;
}
- if (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) !=
- ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b)) {
+ /*
+ * Only verify the values in Extended MLD Capabilities that are
+ * not reserved when transmitted by an AP (and expected to remain the
+ * same over time).
+ * The Recommended Max Simultaneous Links subfield in particular is
+ * reserved when included in a unicast Probe Response frame and may
+ * also change when the AP adds/removes links. The BTM MLD
+ * Recommendation For Multiple APs Support subfield is reserved when
+ * transmitted by an AP. All other bits are currently reserved.
+ * See IEEE P802.11be/D7.0, Table 9-417o.
+ */
+ if ((ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_a) &
+ (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE |
+ IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE |
+ IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK)) !=
+ (ieee80211_mle_get_ext_mld_capa_op((const u8 *)mle_b) &
+ (IEEE80211_EHT_ML_EXT_MLD_CAPA_OP_PARAM_UPDATE |
+ IEEE80211_EHT_ML_EXT_MLD_CAPA_NSTR_UPDATE |
+ IEEE80211_EHT_ML_EXT_MLD_CAPA_EMLSR_ENA_ON_ONE_LINK))) {
NL_SET_ERR_MSG(extack,
"extended link MLD capabilities/ops mismatch");
return -EINVAL;
@@ -850,7 +867,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
mgmt = (const struct ieee80211_mgmt *)params->buf;
- if (!ieee80211_is_mgmt(mgmt->frame_control))
+ if (!ieee80211_is_mgmt(mgmt->frame_control) ||
+ ieee80211_has_order(mgmt->frame_control))
return -EINVAL;
stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
@@ -1331,7 +1349,8 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev,
lockdep_assert_wiphy(wiphy);
trace_cfg80211_mlo_reconf_add_done(dev, data->added_links,
- data->buf, data->len);
+ data->buf, data->len,
+ data->driver_initiated);
if (WARN_ON(!wdev->valid_links))
return;
@@ -1361,11 +1380,16 @@ void cfg80211_mlo_reconf_add_done(struct net_device *dev,
wdev->links[link_id].client.current_bss =
bss_from_pub(bss);
+ if (data->driver_initiated)
+ cfg80211_hold_bss(bss_from_pub(bss));
+
memcpy(wdev->links[link_id].addr,
data->links[link_id].addr,
ETH_ALEN);
} else {
- cfg80211_unhold_bss(bss_from_pub(bss));
+ if (!data->driver_initiated)
+ cfg80211_unhold_bss(bss_from_pub(bss));
+
cfg80211_put_bss(wiphy, bss);
}
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fd5f79266471..89519aa52893 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -229,6 +229,7 @@ static int validate_beacon_head(const struct nlattr *attr,
unsigned int len = nla_len(attr);
const struct element *elem;
const struct ieee80211_mgmt *mgmt = (void *)data;
+ const struct ieee80211_ext *ext;
unsigned int fixedlen, hdrlen;
bool s1g_bcn;
@@ -237,8 +238,10 @@ static int validate_beacon_head(const struct nlattr *attr,
s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control);
if (s1g_bcn) {
- fixedlen = offsetof(struct ieee80211_ext,
- u.s1g_beacon.variable);
+ ext = (struct ieee80211_ext *)mgmt;
+ fixedlen =
+ offsetof(struct ieee80211_ext, u.s1g_beacon.variable) +
+ ieee80211_s1g_optional_len(ext->frame_control);
hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon);
} else {
fixedlen = offsetof(struct ieee80211_mgmt,
@@ -479,6 +482,16 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
};
+static const struct nla_policy
+nl80211_s1g_short_beacon[NL80211_S1G_SHORT_BEACON_ATTR_MAX + 1] = {
+ [NL80211_S1G_SHORT_BEACON_ATTR_HEAD] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head,
+ IEEE80211_MAX_DATA_LEN),
+ [NL80211_S1G_SHORT_BEACON_ATTR_TAIL] =
+ NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr,
+ IEEE80211_MAX_DATA_LEN),
+};
+
static const struct netlink_range_validation nl80211_punct_bitmap_range = {
.min = 0,
.max = 0xffff,
@@ -854,6 +867,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 },
[NL80211_ATTR_EPCS] = { .type = NLA_FLAG },
[NL80211_ATTR_ASSOC_MLD_EXT_CAPA_OPS] = { .type = NLA_U16 },
+ [NL80211_ATTR_WIPHY_RADIO_INDEX] = { .type = NLA_U8 },
+ [NL80211_ATTR_S1G_LONG_BEACON_PERIOD] = NLA_POLICY_MIN(NLA_U8, 2),
+ [NL80211_ATTR_S1G_SHORT_BEACON] =
+ NLA_POLICY_NESTED(nl80211_s1g_short_beacon),
};
/* policy for the key attributes */
@@ -1583,7 +1600,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
return result;
error:
- kfree(result);
+ kfree_sensitive(result);
return ERR_PTR(err);
}
@@ -2446,6 +2463,7 @@ fail:
static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx)
{
const struct wiphy_radio *r = &wiphy->radio[idx];
+ const struct wiphy_radio_cfg *rcfg = &wiphy->radio_cfg[idx];
struct nlattr *radio, *freq;
int i;
@@ -2456,6 +2474,11 @@ static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx)
if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx))
goto nla_put_failure;
+ if (rcfg->rts_threshold &&
+ nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_RTS_THRESHOLD,
+ rcfg->rts_threshold))
+ goto nla_put_failure;
+
if (r->antenna_mask &&
nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK,
r->antenna_mask))
@@ -2639,7 +2662,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
u32 tx_ant = 0, rx_ant = 0;
int res;
- res = rdev_get_antenna(rdev, &tx_ant, &rx_ant);
+ res = rdev_get_antenna(rdev, -1, &tx_ant, &rx_ant);
if (!res) {
if (nla_put_u32(msg,
NL80211_ATTR_WIPHY_ANTENNA_TX,
@@ -3608,6 +3631,33 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
return __nl80211_set_channel(rdev, netdev, info, link_id);
}
+static int nl80211_set_wiphy_radio(struct genl_info *info,
+ struct cfg80211_registered_device *rdev,
+ int radio_idx)
+{
+ u32 rts_threshold = 0, old_rts, changed = 0;
+ int result;
+
+ if (!rdev->ops->set_wiphy_params)
+ return -EOPNOTSUPP;
+
+ if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) {
+ rts_threshold = nla_get_u32(
+ info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]);
+ changed |= WIPHY_PARAM_RTS_THRESHOLD;
+ }
+
+ old_rts = rdev->wiphy.radio_cfg[radio_idx].rts_threshold;
+
+ rdev->wiphy.radio_cfg[radio_idx].rts_threshold = rts_threshold;
+
+ result = rdev_set_wiphy_params(rdev, radio_idx, changed);
+ if (result)
+ rdev->wiphy.radio_cfg[radio_idx].rts_threshold = old_rts;
+
+ return result;
+}
+
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = NULL;
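
A hedged libnl-3 sketch of how the per-radio branch above would be exercised from user space; error handling is omitted, the helper name is made up, and NL80211_ATTR_WIPHY_RADIO_INDEX is the attribute introduced by this series, while the remaining calls are standard libnl/nl80211 API.

#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

static int set_radio_rts(int wiphy_idx, unsigned char radio_idx, unsigned int rts)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "nl80211");

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_SET_WIPHY, 0);
	nla_put_u32(msg, NL80211_ATTR_WIPHY, wiphy_idx);
	/* Kernel rejects the index unless the wiphy is multi-radio and idx < n_radio. */
	nla_put_u8(msg, NL80211_ATTR_WIPHY_RADIO_INDEX, radio_idx);
	nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, rts);

	nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}
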
@@ -3620,6 +3670,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
u32 frag_threshold = 0, rts_threshold = 0;
u8 coverage_class = 0;
u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0;
+ int radio_idx = -1;
rtnl_lock();
/*
@@ -3670,6 +3721,19 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (result)
return result;
+ if (info->attrs[NL80211_ATTR_WIPHY_RADIO_INDEX]) {
+ /* Radio idx is not expected for non-multi radio wiphy */
+ if (rdev->wiphy.n_radio <= 0)
+ return -EINVAL;
+
+ radio_idx = nla_get_u8(
+ info->attrs[NL80211_ATTR_WIPHY_RADIO_INDEX]);
+ if (radio_idx >= rdev->wiphy.n_radio)
+ return -EINVAL;
+
+ return nl80211_set_wiphy_radio(info, rdev, radio_idx);
+ }
+
if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) {
struct ieee80211_txq_params txq_params;
struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1];
@@ -3759,7 +3823,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
mbm = nla_get_u32(info->attrs[idx]);
}
- result = rdev_set_tx_power(rdev, txp_wdev, type, mbm);
+ result = rdev_set_tx_power(rdev, txp_wdev, radio_idx, type,
+ mbm);
if (result)
return result;
}
@@ -3785,7 +3850,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
tx_ant = tx_ant & rdev->wiphy.available_antennas_tx;
rx_ant = rx_ant & rdev->wiphy.available_antennas_rx;
- result = rdev_set_antenna(rdev, tx_ant, rx_ant);
+ result = rdev_set_antenna(rdev, radio_idx, tx_ant, rx_ant);
if (result)
return result;
}
@@ -3879,16 +3944,30 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (changed) {
u8 old_retry_short, old_retry_long;
u32 old_frag_threshold, old_rts_threshold;
- u8 old_coverage_class;
+ u8 old_coverage_class, i;
u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum;
+ u32 *old_radio_rts_threshold = NULL;
if (!rdev->ops->set_wiphy_params)
return -EOPNOTSUPP;
+ if (rdev->wiphy.n_radio) {
+ old_radio_rts_threshold = kcalloc(rdev->wiphy.n_radio,
+ sizeof(u32),
+ GFP_KERNEL);
+ if (!old_radio_rts_threshold)
+ return -ENOMEM;
+ }
+
old_retry_short = rdev->wiphy.retry_short;
old_retry_long = rdev->wiphy.retry_long;
old_frag_threshold = rdev->wiphy.frag_threshold;
old_rts_threshold = rdev->wiphy.rts_threshold;
+ if (old_radio_rts_threshold) {
+ for (i = 0 ; i < rdev->wiphy.n_radio; i++)
+ old_radio_rts_threshold[i] =
+ rdev->wiphy.radio_cfg[i].rts_threshold;
+ }
old_coverage_class = rdev->wiphy.coverage_class;
old_txq_limit = rdev->wiphy.txq_limit;
old_txq_memory_limit = rdev->wiphy.txq_memory_limit;
@@ -3900,8 +3979,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
rdev->wiphy.retry_long = retry_long;
if (changed & WIPHY_PARAM_FRAG_THRESHOLD)
rdev->wiphy.frag_threshold = frag_threshold;
- if (changed & WIPHY_PARAM_RTS_THRESHOLD)
+ if ((changed & WIPHY_PARAM_RTS_THRESHOLD) &&
+ old_radio_rts_threshold) {
rdev->wiphy.rts_threshold = rts_threshold;
+ for (i = 0 ; i < rdev->wiphy.n_radio; i++)
+ rdev->wiphy.radio_cfg[i].rts_threshold =
+ rdev->wiphy.rts_threshold;
+ }
if (changed & WIPHY_PARAM_COVERAGE_CLASS)
rdev->wiphy.coverage_class = coverage_class;
if (changed & WIPHY_PARAM_TXQ_LIMIT)
@@ -3911,18 +3995,26 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
if (changed & WIPHY_PARAM_TXQ_QUANTUM)
rdev->wiphy.txq_quantum = txq_quantum;
- result = rdev_set_wiphy_params(rdev, changed);
+ result = rdev_set_wiphy_params(rdev, radio_idx, changed);
if (result) {
rdev->wiphy.retry_short = old_retry_short;
rdev->wiphy.retry_long = old_retry_long;
rdev->wiphy.frag_threshold = old_frag_threshold;
rdev->wiphy.rts_threshold = old_rts_threshold;
+ if (old_radio_rts_threshold) {
+ for (i = 0 ; i < rdev->wiphy.n_radio; i++)
+ rdev->wiphy.radio_cfg[i].rts_threshold =
+ old_radio_rts_threshold[i];
+ }
rdev->wiphy.coverage_class = old_coverage_class;
rdev->wiphy.txq_limit = old_txq_limit;
rdev->wiphy.txq_memory_limit = old_txq_memory_limit;
rdev->wiphy.txq_quantum = old_txq_quantum;
- return result;
}
+
+ if (old_radio_rts_threshold)
+ kfree(old_radio_rts_threshold);
+ return result;
}
return 0;
@@ -4012,7 +4104,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
if (rdev->ops->get_tx_power && !wdev->valid_links) {
int dbm, ret;
- ret = rdev_get_tx_power(rdev, wdev, 0, &dbm);
+ ret = rdev_get_tx_power(rdev, wdev, -1, 0, &dbm);
if (ret == 0 &&
nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
DBM_TO_MBM(dbm)))
@@ -4084,7 +4176,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
if (rdev->ops->get_tx_power) {
int dbm, ret;
- ret = rdev_get_tx_power(rdev, wdev, link_id, &dbm);
+ ret = rdev_get_tx_power(rdev, wdev, -1, link_id, &dbm);
if (ret == 0 &&
nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
DBM_TO_MBM(dbm)))
@@ -6123,6 +6215,41 @@ static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params
return 0;
}
+static int
+nl80211_parse_s1g_short_beacon(struct cfg80211_registered_device *rdev,
+ struct nlattr *attrs,
+ struct cfg80211_s1g_short_beacon *sb)
+{
+ struct nlattr *tb[NL80211_S1G_SHORT_BEACON_ATTR_MAX + 1];
+ int ret;
+
+ if (!rdev->wiphy.bands[NL80211_BAND_S1GHZ])
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, NL80211_S1G_SHORT_BEACON_ATTR_MAX, attrs,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ /* Short beacon tail is optional (i.e. might only include the TIM) */
+ if (!tb[NL80211_S1G_SHORT_BEACON_ATTR_HEAD])
+ return -EINVAL;
+
+ sb->short_head = nla_data(tb[NL80211_S1G_SHORT_BEACON_ATTR_HEAD]);
+ sb->short_head_len = nla_len(tb[NL80211_S1G_SHORT_BEACON_ATTR_HEAD]);
+ sb->short_tail_len = 0;
+
+ if (tb[NL80211_S1G_SHORT_BEACON_ATTR_TAIL]) {
+ sb->short_tail =
+ nla_data(tb[NL80211_S1G_SHORT_BEACON_ATTR_TAIL]);
+ sb->short_tail_len =
+ nla_len(tb[NL80211_S1G_SHORT_BEACON_ATTR_TAIL]);
+ }
+
+ sb->update = true;
+ return 0;
+}
+
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
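
Similarly, a libnl-3 sketch of building the nested short-beacon attribute that nl80211_parse_s1g_short_beacon() consumes; the helper is hypothetical, and the NL80211_S1G_SHORT_BEACON_ATTR_* / NL80211_ATTR_S1G_* names are the ones added by this series.

#include <errno.h>
#include <netlink/genl/genl.h>
#include <linux/nl80211.h>

static int put_s1g_short_beacon(struct nl_msg *msg,
				const void *head, int head_len,
				const void *tail, int tail_len,
				unsigned char long_period)
{
	struct nlattr *nest;

	/* The long beacon period (>= 2) must accompany a short beacon. */
	if (nla_put_u8(msg, NL80211_ATTR_S1G_LONG_BEACON_PERIOD, long_period))
		return -ENOBUFS;

	nest = nla_nest_start(msg, NL80211_ATTR_S1G_SHORT_BEACON);
	if (!nest)
		return -ENOBUFS;

	/* Head is mandatory; the tail is optional. */
	nla_put(msg, NL80211_S1G_SHORT_BEACON_ATTR_HEAD, head_len, head);
	if (tail && tail_len)
		nla_put(msg, NL80211_S1G_SHORT_BEACON_ATTR_TAIL, tail_len, tail);

	nla_nest_end(msg, nest);
	return 0;
}
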
@@ -6363,6 +6490,22 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
+ if (info->attrs[NL80211_ATTR_S1G_SHORT_BEACON]) {
+ if (!info->attrs[NL80211_ATTR_S1G_LONG_BEACON_PERIOD]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params->s1g_long_beacon_period = nla_get_u8(
+ info->attrs[NL80211_ATTR_S1G_LONG_BEACON_PERIOD]);
+
+ err = nl80211_parse_s1g_short_beacon(
+ rdev, info->attrs[NL80211_ATTR_S1G_SHORT_BEACON],
+ &params->s1g_short_beacon);
+ if (err)
+ goto out;
+ }
+
err = nl80211_calculate_ap_params(params);
if (err)
goto out;
@@ -6471,6 +6614,14 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
goto out;
}
+ attr = info->attrs[NL80211_ATTR_S1G_SHORT_BEACON];
+ if (attr) {
+ err = nl80211_parse_s1g_short_beacon(rdev, attr,
+ &params->s1g_short_beacon);
+ if (err)
+ goto out;
+ }
+
err = rdev_change_beacon(rdev, dev, params);
out:
@@ -6728,6 +6879,185 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal,
return true;
}
+static int nl80211_fill_link_station(struct sk_buff *msg,
+ struct cfg80211_registered_device *rdev,
+ struct link_station_info *link_sinfo)
+{
+ struct nlattr *bss_param, *link_sinfoattr;
+
+#define PUT_LINK_SINFO(attr, memb, type) do { \
+ BUILD_BUG_ON(sizeof(type) == sizeof(u64)); \
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \
+ nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \
+ link_sinfo->memb)) \
+ goto nla_put_failure; \
+ } while (0)
+#define PUT_LINK_SINFO_U64(attr, memb) do { \
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) && \
+ nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr, \
+ link_sinfo->memb, NL80211_STA_INFO_PAD)) \
+ goto nla_put_failure; \
+ } while (0)
+
+ link_sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO);
+ if (!link_sinfoattr)
+ goto nla_put_failure;
+
+ PUT_LINK_SINFO(INACTIVE_TIME, inactive_time, u32);
+
+ if (link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+ BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
+ nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES,
+ (u32)link_sinfo->rx_bytes))
+ goto nla_put_failure;
+
+ if (link_sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+ BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) &&
+ nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
+ (u32)link_sinfo->tx_bytes))
+ goto nla_put_failure;
+
+ PUT_LINK_SINFO_U64(RX_BYTES64, rx_bytes);
+ PUT_LINK_SINFO_U64(TX_BYTES64, tx_bytes);
+ PUT_LINK_SINFO_U64(RX_DURATION, rx_duration);
+ PUT_LINK_SINFO_U64(TX_DURATION, tx_duration);
+
+ if (wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+ PUT_LINK_SINFO(AIRTIME_WEIGHT, airtime_weight, u16);
+
+ switch (rdev->wiphy.signal_type) {
+ case CFG80211_SIGNAL_TYPE_MBM:
+ PUT_LINK_SINFO(SIGNAL, signal, u8);
+ PUT_LINK_SINFO(SIGNAL_AVG, signal_avg, u8);
+ break;
+ default:
+ break;
+ }
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) {
+ if (!nl80211_put_signal(msg, link_sinfo->chains,
+ link_sinfo->chain_signal,
+ NL80211_STA_INFO_CHAIN_SIGNAL))
+ goto nla_put_failure;
+ }
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
+ if (!nl80211_put_signal(msg, link_sinfo->chains,
+ link_sinfo->chain_signal_avg,
+ NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
+ goto nla_put_failure;
+ }
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) {
+ if (!nl80211_put_sta_rate(msg, &link_sinfo->txrate,
+ NL80211_STA_INFO_TX_BITRATE))
+ goto nla_put_failure;
+ }
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) {
+ if (!nl80211_put_sta_rate(msg, &link_sinfo->rxrate,
+ NL80211_STA_INFO_RX_BITRATE))
+ goto nla_put_failure;
+ }
+
+ PUT_LINK_SINFO(RX_PACKETS, rx_packets, u32);
+ PUT_LINK_SINFO(TX_PACKETS, tx_packets, u32);
+ PUT_LINK_SINFO(TX_RETRIES, tx_retries, u32);
+ PUT_LINK_SINFO(TX_FAILED, tx_failed, u32);
+ PUT_LINK_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32);
+ PUT_LINK_SINFO(BEACON_LOSS, beacon_loss_count, u32);
+
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
+ bss_param = nla_nest_start_noflag(msg,
+ NL80211_STA_INFO_BSS_PARAM);
+ if (!bss_param)
+ goto nla_put_failure;
+
+ if (((link_sinfo->bss_param.flags &
+ BSS_PARAM_FLAGS_CTS_PROT) &&
+ nla_put_flag(msg, NL80211_STA_BSS_PARAM_CTS_PROT)) ||
+ ((link_sinfo->bss_param.flags &
+ BSS_PARAM_FLAGS_SHORT_PREAMBLE) &&
+ nla_put_flag(msg,
+ NL80211_STA_BSS_PARAM_SHORT_PREAMBLE)) ||
+ ((link_sinfo->bss_param.flags &
+ BSS_PARAM_FLAGS_SHORT_SLOT_TIME) &&
+ nla_put_flag(msg,
+ NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME)) ||
+ nla_put_u8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD,
+ link_sinfo->bss_param.dtim_period) ||
+ nla_put_u16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL,
+ link_sinfo->bss_param.beacon_interval))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, bss_param);
+ }
+
+ PUT_LINK_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
+ PUT_LINK_SINFO_U64(BEACON_RX, rx_beacon);
+ PUT_LINK_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
+ PUT_LINK_SINFO(RX_MPDUS, rx_mpdu_count, u32);
+ PUT_LINK_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32);
+ if (wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) {
+ PUT_LINK_SINFO(ACK_SIGNAL, ack_signal, u8);
+ PUT_LINK_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8);
+ }
+
+#undef PUT_LINK_SINFO
+#undef PUT_LINK_SINFO_U64
+
+ if (link_sinfo->pertid) {
+ struct nlattr *tidsattr;
+ int tid;
+
+ tidsattr = nla_nest_start_noflag(msg,
+ NL80211_STA_INFO_TID_STATS);
+ if (!tidsattr)
+ goto nla_put_failure;
+
+ for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+ struct cfg80211_tid_stats *tidstats;
+ struct nlattr *tidattr;
+
+ tidstats = &link_sinfo->pertid[tid];
+
+ if (!tidstats->filled)
+ continue;
+
+ tidattr = nla_nest_start_noflag(msg, tid + 1);
+ if (!tidattr)
+ goto nla_put_failure;
+
+#define PUT_TIDVAL_U64(attr, memb) do { \
+ if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \
+ nla_put_u64_64bit(msg, NL80211_TID_STATS_ ## attr, \
+ tidstats->memb, NL80211_TID_STATS_PAD)) \
+ goto nla_put_failure; \
+ } while (0)
+
+ PUT_TIDVAL_U64(RX_MSDU, rx_msdu);
+ PUT_TIDVAL_U64(TX_MSDU, tx_msdu);
+ PUT_TIDVAL_U64(TX_MSDU_RETRIES, tx_msdu_retries);
+ PUT_TIDVAL_U64(TX_MSDU_FAILED, tx_msdu_failed);
+
+#undef PUT_TIDVAL_U64
+ if ((tidstats->filled &
+ BIT(NL80211_TID_STATS_TXQ_STATS)) &&
+ !nl80211_put_txq_stats(msg, &tidstats->txq_stats,
+ NL80211_TID_STATS_TXQ_STATS))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, tidattr);
+ }
+
+ nla_nest_end(msg, tidsattr);
+ }
+
+ nla_nest_end(msg, link_sinfoattr);
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
u32 seq, int flags,
struct cfg80211_registered_device *rdev,
@@ -6736,6 +7066,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
{
void *hdr;
struct nlattr *sinfoattr, *bss_param;
+ struct link_station_info *link_sinfo;
+ struct nlattr *links, *link;
+ int link_id;
hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
if (!hdr) {
@@ -6950,6 +7283,40 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
goto nla_put_failure;
}
+ if (sinfo->valid_links) {
+ links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS);
+ if (!links)
+ goto nla_put_failure;
+
+ for_each_valid_link(sinfo, link_id) {
+ link_sinfo = sinfo->links[link_id];
+
+ if (WARN_ON_ONCE(!link_sinfo))
+ continue;
+
+ if (!is_valid_ether_addr(link_sinfo->addr))
+ continue;
+
+ link = nla_nest_start(msg, link_id + 1);
+ if (!link)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID,
+ link_id))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN,
+ link_sinfo->addr))
+ goto nla_put_failure;
+
+ if (nl80211_fill_link_station(msg, rdev, link_sinfo))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, link);
+ }
+ nla_nest_end(msg, links);
+ }
+
cfg80211_sinfo_release_content(sinfo);
genlmsg_end(msg, hdr);
return 0;
@@ -6960,6 +7327,194 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
return -EMSGSIZE;
}
+static void cfg80211_sta_set_mld_sinfo(struct station_info *sinfo)
+{
+ struct link_station_info *link_sinfo;
+ int link_id, init = 0;
+ u32 link_inactive_time;
+
+ sinfo->signal = -99;
+
+ for_each_valid_link(sinfo, link_id) {
+ link_sinfo = sinfo->links[link_id];
+ if (!link_sinfo)
+ continue;
+
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
+ sinfo->tx_packets += link_sinfo->tx_packets;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
+ }
+
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
+ sinfo->rx_packets += link_sinfo->rx_packets;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
+ }
+
+ if (link_sinfo->filled &
+ (BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+ BIT_ULL(NL80211_STA_INFO_TX_BYTES64))) {
+ sinfo->tx_bytes += link_sinfo->tx_bytes;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES);
+ }
+
+ if (link_sinfo->filled &
+ (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+ BIT_ULL(NL80211_STA_INFO_RX_BYTES64))) {
+ sinfo->rx_bytes += link_sinfo->rx_bytes;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_TX_RETRIES)) {
+ sinfo->tx_retries += link_sinfo->tx_retries;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
+ }
+
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) {
+ sinfo->tx_failed += link_sinfo->tx_failed;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC)) {
+ sinfo->rx_dropped_misc += link_sinfo->rx_dropped_misc;
+ sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_BEACON_LOSS)) {
+ sinfo->beacon_loss_count +=
+ link_sinfo->beacon_loss_count;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) {
+ sinfo->expected_throughput +=
+ link_sinfo->expected_throughput;
+ sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
+ }
+
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_MPDUS)) {
+ sinfo->rx_mpdu_count += link_sinfo->rx_mpdu_count;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_MPDUS);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_FCS_ERROR_COUNT)) {
+ sinfo->fcs_err_count += link_sinfo->fcs_err_count;
+ sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_FCS_ERROR_COUNT);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_BEACON_RX)) {
+ sinfo->rx_beacon += link_sinfo->rx_beacon;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX);
+ }
+
+ /* Report MLO signal and signal_avg as the best value among links */
+ if ((link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) &&
+ link_sinfo->signal > sinfo->signal) {
+ sinfo->signal = link_sinfo->signal;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
+ }
+
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG)) &&
+ link_sinfo->signal_avg > sinfo->signal_avg) {
+ sinfo->signal_avg = link_sinfo->signal_avg;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
+ }
+
+ /* Report the smallest per-link value for the MLO-level
+ * inactive_time and bss_param fields.
+ */
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME)) &&
+ (!init ||
+ link_inactive_time > link_sinfo->inactive_time)) {
+ link_inactive_time = link_sinfo->inactive_time;
+ sinfo->inactive_time = link_sinfo->inactive_time;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME);
+ }
+
+ if (link_sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM) &&
+ (!init ||
+ sinfo->bss_param.dtim_period >
+ link_sinfo->bss_param.dtim_period)) {
+ sinfo->bss_param.dtim_period =
+ link_sinfo->bss_param.dtim_period;
+ sinfo->bss_param.beacon_interval =
+ link_sinfo->bss_param.beacon_interval;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BSS_PARAM);
+ }
+
+ /* Report MLO rates from the most recently active link */
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) &&
+ (!init ||
+ link_inactive_time > link_sinfo->inactive_time)) {
+ sinfo->txrate = link_sinfo->txrate;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+ }
+ if ((link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) &&
+ (!init ||
+ link_inactive_time > link_sinfo->inactive_time)) {
+ sinfo->rxrate = link_sinfo->rxrate;
+ sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
+ }
+
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_TX_DURATION) &&
+ (!init ||
+ link_inactive_time > link_sinfo->inactive_time)) {
+ sinfo->tx_duration += link_sinfo->tx_duration;
+ sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_TX_DURATION);
+ }
+ if (link_sinfo->filled &
+ BIT_ULL(NL80211_STA_INFO_RX_DURATION) &&
+ (!init ||
+ link_inactive_time > link_sinfo->inactive_time)) {
+ sinfo->rx_duration += link_sinfo->rx_duration;
+ sinfo->filled |=
+ BIT_ULL(NL80211_STA_INFO_RX_DURATION);
+ }
+ init++;
+
+ /* Accumulate per-TID rx/tx stats across links */
+ if (sinfo->pertid) {
+ sinfo->pertid->rx_msdu +=
+ link_sinfo->pertid->rx_msdu;
+ sinfo->pertid->tx_msdu +=
+ link_sinfo->pertid->tx_msdu;
+ sinfo->pertid->tx_msdu_retries +=
+ link_sinfo->pertid->tx_msdu_retries;
+ sinfo->pertid->tx_msdu_failed +=
+ link_sinfo->pertid->tx_msdu_failed;
+
+ sinfo->pertid->filled |=
+ BIT(NL80211_TID_STATS_RX_MSDU) |
+ BIT(NL80211_TID_STATS_TX_MSDU) |
+ BIT(NL80211_TID_STATS_TX_MSDU_RETRIES) |
+ BIT(NL80211_TID_STATS_TX_MSDU_FAILED);
+ }
+ }
+
+ /* Reset sinfo->filled bits to exclude fields which don't make
+ * much sense at the MLO level.
+ */
+ sinfo->filled &= ~BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
+ sinfo->filled &= ~BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+}
+
static int nl80211_dump_station(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -6968,7 +7523,8 @@ static int nl80211_dump_station(struct sk_buff *skb,
struct wireless_dev *wdev;
u8 mac_addr[ETH_ALEN];
int sta_idx = cb->args[2];
- int err;
+ bool sinfo_alloc = false;
+ int err, i;
err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
if (err)
@@ -6988,6 +7544,17 @@ static int nl80211_dump_station(struct sk_buff *skb,
while (1) {
memset(&sinfo, 0, sizeof(sinfo));
+
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ sinfo.links[i] =
+ kzalloc(sizeof(*sinfo.links[0]), GFP_KERNEL);
+ if (!sinfo.links[i]) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+ sinfo_alloc = true;
+ }
+
err = rdev_dump_station(rdev, wdev->netdev, sta_idx,
mac_addr, &sinfo);
if (err == -ENOENT)
@@ -6995,6 +7562,14 @@ static int nl80211_dump_station(struct sk_buff *skb,
if (err)
goto out_err;
+ if (sinfo.valid_links)
+ cfg80211_sta_set_mld_sinfo(&sinfo);
+
+ /* reset the sinfo_alloc flag as nl80211_send_station()
+ * always releases sinfo
+ */
+ sinfo_alloc = false;
+
if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -7009,6 +7584,8 @@ static int nl80211_dump_station(struct sk_buff *skb,
cb->args[2] = sta_idx;
err = skb->len;
out_err:
+ if (sinfo_alloc)
+ cfg80211_sinfo_release_content(&sinfo);
wiphy_unlock(&rdev->wiphy);
return err;
@@ -7021,7 +7598,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
struct station_info sinfo;
struct sk_buff *msg;
u8 *mac_addr = NULL;
- int err;
+ int err, i;
memset(&sinfo, 0, sizeof(sinfo));
@@ -7033,9 +7610,19 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->get_station)
return -EOPNOTSUPP;
+ for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) {
+ sinfo.links[i] = kzalloc(sizeof(*sinfo.links[0]), GFP_KERNEL);
+ if (!sinfo.links[i]) {
+ cfg80211_sinfo_release_content(&sinfo);
+ return -ENOMEM;
+ }
+ }
+
err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
- if (err)
+ if (err) {
+ cfg80211_sinfo_release_content(&sinfo);
return err;
+ }
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
@@ -7043,6 +7630,9 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
}
+ if (sinfo.valid_links)
+ cfg80211_sta_set_mld_sinfo(&sinfo);
+
if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION,
info->snd_portid, info->snd_seq, 0,
rdev, dev, mac_addr, &sinfo) < 0) {
@@ -7349,6 +7939,10 @@ static int nl80211_set_station_tdls(struct genl_info *info,
}
}
+ if (info->attrs[NL80211_ATTR_S1G_CAPABILITY])
+ params->link_sta_params.s1g_capa =
+ nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]);
+
err = nl80211_parse_sta_channel_info(info, params);
if (err)
return err;
@@ -7675,6 +8269,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
params.link_sta_params.he_6ghz_capa =
nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
+ if (info->attrs[NL80211_ATTR_S1G_CAPABILITY])
+ params.link_sta_params.s1g_capa =
+ nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]);
+
if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
params.link_sta_params.opmode_notif_used = true;
params.link_sta_params.opmode_notif =
@@ -9243,6 +9841,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
{
unsigned int link_id;
bool all_ok = true;
+ int radio_idx;
lockdep_assert_wiphy(wdev->wiphy);
@@ -9252,8 +9851,10 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
if (!cfg80211_beaconing_iface_active(wdev))
return true;
+ radio_idx = cfg80211_get_radio_idx_by_chan(wdev->wiphy, chan);
+
/*
- * FIXME: check if we have a free HW resource/link for chan
+ * FIXME: check if we have a free radio/link for chan
*
* This, as well as the FIXME below, requires knowing the link
* capabilities of the hardware.
@@ -9262,20 +9863,28 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
/* we cannot leave radar channels */
for_each_valid_link(wdev, link_id) {
struct cfg80211_chan_def *chandef;
+ int link_radio_idx;
chandef = wdev_chandef(wdev, link_id);
if (!chandef || !chandef->chan)
continue;
+ if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR))
+ continue;
+
/*
- * FIXME: don't require all_ok, but rather check only the
- * correct HW resource/link onto which 'chan' falls,
- * as only that link leaves the channel for doing
- * the off-channel operation.
+ * chandef->chan is a radar channel. If the radio/link onto
+ * which this radar channel falls is the same radio/link onto
+ * which the input 'chan' falls, off-channel operation should
+ * not be allowed. Hence, set 'all_ok' to false.
*/
- if (chandef->chan->flags & IEEE80211_CHAN_RADAR)
+ link_radio_idx = cfg80211_get_radio_idx_by_chan(wdev->wiphy,
+ chandef->chan);
+ if (link_radio_idx == radio_idx) {
all_ok = false;
+ break;
+ }
}
if (all_ok)
@@ -9296,34 +9905,12 @@ static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag,
static int
nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
- void *request, struct nlattr **attrs,
- bool is_sched_scan)
+ struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask,
+ u32 *flags, enum nl80211_feature_flags randomness_flag)
{
- u8 *mac_addr, *mac_addr_mask;
- u32 *flags;
- enum nl80211_feature_flags randomness_flag;
-
if (!attrs[NL80211_ATTR_SCAN_FLAGS])
return 0;
- if (is_sched_scan) {
- struct cfg80211_sched_scan_request *req = request;
-
- randomness_flag = wdev ?
- NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
- NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
- flags = &req->flags;
- mac_addr = req->mac_addr;
- mac_addr_mask = req->mac_addr_mask;
- } else {
- struct cfg80211_scan_request *req = request;
-
- randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
- flags = &req->flags;
- mac_addr = req->mac_addr;
- mac_addr_mask = req->mac_addr_mask;
- }
-
*flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
@@ -9372,11 +9959,35 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
return 0;
}
+static int
+nl80211_check_scan_flags_sched(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct nlattr **attrs,
+ struct cfg80211_sched_scan_request *req)
+{
+ return nl80211_check_scan_flags(wiphy, wdev, attrs,
+ req->mac_addr, req->mac_addr_mask,
+ &req->flags,
+ wdev ? NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR);
+}
+
+static int
+nl80211_check_scan_flags_reg(struct wiphy *wiphy, struct wireless_dev *wdev,
+ struct nlattr **attrs,
+ struct cfg80211_scan_request_int *req)
+{
+ return nl80211_check_scan_flags(wiphy, wdev, attrs,
+ req->req.mac_addr,
+ req->req.mac_addr_mask,
+ &req->req.flags,
+ NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR);
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct wireless_dev *wdev = info->user_ptr[1];
- struct cfg80211_scan_request *request;
+ struct cfg80211_scan_request_int *request;
struct nlattr *scan_freqs = NULL;
bool scan_freqs_khz = false;
struct nlattr *attr;
@@ -9428,21 +10039,20 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
if (ie_len > wiphy->max_scan_ie_len)
return -EINVAL;
- size = struct_size(request, channels, n_channels);
+ size = struct_size(request, req.channels, n_channels);
ssids_offset = size;
- size = size_add(size, array_size(sizeof(*request->ssids), n_ssids));
+ size = size_add(size, array_size(sizeof(*request->req.ssids), n_ssids));
ie_offset = size;
size = size_add(size, ie_len);
request = kzalloc(size, GFP_KERNEL);
if (!request)
return -ENOMEM;
- request->n_channels = n_channels;
if (n_ssids)
- request->ssids = (void *)request + ssids_offset;
- request->n_ssids = n_ssids;
+ request->req.ssids = (void *)request + ssids_offset;
+ request->req.n_ssids = n_ssids;
if (ie_len)
- request->ie = (void *)request + ie_offset;
+ request->req.ie = (void *)request + ie_offset;
i = 0;
if (scan_freqs) {
@@ -9465,7 +10075,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
!cfg80211_wdev_channel_allowed(wdev, chan))
continue;
- request->channels[i] = chan;
+ request->req.channels[i] = chan;
i++;
}
} else {
@@ -9486,7 +10096,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
!cfg80211_wdev_channel_allowed(wdev, chan))
continue;
- request->channels[i] = chan;
+ request->req.channels[i] = chan;
i++;
}
}
@@ -9497,10 +10107,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
- request->n_channels = i;
+ request->req.n_channels = i;
- for (i = 0; i < request->n_channels; i++) {
- struct ieee80211_channel *chan = request->channels[i];
+ for (i = 0; i < request->req.n_channels; i++) {
+ struct ieee80211_channel *chan = request->req.channels[i];
/* if we can go off-channel to the target channel we're good */
if (cfg80211_off_channel_oper_allowed(wdev, chan))
@@ -9519,22 +10129,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
err = -EINVAL;
goto out_free;
}
- request->ssids[i].ssid_len = nla_len(attr);
- memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr));
+ request->req.ssids[i].ssid_len = nla_len(attr);
+ memcpy(request->req.ssids[i].ssid,
+ nla_data(attr), nla_len(attr));
i++;
}
}
if (info->attrs[NL80211_ATTR_IE]) {
- request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
- memcpy((void *)request->ie,
+ request->req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+ memcpy((void *)request->req.ie,
nla_data(info->attrs[NL80211_ATTR_IE]),
- request->ie_len);
+ request->req.ie_len);
}
for (i = 0; i < NUM_NL80211_BANDS; i++)
if (wiphy->bands[i])
- request->rates[i] =
+ request->req.rates[i] =
(1 << wiphy->bands[i]->n_bitrates) - 1;
if (info->attrs[NL80211_ATTR_SCAN_SUPP_RATES]) {
@@ -9554,25 +10165,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
err = ieee80211_get_ratemask(wiphy->bands[band],
nla_data(attr),
nla_len(attr),
- &request->rates[band]);
+ &request->req.rates[band]);
if (err)
goto out_free;
}
}
if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) {
- request->duration =
+ request->req.duration =
nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]);
- request->duration_mandatory =
+ request->req.duration_mandatory =
nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
}
- err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
- false);
+ err = nl80211_check_scan_flags_reg(wiphy, wdev, info->attrs, request);
if (err)
goto out_free;
- request->no_cck =
+ request->req.no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
/* Initial implementation used NL80211_ATTR_MAC to set the specific
@@ -9585,19 +10195,21 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
* (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use).
*/
if (info->attrs[NL80211_ATTR_BSSID])
- memcpy(request->bssid,
+ memcpy(request->req.bssid,
nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN);
- else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
+ else if (!(request->req.flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
info->attrs[NL80211_ATTR_MAC])
- memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]),
+ memcpy(request->req.bssid,
+ nla_data(info->attrs[NL80211_ATTR_MAC]),
ETH_ALEN);
else
- eth_broadcast_addr(request->bssid);
+ eth_broadcast_addr(request->req.bssid);
- request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs);
- request->wdev = wdev;
- request->wiphy = &rdev->wiphy;
- request->scan_start = jiffies;
+ request->req.tsf_report_link_id =
+ nl80211_link_id_or_invalid(info->attrs);
+ request->req.wdev = wdev;
+ request->req.wiphy = &rdev->wiphy;
+ request->req.scan_start = jiffies;
rdev->scan_req = request;
err = cfg80211_scan(rdev);
@@ -10019,7 +10631,7 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request->ie_len);
}
- err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+ err = nl80211_check_scan_flags_sched(wiphy, wdev, attrs, request);
if (err)
goto out_free;
@@ -10473,6 +11085,16 @@ skip_beacons:
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
+ if ((wdev->iftype == NL80211_IFTYPE_AP ||
+ wdev->iftype == NL80211_IFTYPE_P2P_GO) &&
+ info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
+ err = nl80211_parse_unsol_bcast_probe_resp(
+ rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
+ &params.unsol_bcast_probe_resp);
+ if (err)
+ goto free;
+ }
+
params.link_id = link_id;
err = rdev_channel_switch(rdev, dev, &params);
@@ -16275,6 +16897,14 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
params.counter_offset_presp = offset;
}
+ if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
+ err = nl80211_parse_unsol_bcast_probe_resp(
+ rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
+ &params.unsol_bcast_probe_resp);
+ if (err)
+ goto out;
+ }
+
params.link_id = nl80211_link_id(info->attrs);
err = rdev_color_change(rdev, dev, &params);
@@ -16929,6 +17559,7 @@ static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info)
if (!sar_spec)
return -ENOMEM;
+ sar_spec->num_sub_specs = specs;
sar_spec->type = type;
specs = 0;
nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) {
@@ -17899,7 +18530,7 @@ void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
static int nl80211_add_scan_req(struct sk_buff *msg,
struct cfg80211_registered_device *rdev)
{
- struct cfg80211_scan_request *req = rdev->scan_req;
+ struct cfg80211_scan_request_int *req = rdev->scan_req;
struct nlattr *nest;
int i;
struct cfg80211_scan_info *info;
@@ -17910,19 +18541,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS);
if (!nest)
goto nla_put_failure;
- for (i = 0; i < req->n_ssids; i++) {
- if (nla_put(msg, i, req->ssids[i].ssid_len, req->ssids[i].ssid))
+ for (i = 0; i < req->req.n_ssids; i++) {
+ if (nla_put(msg, i, req->req.ssids[i].ssid_len,
+ req->req.ssids[i].ssid))
goto nla_put_failure;
}
nla_nest_end(msg, nest);
- if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) {
+ if (req->req.flags & NL80211_SCAN_FLAG_FREQ_KHZ) {
nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ);
if (!nest)
goto nla_put_failure;
- for (i = 0; i < req->n_channels; i++) {
+ for (i = 0; i < req->req.n_channels; i++) {
if (nla_put_u32(msg, i,
- ieee80211_channel_to_khz(req->channels[i])))
+ ieee80211_channel_to_khz(req->req.channels[i])))
goto nla_put_failure;
}
nla_nest_end(msg, nest);
@@ -17931,19 +18563,20 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
NL80211_ATTR_SCAN_FREQUENCIES);
if (!nest)
goto nla_put_failure;
- for (i = 0; i < req->n_channels; i++) {
- if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+ for (i = 0; i < req->req.n_channels; i++) {
+ if (nla_put_u32(msg, i,
+ req->req.channels[i]->center_freq))
goto nla_put_failure;
}
nla_nest_end(msg, nest);
}
- if (req->ie &&
- nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie))
+ if (req->req.ie &&
+ nla_put(msg, NL80211_ATTR_IE, req->req.ie_len, req->req.ie))
goto nla_put_failure;
- if (req->flags &&
- nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags))
+ if (req->req.flags &&
+ nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->req.flags))
goto nla_put_failure;
info = rdev->int_scan_req ? &rdev->int_scan_req->info :
@@ -19123,7 +19756,7 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
EXPORT_SYMBOL(cfg80211_conn_failed);
static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
- const u8 *addr, gfp_t gfp)
+ const u8 *addr, int link_id, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
@@ -19146,7 +19779,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr))
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
+ (link_id >= 0 &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -19158,13 +19793,13 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
}
-bool cfg80211_rx_spurious_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp)
+bool cfg80211_rx_spurious_frame(struct net_device *dev, const u8 *addr,
+ int link_id, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
bool ret;
- trace_cfg80211_rx_spurious_frame(dev, addr);
+ trace_cfg80211_rx_spurious_frame(dev, addr, link_id);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
@@ -19172,19 +19807,19 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev,
return false;
}
ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
- addr, gfp);
+ addr, link_id, gfp);
trace_cfg80211_return_bool(ret);
return ret;
}
EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
-bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
- const u8 *addr, gfp_t gfp)
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, const u8 *addr,
+ int link_id, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
bool ret;
- trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
+ trace_cfg80211_rx_unexpected_4addr_frame(dev, addr, link_id);
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
wdev->iftype != NL80211_IFTYPE_P2P_GO &&
@@ -19194,7 +19829,7 @@ bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
}
ret = __nl80211_unexpected_frame(dev,
NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
- addr, gfp);
+ addr, link_id, gfp);
trace_cfg80211_return_bool(ret);
return ret;
}
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 9f4783c2354c..ac6884bacf3f 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -456,15 +456,15 @@ rdev_set_monitor_channel(struct cfg80211_registered_device *rdev,
}
static inline int rdev_scan(struct cfg80211_registered_device *rdev,
- struct cfg80211_scan_request *request)
+ struct cfg80211_scan_request_int *request)
{
int ret;
- if (WARN_ON_ONCE(!request->n_ssids && request->ssids))
+ if (WARN_ON_ONCE(!request->req.n_ssids && request->req.ssids))
return -EINVAL;
trace_rdev_scan(&rdev->wiphy, request);
- ret = rdev->ops->scan(&rdev->wiphy, request);
+ ret = rdev->ops->scan(&rdev->wiphy, &request->req);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -577,35 +577,40 @@ static inline int rdev_leave_ibss(struct cfg80211_registered_device *rdev,
}
static inline int
-rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed)
+rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, int radio_idx,
+ u32 changed)
{
int ret = -EOPNOTSUPP;
- trace_rdev_set_wiphy_params(&rdev->wiphy, changed);
+ trace_rdev_set_wiphy_params(&rdev->wiphy, radio_idx, changed);
if (rdev->ops->set_wiphy_params)
- ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed);
+ ret = rdev->ops->set_wiphy_params(&rdev->wiphy, radio_idx,
+ changed);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev,
- enum nl80211_tx_power_setting type, int mbm)
+ struct wireless_dev *wdev, int radio_idx,
+ enum nl80211_tx_power_setting type,
+ int mbm)
{
int ret;
- trace_rdev_set_tx_power(&rdev->wiphy, wdev, type, mbm);
- ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, type, mbm);
+ trace_rdev_set_tx_power(&rdev->wiphy, wdev, radio_idx, type, mbm);
+ ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, radio_idx, type,
+ mbm);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, unsigned int link_id,
- int *dbm)
+ struct wireless_dev *wdev, int radio_idx,
+ unsigned int link_id, int *dbm)
{
int ret;
- trace_rdev_get_tx_power(&rdev->wiphy, wdev, link_id);
- ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, link_id, dbm);
+ trace_rdev_get_tx_power(&rdev->wiphy, wdev, radio_idx, link_id);
+ ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, radio_idx, link_id,
+ dbm);
trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm);
return ret;
}
@@ -857,21 +862,21 @@ rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev,
}
static inline int rdev_set_antenna(struct cfg80211_registered_device *rdev,
- u32 tx_ant, u32 rx_ant)
+ int radio_idx, u32 tx_ant, u32 rx_ant)
{
int ret;
- trace_rdev_set_antenna(&rdev->wiphy, tx_ant, rx_ant);
- ret = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant);
+ trace_rdev_set_antenna(&rdev->wiphy, radio_idx, tx_ant, rx_ant);
+ ret = rdev->ops->set_antenna(&rdev->wiphy, radio_idx, tx_ant, rx_ant);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline int rdev_get_antenna(struct cfg80211_registered_device *rdev,
- u32 *tx_ant, u32 *rx_ant)
+ int radio_idx, u32 *tx_ant, u32 *rx_ant)
{
int ret;
- trace_rdev_get_antenna(&rdev->wiphy);
- ret = rdev->ops->get_antenna(&rdev->wiphy, tx_ant, rx_ant);
+ trace_rdev_get_antenna(&rdev->wiphy, radio_idx);
+ ret = rdev->ops->get_antenna(&rdev->wiphy, radio_idx, tx_ant, rx_ant);
if (ret)
trace_rdev_return_int(&rdev->wiphy, ret);
else
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c1752b31734f..3b0ac3437f81 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -53,7 +53,7 @@
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/nl80211.h>
-#include <linux/platform_device.h>
+#include <linux/device/faux.h>
#include <linux/verification.h>
#include <linux/moduleparam.h>
#include <linux/firmware.h>
@@ -105,7 +105,7 @@ static struct regulatory_request __rcu *last_request =
(void __force __rcu *)&core_request_world;
/* To trigger userspace events and load firmware */
-static struct platform_device *reg_pdev;
+static struct faux_device *reg_fdev;
/*
* Central wireless core regulatory domains, we only need two,
@@ -583,7 +583,7 @@ static int call_crda(const char *alpha2)
else
pr_debug("Calling CRDA to update world regulatory domain\n");
- ret = kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env);
+ ret = kobject_uevent_env(&reg_fdev->dev.kobj, KOBJ_CHANGE, env);
if (ret)
return ret;
@@ -779,7 +779,7 @@ static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
const struct firmware *sig;
bool result;
- if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+ if (request_firmware(&sig, "regulatory.db.p7s", &reg_fdev->dev))
return false;
result = verify_pkcs7_signature(data, size, sig->data, sig->size,
@@ -1061,7 +1061,7 @@ static int query_regdb_file(const char *alpha2)
return -ENOMEM;
err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
- &reg_pdev->dev, GFP_KERNEL,
+ &reg_fdev->dev, GFP_KERNEL,
(void *)alpha2, regdb_fw_cb);
if (err)
kfree(alpha2);
@@ -1077,7 +1077,7 @@ int reg_reload_regdb(void)
const struct ieee80211_regdomain *current_regdomain;
struct regulatory_request *request;
- err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+ err = request_firmware(&fw, "regulatory.db", &reg_fdev->dev);
if (err)
return err;
@@ -4229,6 +4229,8 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
struct wireless_dev *wdev;
unsigned int link_id;
+ guard(wiphy)(&rdev->wiphy);
+
/* If we finished CAC or received radar, we should end any
* CAC running on the same channels.
* the check !cfg80211_chandef_dfs_usable contain 2 options:
@@ -4300,12 +4302,12 @@ static int __init regulatory_init_db(void)
* in that case, don't try to do any further work here as
* it's doomed to lead to crashes.
*/
- if (IS_ERR_OR_NULL(reg_pdev))
+ if (!reg_fdev)
return -EINVAL;
err = load_builtin_regdb_keys();
if (err) {
- platform_device_unregister(reg_pdev);
+ faux_device_destroy(reg_fdev);
return err;
}
@@ -4313,7 +4315,7 @@ static int __init regulatory_init_db(void)
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
if (err) {
if (err == -ENOMEM) {
- platform_device_unregister(reg_pdev);
+ faux_device_destroy(reg_fdev);
return err;
}
/*
@@ -4342,9 +4344,9 @@ late_initcall(regulatory_init_db);
int __init regulatory_init(void)
{
- reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
- if (IS_ERR(reg_pdev))
- return PTR_ERR(reg_pdev);
+ reg_fdev = faux_device_create("regulatory", NULL, NULL);
+ if (!reg_fdev)
+ return -ENODEV;
rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
@@ -4372,9 +4374,9 @@ void regulatory_exit(void)
reset_regdomains(true, NULL);
rtnl_unlock();
- dev_set_uevent_suppress(&reg_pdev->dev, true);
+ dev_set_uevent_suppress(&reg_fdev->dev, true);
- platform_device_unregister(reg_pdev);
+ faux_device_destroy(reg_fdev);
list_for_each_entry_safe(reg_beacon, btmp, &reg_pending_beacons, list) {
list_del(&reg_beacon->list);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e8a4fe44ec2d..a8339ed52404 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -782,9 +782,9 @@ cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
}
EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_parse_colocated_ap);
-static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
- struct ieee80211_channel *chan,
- bool add_to_6ghz)
+static void cfg80211_scan_req_add_chan(struct cfg80211_scan_request *request,
+ struct ieee80211_channel *chan,
+ bool add_to_6ghz)
{
int i;
u32 n_channels = request->n_channels;
@@ -838,30 +838,32 @@ static bool cfg80211_find_ssid_match(struct cfg80211_colocated_ap *ap,
return false;
}
-static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
+static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev,
+ bool first_part)
{
u8 i;
struct cfg80211_colocated_ap *ap;
int n_channels, count = 0, err;
- struct cfg80211_scan_request *request, *rdev_req = rdev->scan_req;
+ struct cfg80211_scan_request_int *request, *rdev_req = rdev->scan_req;
LIST_HEAD(coloc_ap_list);
bool need_scan_psc = true;
const struct ieee80211_sband_iftype_data *iftd;
size_t size, offs_ssids, offs_6ghz_params, offs_ies;
- rdev_req->scan_6ghz = true;
+ rdev_req->req.scan_6ghz = true;
+ rdev_req->req.first_part = first_part;
if (!rdev->wiphy.bands[NL80211_BAND_6GHZ])
return -EOPNOTSUPP;
iftd = ieee80211_get_sband_iftype_data(rdev->wiphy.bands[NL80211_BAND_6GHZ],
- rdev_req->wdev->iftype);
+ rdev_req->req.wdev->iftype);
if (!iftd || !iftd->he_cap.has_he)
return -EOPNOTSUPP;
n_channels = rdev->wiphy.bands[NL80211_BAND_6GHZ]->n_channels;
- if (rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) {
+ if (rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ) {
struct cfg80211_internal_bss *intbss;
spin_lock_bh(&rdev->bss_lock);
@@ -883,8 +885,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
* This is relevant for ML probe requests when the lower
* band APs have not been discovered.
*/
- if (is_broadcast_ether_addr(rdev_req->bssid) ||
- !ether_addr_equal(rdev_req->bssid, res->bssid) ||
+ if (is_broadcast_ether_addr(rdev_req->req.bssid) ||
+ !ether_addr_equal(rdev_req->req.bssid, res->bssid) ||
res->channel->band != NL80211_BAND_6GHZ)
continue;
@@ -911,13 +913,13 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
spin_unlock_bh(&rdev->bss_lock);
}
- size = struct_size(request, channels, n_channels);
+ size = struct_size(request, req.channels, n_channels);
offs_ssids = size;
- size += sizeof(*request->ssids) * rdev_req->n_ssids;
+ size += sizeof(*request->req.ssids) * rdev_req->req.n_ssids;
offs_6ghz_params = size;
- size += sizeof(*request->scan_6ghz_params) * count;
+ size += sizeof(*request->req.scan_6ghz_params) * count;
offs_ies = size;
- size += rdev_req->ie_len;
+ size += rdev_req->req.ie_len;
request = kzalloc(size, GFP_KERNEL);
if (!request) {
@@ -926,26 +928,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
}
*request = *rdev_req;
- request->n_channels = 0;
- request->n_6ghz_params = 0;
- if (rdev_req->n_ssids) {
+ request->req.n_channels = 0;
+ request->req.n_6ghz_params = 0;
+ if (rdev_req->req.n_ssids) {
/*
* Add the ssids from the parent scan request to the new
* scan request, so the driver would be able to use them
* in its probe requests to discover hidden APs on PSC
* channels.
*/
- request->ssids = (void *)request + offs_ssids;
- memcpy(request->ssids, rdev_req->ssids,
- sizeof(*request->ssids) * request->n_ssids);
+ request->req.ssids = (void *)request + offs_ssids;
+ memcpy(request->req.ssids, rdev_req->req.ssids,
+ sizeof(*request->req.ssids) * request->req.n_ssids);
}
- request->scan_6ghz_params = (void *)request + offs_6ghz_params;
+ request->req.scan_6ghz_params = (void *)request + offs_6ghz_params;
- if (rdev_req->ie_len) {
+ if (rdev_req->req.ie_len) {
void *ie = (void *)request + offs_ies;
- memcpy(ie, rdev_req->ie, rdev_req->ie_len);
- request->ie = ie;
+ memcpy(ie, rdev_req->req.ie, rdev_req->req.ie_len);
+ request->req.ie = ie;
}
/*
@@ -953,10 +955,12 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
* and at least one of the reported co-located APs with same SSID
* indicating that all APs in the same ESS are co-located
*/
- if (count && request->n_ssids == 1 && request->ssids[0].ssid_len) {
+ if (count &&
+ request->req.n_ssids == 1 &&
+ request->req.ssids[0].ssid_len) {
list_for_each_entry(ap, &coloc_ap_list, list) {
if (ap->colocated_ess &&
- cfg80211_find_ssid_match(ap, request)) {
+ cfg80211_find_ssid_match(ap, &request->req)) {
need_scan_psc = false;
break;
}
@@ -968,51 +972,52 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
* regardless of the collocated APs (PSC channels or all channels
* in case that NL80211_SCAN_FLAG_COLOCATED_6GHZ is not set)
*/
- for (i = 0; i < rdev_req->n_channels; i++) {
- if (rdev_req->channels[i]->band == NL80211_BAND_6GHZ &&
+ for (i = 0; i < rdev_req->req.n_channels; i++) {
+ if (rdev_req->req.channels[i]->band == NL80211_BAND_6GHZ &&
((need_scan_psc &&
- cfg80211_channel_is_psc(rdev_req->channels[i])) ||
- !(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) {
- cfg80211_scan_req_add_chan(request,
- rdev_req->channels[i],
+ cfg80211_channel_is_psc(rdev_req->req.channels[i])) ||
+ !(rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))) {
+ cfg80211_scan_req_add_chan(&request->req,
+ rdev_req->req.channels[i],
false);
}
}
- if (!(rdev_req->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))
+ if (!(rdev_req->req.flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ))
goto skip;
list_for_each_entry(ap, &coloc_ap_list, list) {
bool found = false;
struct cfg80211_scan_6ghz_params *scan_6ghz_params =
- &request->scan_6ghz_params[request->n_6ghz_params];
+ &request->req.scan_6ghz_params[request->req.n_6ghz_params];
struct ieee80211_channel *chan =
ieee80211_get_channel(&rdev->wiphy, ap->center_freq);
if (!chan || chan->flags & IEEE80211_CHAN_DISABLED ||
- !cfg80211_wdev_channel_allowed(rdev_req->wdev, chan))
+ !cfg80211_wdev_channel_allowed(rdev_req->req.wdev, chan))
continue;
- for (i = 0; i < rdev_req->n_channels; i++) {
- if (rdev_req->channels[i] == chan)
+ for (i = 0; i < rdev_req->req.n_channels; i++) {
+ if (rdev_req->req.channels[i] == chan)
found = true;
}
if (!found)
continue;
- if (request->n_ssids > 0 &&
- !cfg80211_find_ssid_match(ap, request))
+ if (request->req.n_ssids > 0 &&
+ !cfg80211_find_ssid_match(ap, &request->req))
continue;
- if (!is_broadcast_ether_addr(request->bssid) &&
- !ether_addr_equal(request->bssid, ap->bssid))
+ if (!is_broadcast_ether_addr(request->req.bssid) &&
+ !ether_addr_equal(request->req.bssid, ap->bssid))
continue;
- if (!request->n_ssids && ap->multi_bss && !ap->transmitted_bssid)
+ if (!request->req.n_ssids && ap->multi_bss &&
+ !ap->transmitted_bssid)
continue;
- cfg80211_scan_req_add_chan(request, chan, true);
+ cfg80211_scan_req_add_chan(&request->req, chan, true);
memcpy(scan_6ghz_params->bssid, ap->bssid, ETH_ALEN);
scan_6ghz_params->short_ssid = ap->short_ssid;
scan_6ghz_params->short_ssid_valid = ap->short_ssid_valid;
@@ -1028,14 +1033,14 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
if (cfg80211_channel_is_psc(chan) && !need_scan_psc)
scan_6ghz_params->psc_no_listen = true;
- request->n_6ghz_params++;
+ request->req.n_6ghz_params++;
}
skip:
cfg80211_free_coloc_ap_list(&coloc_ap_list);
- if (request->n_channels) {
- struct cfg80211_scan_request *old = rdev->int_scan_req;
+ if (request->req.n_channels) {
+ struct cfg80211_scan_request_int *old = rdev->int_scan_req;
rdev->int_scan_req = request;
@@ -1043,7 +1048,7 @@ skip:
* If this scan follows a previous scan, save the scan start
* info from the first part of the scan
*/
- if (old)
+ if (!first_part && !WARN_ON(!old))
rdev->int_scan_req->info = old->info;
err = rdev_scan(rdev, request);
@@ -1063,35 +1068,39 @@ skip:
int cfg80211_scan(struct cfg80211_registered_device *rdev)
{
- struct cfg80211_scan_request *request;
- struct cfg80211_scan_request *rdev_req = rdev->scan_req;
+ struct cfg80211_scan_request_int *request;
+ struct cfg80211_scan_request_int *rdev_req = rdev->scan_req;
u32 n_channels = 0, idx, i;
- if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ))
+ if (!(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ)) {
+ rdev_req->req.first_part = true;
return rdev_scan(rdev, rdev_req);
+ }
- for (i = 0; i < rdev_req->n_channels; i++) {
- if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ)
+ for (i = 0; i < rdev_req->req.n_channels; i++) {
+ if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ)
n_channels++;
}
if (!n_channels)
- return cfg80211_scan_6ghz(rdev);
+ return cfg80211_scan_6ghz(rdev, true);
- request = kzalloc(struct_size(request, channels, n_channels),
+ request = kzalloc(struct_size(request, req.channels, n_channels),
GFP_KERNEL);
if (!request)
return -ENOMEM;
*request = *rdev_req;
- request->n_channels = n_channels;
+ request->req.n_channels = n_channels;
- for (i = idx = 0; i < rdev_req->n_channels; i++) {
- if (rdev_req->channels[i]->band != NL80211_BAND_6GHZ)
- request->channels[idx++] = rdev_req->channels[i];
+ for (i = idx = 0; i < rdev_req->req.n_channels; i++) {
+ if (rdev_req->req.channels[i]->band != NL80211_BAND_6GHZ)
+ request->req.channels[idx++] =
+ rdev_req->req.channels[i];
}
- rdev_req->scan_6ghz = false;
+ rdev_req->req.scan_6ghz = false;
+ rdev_req->req.first_part = true;
rdev->int_scan_req = request;
return rdev_scan(rdev, request);
}
@@ -1099,7 +1108,7 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev)
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
bool send_message)
{
- struct cfg80211_scan_request *request, *rdev_req;
+ struct cfg80211_scan_request_int *request, *rdev_req;
struct wireless_dev *wdev;
struct sk_buff *msg;
#ifdef CONFIG_CFG80211_WEXT
@@ -1118,13 +1127,13 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
if (!rdev_req)
return;
- wdev = rdev_req->wdev;
+ wdev = rdev_req->req.wdev;
request = rdev->int_scan_req ? rdev->int_scan_req : rdev_req;
if (wdev_running(wdev) &&
(rdev->wiphy.flags & WIPHY_FLAG_SPLIT_SCAN_6GHZ) &&
- !rdev_req->scan_6ghz && !request->info.aborted &&
- !cfg80211_scan_6ghz(rdev))
+ !rdev_req->req.scan_6ghz && !request->info.aborted &&
+ !cfg80211_scan_6ghz(rdev, false))
return;
/*
@@ -1136,10 +1145,10 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
cfg80211_sme_scan_done(wdev->netdev);
if (!request->info.aborted &&
- request->flags & NL80211_SCAN_FLAG_FLUSH) {
+ request->req.flags & NL80211_SCAN_FLAG_FLUSH) {
/* flush entries from previous scans */
spin_lock_bh(&rdev->bss_lock);
- __cfg80211_bss_expire(rdev, request->scan_start);
+ __cfg80211_bss_expire(rdev, request->req.scan_start);
spin_unlock_bh(&rdev->bss_lock);
}
@@ -1175,13 +1184,16 @@ void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk)
void cfg80211_scan_done(struct cfg80211_scan_request *request,
struct cfg80211_scan_info *info)
{
- struct cfg80211_scan_info old_info = request->info;
+ struct cfg80211_scan_request_int *intreq =
+ container_of(request, struct cfg80211_scan_request_int, req);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(request->wiphy);
+ struct cfg80211_scan_info old_info = intreq->info;
- trace_cfg80211_scan_done(request, info);
- WARN_ON(request != wiphy_to_rdev(request->wiphy)->scan_req &&
- request != wiphy_to_rdev(request->wiphy)->int_scan_req);
+ trace_cfg80211_scan_done(intreq, info);
+ WARN_ON(intreq != rdev->scan_req &&
+ intreq != rdev->int_scan_req);
- request->info = *info;
+ intreq->info = *info;
/*
* In case the scan is split, the scan_start_tsf and tsf_bssid should
@@ -1189,14 +1201,13 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request,
* be non zero.
*/
if (request->scan_6ghz && old_info.scan_start_tsf) {
- request->info.scan_start_tsf = old_info.scan_start_tsf;
- memcpy(request->info.tsf_bssid, old_info.tsf_bssid,
- sizeof(request->info.tsf_bssid));
+ intreq->info.scan_start_tsf = old_info.scan_start_tsf;
+ memcpy(intreq->info.tsf_bssid, old_info.tsf_bssid,
+ sizeof(intreq->info.tsf_bssid));
}
- request->notified = true;
- wiphy_work_queue(request->wiphy,
- &wiphy_to_rdev(request->wiphy)->scan_done_wk);
+ intreq->notified = true;
+ wiphy_work_queue(request->wiphy, &rdev->scan_done_wk);
}
EXPORT_SYMBOL(cfg80211_scan_done);
@@ -2220,6 +2231,7 @@ cfg80211_get_6ghz_power_type(const u8 *elems, size_t elems_len)
return IEEE80211_REG_LPI_AP;
case IEEE80211_6GHZ_CTRL_REG_SP_AP:
case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP:
+ case IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP_OLD:
return IEEE80211_REG_SP_AP;
case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
return IEEE80211_REG_VLP_AP;
@@ -3496,7 +3508,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
struct cfg80211_registered_device *rdev;
struct wiphy *wiphy;
struct iw_scan_req *wreq = NULL;
- struct cfg80211_scan_request *creq;
+ struct cfg80211_scan_request_int *creq;
int i, err, n_channels = 0;
enum nl80211_band band;
@@ -3526,19 +3538,20 @@ int cfg80211_wext_siwscan(struct net_device *dev,
n_channels = ieee80211_get_num_supported_channels(wiphy);
}
- creq = kzalloc(struct_size(creq, channels, n_channels) +
+ creq = kzalloc(struct_size(creq, req.channels, n_channels) +
sizeof(struct cfg80211_ssid),
GFP_ATOMIC);
if (!creq)
return -ENOMEM;
- creq->wiphy = wiphy;
- creq->wdev = dev->ieee80211_ptr;
+ creq->req.wiphy = wiphy;
+ creq->req.wdev = dev->ieee80211_ptr;
/* SSIDs come after channels */
- creq->ssids = (void *)creq + struct_size(creq, channels, n_channels);
- creq->n_channels = n_channels;
- creq->n_ssids = 1;
- creq->scan_start = jiffies;
+ creq->req.ssids = (void *)creq +
+ struct_size(creq, req.channels, n_channels);
+ creq->req.n_channels = n_channels;
+ creq->req.n_ssids = 1;
+ creq->req.scan_start = jiffies;
/* translate "Scan on frequencies" request */
i = 0;
@@ -3554,7 +3567,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
/* ignore disabled channels */
chan = &wiphy->bands[band]->channels[j];
if (chan->flags & IEEE80211_CHAN_DISABLED ||
- !cfg80211_wdev_channel_allowed(creq->wdev, chan))
+ !cfg80211_wdev_channel_allowed(creq->req.wdev, chan))
continue;
/* If we have a wireless request structure and the
@@ -3577,7 +3590,8 @@ int cfg80211_wext_siwscan(struct net_device *dev,
}
wext_freq_found:
- creq->channels[i] = &wiphy->bands[band]->channels[j];
+ creq->req.channels[i] =
+ &wiphy->bands[band]->channels[j];
i++;
wext_freq_not_found: ;
}
@@ -3588,28 +3602,30 @@ int cfg80211_wext_siwscan(struct net_device *dev,
goto out;
}
- /* Set real number of channels specified in creq->channels[] */
- creq->n_channels = i;
+ /* Set real number of channels specified in creq->req.channels[] */
+ creq->req.n_channels = i;
/* translate "Scan for SSID" request */
if (wreq) {
if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
if (wreq->essid_len > IEEE80211_MAX_SSID_LEN)
return -EINVAL;
- memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
- creq->ssids[0].ssid_len = wreq->essid_len;
+ memcpy(creq->req.ssids[0].ssid, wreq->essid,
+ wreq->essid_len);
+ creq->req.ssids[0].ssid_len = wreq->essid_len;
}
if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) {
- creq->ssids = NULL;
- creq->n_ssids = 0;
+ creq->req.ssids = NULL;
+ creq->req.n_ssids = 0;
}
}
for (i = 0; i < NUM_NL80211_BANDS; i++)
if (wiphy->bands[i])
- creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1;
+ creq->req.rates[i] =
+ (1 << wiphy->bands[i]->n_bitrates) - 1;
- eth_broadcast_addr(creq->bssid);
+ eth_broadcast_addr(creq->req.bssid);
scoped_guard(wiphy, &rdev->wiphy) {
rdev->scan_req = creq;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index cf998500a965..826ec0a6355f 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020, 2022-2024 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020, 2022-2025 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/
@@ -64,7 +64,7 @@ static void cfg80211_sme_free(struct wireless_dev *wdev)
static int cfg80211_conn_scan(struct wireless_dev *wdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- struct cfg80211_scan_request *request;
+ struct cfg80211_scan_request_int *request;
int n_channels, err;
lockdep_assert_wiphy(wdev->wiphy);
@@ -77,13 +77,12 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
else
n_channels = ieee80211_get_num_supported_channels(wdev->wiphy);
- request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) +
- sizeof(request->channels[0]) * n_channels,
+ request = kzalloc(sizeof(*request) + sizeof(request->req.ssids[0]) +
+ sizeof(request->req.channels[0]) * n_channels,
GFP_KERNEL);
if (!request)
return -ENOMEM;
- request->n_channels = n_channels;
if (wdev->conn->params.channel) {
enum nl80211_band band = wdev->conn->params.channel->band;
struct ieee80211_supported_band *sband =
@@ -93,8 +92,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
kfree(request);
return -EINVAL;
}
- request->channels[0] = wdev->conn->params.channel;
- request->rates[band] = (1 << sband->n_bitrates) - 1;
+ request->req.channels[0] = wdev->conn->params.channel;
+ request->req.rates[band] = (1 << sband->n_bitrates) - 1;
} else {
int i = 0, j;
enum nl80211_band band;
@@ -109,26 +108,26 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
channel = &bands->channels[j];
if (channel->flags & IEEE80211_CHAN_DISABLED)
continue;
- request->channels[i++] = channel;
+ request->req.channels[i++] = channel;
}
- request->rates[band] = (1 << bands->n_bitrates) - 1;
+ request->req.rates[band] = (1 << bands->n_bitrates) - 1;
}
n_channels = i;
}
- request->n_channels = n_channels;
- request->ssids = (void *)request +
- struct_size(request, channels, n_channels);
- request->n_ssids = 1;
+ request->req.n_channels = n_channels;
+ request->req.ssids = (void *)request +
+ struct_size(request, req.channels, n_channels);
+ request->req.n_ssids = 1;
- memcpy(request->ssids[0].ssid, wdev->conn->params.ssid,
- wdev->conn->params.ssid_len);
- request->ssids[0].ssid_len = wdev->conn->params.ssid_len;
+ memcpy(request->req.ssids[0].ssid, wdev->conn->params.ssid,
+ wdev->conn->params.ssid_len);
+ request->req.ssids[0].ssid_len = wdev->conn->params.ssid_len;
- eth_broadcast_addr(request->bssid);
+ eth_broadcast_addr(request->req.bssid);
- request->wdev = wdev;
- request->wiphy = &rdev->wiphy;
- request->scan_start = jiffies;
+ request->req.wdev = wdev;
+ request->req.wiphy = &rdev->wiphy;
+ request->req.scan_start = jiffies;
rdev->scan_req = request;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 4ed9fada4ec0..34c584a215e5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -373,7 +373,8 @@ TRACE_EVENT(rdev_return_int,
);
TRACE_EVENT(rdev_scan,
- TP_PROTO(struct wiphy *wiphy, struct cfg80211_scan_request *request),
+ TP_PROTO(struct wiphy *wiphy,
+ struct cfg80211_scan_request_int *request),
TP_ARGS(wiphy, request),
TP_STRUCT__entry(
WIPHY_ENTRY
@@ -406,9 +407,19 @@ DEFINE_EVENT(wiphy_only_evt, rdev_return_void,
TP_ARGS(wiphy)
);
-DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna,
- TP_PROTO(struct wiphy *wiphy),
- TP_ARGS(wiphy)
+TRACE_EVENT(rdev_get_antenna,
+ TP_PROTO(struct wiphy *wiphy, int radio_idx),
+ TP_ARGS(wiphy, radio_idx),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ __field(int, radio_idx)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ __entry->radio_idx = radio_idx;
+ ),
+ TP_printk(WIPHY_PR_FMT ", radio_idx: %d",
+ WIPHY_PR_ARG, __entry->radio_idx)
);
DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll,
@@ -1678,18 +1689,20 @@ TRACE_EVENT(rdev_join_ocb,
);
TRACE_EVENT(rdev_set_wiphy_params,
- TP_PROTO(struct wiphy *wiphy, u32 changed),
- TP_ARGS(wiphy, changed),
+ TP_PROTO(struct wiphy *wiphy, int radio_idx, u32 changed),
+ TP_ARGS(wiphy, radio_idx, changed),
TP_STRUCT__entry(
WIPHY_ENTRY
+ __field(int, radio_idx)
__field(u32, changed)
),
TP_fast_assign(
WIPHY_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->changed = changed;
),
- TP_printk(WIPHY_PR_FMT ", changed: %u",
- WIPHY_PR_ARG, __entry->changed)
+ TP_printk(WIPHY_PR_FMT ", radio_idx: %d, changed: %u",
+ WIPHY_PR_ARG, __entry->radio_idx, __entry->changed)
);
DECLARE_EVENT_CLASS(wiphy_wdev_link_evt,
@@ -1710,30 +1723,51 @@ DECLARE_EVENT_CLASS(wiphy_wdev_link_evt,
WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id)
);
-DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_tx_power,
+TRACE_EVENT(rdev_get_tx_power,
TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
- unsigned int link_id),
- TP_ARGS(wiphy, wdev, link_id)
+ int radio_idx, unsigned int link_id),
+ TP_ARGS(wiphy, wdev, radio_idx, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ WDEV_ENTRY
+ __field(int, radio_idx)
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ WDEV_ASSIGN;
+ __entry->radio_idx = radio_idx;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+ ", radio_idx: %d, link_id: %u",
+ WIPHY_PR_ARG, WDEV_PR_ARG,
+ __entry->radio_idx, __entry->link_id)
);
TRACE_EVENT(rdev_set_tx_power,
TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
- enum nl80211_tx_power_setting type, int mbm),
- TP_ARGS(wiphy, wdev, type, mbm),
+ int radio_idx, enum nl80211_tx_power_setting type,
+ int mbm),
+ TP_ARGS(wiphy, wdev, radio_idx, type, mbm),
TP_STRUCT__entry(
WIPHY_ENTRY
WDEV_ENTRY
+ __field(int, radio_idx)
__field(enum nl80211_tx_power_setting, type)
__field(int, mbm)
),
TP_fast_assign(
WIPHY_ASSIGN;
WDEV_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->type = type;
__entry->mbm = mbm;
),
- TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type: %u, mbm: %d",
- WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm)
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+ ", radio_idx: %d, type: %u, mbm: %d",
+ WIPHY_PR_ARG, WDEV_PR_ARG,
+ __entry->radio_idx, __entry->type, __entry->mbm)
);
TRACE_EVENT(rdev_return_int_int,
@@ -1866,26 +1900,24 @@ TRACE_EVENT(rdev_return_void_tx_rx,
__entry->rx_max)
);
-DECLARE_EVENT_CLASS(tx_rx_evt,
- TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, tx, rx),
+TRACE_EVENT(rdev_set_antenna,
+ TP_PROTO(struct wiphy *wiphy, int radio_idx, u32 tx, u32 rx),
+ TP_ARGS(wiphy, radio_idx, tx, rx),
TP_STRUCT__entry(
WIPHY_ENTRY
+ __field(int, radio_idx)
__field(u32, tx)
__field(u32, rx)
),
TP_fast_assign(
WIPHY_ASSIGN;
+ __entry->radio_idx = radio_idx;
__entry->tx = tx;
__entry->rx = rx;
),
- TP_printk(WIPHY_PR_FMT ", tx: %u, rx: %u ",
- WIPHY_PR_ARG, __entry->tx, __entry->rx)
-);
-
-DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
- TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, tx, rx)
+ TP_printk(WIPHY_PR_FMT ", radio_idx: %d, tx: %u, rx: %u ",
+ WIPHY_PR_ARG, __entry->radio_idx,
+ __entry->tx, __entry->rx)
);
DECLARE_EVENT_CLASS(wiphy_netdev_id_evt,
@@ -3538,27 +3570,30 @@ TRACE_EVENT(cfg80211_cac_event,
);
DECLARE_EVENT_CLASS(cfg80211_rx_evt,
- TP_PROTO(struct net_device *netdev, const u8 *addr),
- TP_ARGS(netdev, addr),
+ TP_PROTO(struct net_device *netdev, const u8 *addr, int link_id),
+ TP_ARGS(netdev, addr, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
MAC_ENTRY(addr)
+ __field(int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
MAC_ASSIGN(addr, addr);
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->addr)
+ TP_printk(NETDEV_PR_FMT ", %pM, link_id:%d", NETDEV_PR_ARG,
+ __entry->addr, __entry->link_id)
);
DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_spurious_frame,
- TP_PROTO(struct net_device *netdev, const u8 *addr),
- TP_ARGS(netdev, addr)
+ TP_PROTO(struct net_device *netdev, const u8 *addr, int link_id),
+ TP_ARGS(netdev, addr, link_id)
);
DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_unexpected_4addr_frame,
- TP_PROTO(struct net_device *netdev, const u8 *addr),
- TP_ARGS(netdev, addr)
+ TP_PROTO(struct net_device *netdev, const u8 *addr, int link_id),
+ TP_ARGS(netdev, addr, link_id)
);
TRACE_EVENT(cfg80211_ibss_joined,
@@ -3685,12 +3720,12 @@ TRACE_EVENT(cfg80211_tdls_oper_request,
);
TRACE_EVENT(cfg80211_scan_done,
- TP_PROTO(struct cfg80211_scan_request *request,
+ TP_PROTO(struct cfg80211_scan_request_int *request,
struct cfg80211_scan_info *info),
TP_ARGS(request, info),
TP_STRUCT__entry(
__field(u32, n_channels)
- __dynamic_array(u8, ie, request ? request->ie_len : 0)
+ __dynamic_array(u8, ie, request ? request->req.ie_len : 0)
__array(u32, rates, NUM_NL80211_BANDS)
__field(u32, wdev_id)
MAC_ENTRY(wiphy_mac)
@@ -3701,16 +3736,16 @@ TRACE_EVENT(cfg80211_scan_done,
),
TP_fast_assign(
if (request) {
- memcpy(__get_dynamic_array(ie), request->ie,
- request->ie_len);
- memcpy(__entry->rates, request->rates,
+ memcpy(__get_dynamic_array(ie), request->req.ie,
+ request->req.ie_len);
+ memcpy(__entry->rates, request->req.rates,
NUM_NL80211_BANDS);
- __entry->wdev_id = request->wdev ?
- request->wdev->identifier : 0;
- if (request->wiphy)
+ __entry->wdev_id = request->req.wdev ?
+ request->req.wdev->identifier : 0;
+ if (request->req.wiphy)
MAC_ASSIGN(wiphy_mac,
- request->wiphy->perm_addr);
- __entry->no_cck = request->no_cck;
+ request->req.wiphy->perm_addr);
+ __entry->no_cck = request->req.no_cck;
}
if (info) {
__entry->aborted = info->aborted;
@@ -4126,20 +4161,22 @@ TRACE_EVENT(cfg80211_links_removed,
TRACE_EVENT(cfg80211_mlo_reconf_add_done,
TP_PROTO(struct net_device *netdev, u16 link_mask,
- const u8 *buf, size_t len),
- TP_ARGS(netdev, link_mask, buf, len),
+ const u8 *buf, size_t len, bool driver_initiated),
+ TP_ARGS(netdev, link_mask, buf, len, driver_initiated),
TP_STRUCT__entry(
NETDEV_ENTRY
__field(u16, link_mask)
__dynamic_array(u8, buf, len)
+ __field(bool, driver_initiated)
),
TP_fast_assign(
NETDEV_ASSIGN;
__entry->link_mask = link_mask;
memcpy(__get_dynamic_array(buf), buf, len);
+ __entry->driver_initiated = driver_initiated;
),
- TP_printk(NETDEV_PR_FMT ", link_mask:0x%x",
- NETDEV_PR_ARG, __entry->link_mask)
+ TP_printk(NETDEV_PR_FMT ", link_mask:0x%x, driver_initiated:%d",
+ NETDEV_PR_ARG, __entry->link_mask, __entry->driver_initiated)
);
TRACE_EVENT(rdev_assoc_ml_reconf,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ed868c0f7ca8..240c68baa3d1 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -820,6 +820,52 @@ bool ieee80211_is_valid_amsdu(struct sk_buff *skb, u8 mesh_hdr)
}
EXPORT_SYMBOL(ieee80211_is_valid_amsdu);
+
+/*
+ * Detects if an MSDU frame was maliciously converted into an A-MSDU
+ * frame by an adversary. This is done by parsing the received frame
+ * as if it were a regular MSDU, even though the A-MSDU flag is set.
+ *
+ * For non-mesh interfaces, detection involves checking whether the
+ * payload, when interpreted as an MSDU, begins with a valid RFC1042
+ * header. This is done by comparing the A-MSDU subheader's destination
+ * address to the start of the RFC1042 header.
+ *
+ * For mesh interfaces, the MSDU includes a 6-byte Mesh Control field
+ * and an optional variable-length Mesh Address Extension field before
+ * the RFC1042 header. The position of the RFC1042 header must therefore
+ * be calculated based on the mesh header length.
+ *
+ * Since this function intentionally parses an A-MSDU frame as an MSDU,
+ * it only assumes that the A-MSDU subframe header is present, and
+ * beyond this it performs its own bounds checks under the assumption
+ * that the frame is instead parsed as a non-aggregated MSDU.
+ */
+static bool
+is_amsdu_aggregation_attack(struct ethhdr *eth, struct sk_buff *skb,
+ enum nl80211_iftype iftype)
+{
+ int offset;
+
+ /* Non-mesh case can be directly compared */
+ if (iftype != NL80211_IFTYPE_MESH_POINT)
+ return ether_addr_equal(eth->h_dest, rfc1042_header);
+
+ offset = __ieee80211_get_mesh_hdrlen(eth->h_dest[0]);
+ if (offset == 6) {
+ /* Mesh case with empty address extension field */
+ return ether_addr_equal(eth->h_source, rfc1042_header);
+ } else if (offset + ETH_ALEN <= skb->len) {
+ /* Mesh case with non-empty address extension field */
+ u8 temp[ETH_ALEN];
+
+ skb_copy_bits(skb, offset, temp, ETH_ALEN);
+ return ether_addr_equal(temp, rfc1042_header);
+ }
+
+ return false;
+}
+
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
@@ -861,8 +907,10 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
/* the last MSDU has no padding */
if (subframe_len > remaining)
goto purge;
- /* mitigate A-MSDU aggregation injection attacks */
- if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header))
+ /* mitigate A-MSDU aggregation injection attacks; this only needs
+ * to be checked when processing the first subframe (offset == 0).
+ */
+ if (offset == 0 && is_amsdu_aggregation_attack(&hdr.eth, skb, iftype))
goto purge;
offset += sizeof(struct ethhdr);
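The six bytes that the new helper matches against are the LLC/SNAP (RFC 1042) header that begins every normal MSDU payload; if an adversary flips the A-MSDU bit on such a frame, those bytes land exactly where the first subframe's destination address (or, for mesh, the source address or the field following the mesh header) is expected. A minimal sketch of the constant the comparison relies on, mirroring the kernel's rfc1042_header and shown here only for illustration:

        /* LLC/SNAP header used by RFC 1042 encapsulation: DSAP/SSAP 0xaa,
         * control 0x03, OUI 00:00:00.  A subframe "address" equal to these
         * bytes is treated as evidence that the payload is really a plain,
         * non-aggregated MSDU.
         */
        static const unsigned char rfc1042_hdr_example[6] = {
                0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00
        };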
@@ -2516,6 +2564,30 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_check_combinations);
+int cfg80211_get_radio_idx_by_chan(struct wiphy *wiphy,
+ const struct ieee80211_channel *chan)
+{
+ const struct wiphy_radio *radio;
+ int i, j;
+ u32 freq;
+
+ if (!chan)
+ return -EINVAL;
+
+ freq = ieee80211_channel_to_khz(chan);
+ for (i = 0; i < wiphy->n_radio; i++) {
+ radio = &wiphy->radio[i];
+ for (j = 0; j < radio->n_freq_range; j++) {
+ if (freq >= radio->freq_range[j].start_freq &&
+ freq < radio->freq_range[j].end_freq)
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(cfg80211_get_radio_idx_by_chan);
+
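A hypothetical caller sketch (the chandef variable and surrounding context are assumed, not part of this patch) showing how the new helper maps an operating channel back to the per-wiphy radio that covers it; the comparison above is done in kHz against each radio's advertised frequency ranges:

        /* Sketch only: resolve which radio of a multi-radio wiphy serves a channel. */
        int radio_idx = cfg80211_get_radio_idx_by_chan(wiphy, chandef->chan);

        if (radio_idx < 0)
                return radio_idx; /* -EINVAL: no channel, -ENOENT: not covered by any radio */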
int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
const u8 *rates, unsigned int n_rates,
u32 *mask)
@@ -2626,6 +2698,18 @@ bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
return false;
}
+int cfg80211_link_sinfo_alloc_tid_stats(struct link_station_info *link_sinfo,
+ gfp_t gfp)
+{
+ link_sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1,
+ sizeof(*link_sinfo->pertid), gfp);
+ if (!link_sinfo->pertid)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL(cfg80211_link_sinfo_alloc_tid_stats);
+
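A hedged driver-side sketch of how the new per-link allocator would be used (variable names are assumed; this presumes link_station_info's pertid array mirrors station_info's, i.e. IEEE80211_NUM_TIDS + 1 entries of struct cfg80211_tid_stats):

        /* Sketch only: allocate per-TID stats for one link, then fill TID 0. */
        if (cfg80211_link_sinfo_alloc_tid_stats(link_sinfo, GFP_KERNEL))
                return -ENOMEM;

        link_sinfo->pertid[0].filled = BIT(NL80211_TID_STATS_TX_MSDU);
        link_sinfo->pertid[0].tx_msdu = tx_msdu_count;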
int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp)
{
sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a74b1afc594e..1241fda78a68 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -263,7 +263,7 @@ int cfg80211_wext_siwrts(struct net_device *dev,
else
wdev->wiphy->rts_threshold = rts->value;
- err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD);
+ err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_RTS_THRESHOLD);
if (err)
wdev->wiphy->rts_threshold = orts;
return err;
@@ -304,7 +304,7 @@ int cfg80211_wext_siwfrag(struct net_device *dev,
wdev->wiphy->frag_threshold = frag->value & ~0x1;
}
- err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD);
+ err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_FRAG_THRESHOLD);
if (err)
wdev->wiphy->frag_threshold = ofrag;
return err;
@@ -355,7 +355,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev,
changed |= WIPHY_PARAM_RETRY_SHORT;
}
- err = rdev_set_wiphy_params(rdev, changed);
+ err = rdev_set_wiphy_params(rdev, -1, changed);
if (err) {
wdev->wiphy->retry_short = oshort;
wdev->wiphy->retry_long = olong;
@@ -890,7 +890,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
guard(wiphy)(&rdev->wiphy);
- return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm));
+ return rdev_set_tx_power(rdev, wdev, -1, type, DBM_TO_MBM(dbm));
}
static int cfg80211_wext_giwtxpower(struct net_device *dev,
@@ -910,7 +910,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
return -EOPNOTSUPP;
scoped_guard(wiphy, &rdev->wiphy) {
- err = rdev_get_tx_power(rdev, wdev, 0, &val);
+ err = rdev_get_tx_power(rdev, wdev, -1, 0, &val);
}
if (err)
return err;
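The wireless-extensions compat path has no notion of per-radio configuration, so every converted call above passes -1 for the new radio index argument; in this series that value appears to act as a "no specific radio / whole wiphy" sentinel. A hedged sketch of the resulting calling convention (the per-radio variant is hypothetical and shown only for contrast):

        /* Whole-wiphy update, as used throughout wext-compat.c: */
        err = rdev_set_wiphy_params(rdev, -1, WIPHY_PARAM_RTS_THRESHOLD);

        /* Hypothetical multi-radio aware caller targeting one radio: */
        err = rdev_set_wiphy_params(rdev, radio_idx, WIPHY_PARAM_RTS_THRESHOLD);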
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index bea70eb6f034..c32a7c6903d5 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -431,7 +431,7 @@ static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev,
r->__ifi_pad = 0;
r->ifi_type = dev->type;
r->ifi_index = dev->ifindex;
- r->ifi_flags = dev_get_flags(dev);
+ r->ifi_flags = netif_get_flags(dev);
r->ifi_change = 0; /* Wireless changes don't affect those flags */
if (nla_put_string(skb, IFLA_IFNAME, dev->name))
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 1f8ae9f4a3f1..655d1e0ae25f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -891,7 +891,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock,
if (sk->sk_state != TCP_LISTEN)
goto out2;
- rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
+ rc = x25_wait_for_data(sk, READ_ONCE(sk->sk_rcvtimeo));
if (rc)
goto out2;
skb = skb_dequeue(&sk->sk_receive_queue);
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 748d8630ab58..fb8ac1aa5826 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -170,28 +170,6 @@ void x25_establish_link(struct x25_neigh *nb)
dev_queue_xmit(skb);
}
-void x25_terminate_link(struct x25_neigh *nb)
-{
- struct sk_buff *skb;
- unsigned char *ptr;
-
- if (nb->dev->type != ARPHRD_X25)
- return;
-
- skb = alloc_skb(1, GFP_ATOMIC);
- if (!skb) {
- pr_err("x25_dev: out of memory\n");
- return;
- }
-
- ptr = skb_put(skb, 1);
- *ptr = X25_IFACE_DISCONNECT;
-
- skb->protocol = htons(ETH_P_X25);
- skb->dev = nb->dev;
- dev_queue_xmit(skb);
-}
-
void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
{
unsigned char *dptr;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72c000c0ae5f..9c3acecc14b1 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -34,7 +34,7 @@
#include "xsk.h"
#define TX_BATCH_SIZE 32
-#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
+#define MAX_PER_SOCKET_BUDGET 32
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
@@ -300,6 +300,13 @@ static bool xsk_tx_writeable(struct xdp_sock *xs)
return true;
}
+static void __xsk_tx_release(struct xdp_sock *xs)
+{
+ __xskq_cons_release(xs->tx);
+ if (xsk_tx_writeable(xs))
+ xs->sk.sk_write_space(&xs->sk);
+}
+
static bool xsk_is_bound(struct xdp_sock *xs)
{
if (READ_ONCE(xs->state) == XSK_BOUND) {
@@ -407,11 +414,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool)
struct xdp_sock *xs;
rcu_read_lock();
- list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
- __xskq_cons_release(xs->tx);
- if (xsk_tx_writeable(xs))
- xs->sk.sk_write_space(&xs->sk);
- }
+ list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list)
+ __xsk_tx_release(xs);
rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_tx_release);
@@ -779,10 +783,10 @@ free_err:
static int __xsk_generic_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
- u32 max_batch = TX_BATCH_SIZE;
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
+ u32 max_batch;
int err = 0;
mutex_lock(&xs->mutex);
@@ -796,6 +800,7 @@ static int __xsk_generic_xmit(struct sock *sk)
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
+ max_batch = READ_ONCE(xs->max_tx_budget);
while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
if (max_batch-- == 0) {
err = -EAGAIN;
@@ -858,8 +863,7 @@ static int __xsk_generic_xmit(struct sock *sk)
out:
if (sent_frame)
- if (xsk_tx_writeable(xs))
- sk->sk_write_space(sk);
+ __xsk_tx_release(xs);
mutex_unlock(&xs->mutex);
return err;
@@ -1437,6 +1441,21 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
mutex_unlock(&xs->mutex);
return err;
}
+ case XDP_MAX_TX_SKB_BUDGET:
+ {
+ unsigned int budget;
+
+ if (optlen != sizeof(budget))
+ return -EINVAL;
+ if (copy_from_sockptr(&budget, optval, sizeof(budget)))
+ return -EFAULT;
+ if (!xs->tx ||
+ budget < TX_BATCH_SIZE || budget > xs->tx->nentries)
+ return -EACCES;
+
+ WRITE_ONCE(xs->max_tx_budget, budget);
+ return 0;
+ }
default:
break;
}
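A hypothetical user-space sketch of the new XDP_MAX_TX_SKB_BUDGET option (the xsk_fd variable and the chosen value are assumptions; per the check above, the kernel rejects budgets below TX_BATCH_SIZE (32) or larger than the TX ring with -EACCES):

        /* Needs <sys/socket.h> and the UAPI header carrying XDP_MAX_TX_SKB_BUDGET. */
        unsigned int budget = 128; /* must fit within the socket's TX ring size */

        if (setsockopt(xsk_fd, SOL_XDP, XDP_MAX_TX_SKB_BUDGET,
                       &budget, sizeof(budget)) < 0)
                perror("setsockopt(XDP_MAX_TX_SKB_BUDGET)");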
@@ -1734,6 +1753,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
xs->state = XSK_READY;
+ xs->max_tx_budget = TX_BATCH_SIZE;
mutex_init(&xs->mutex);
INIT_LIST_HEAD(&xs->map_list);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 09dcea0cbbed..0e0bca031c03 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -119,7 +119,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
if ((req->xdiag_show & XDP_SHOW_INFO) &&
nla_put_u32(nlskb, XDP_DIAG_UID,
- from_kuid_munged(user_ns, sock_i_uid(sk))))
+ from_kuid_munged(user_ns, sk_uid(sk))))
goto out_nlmsg_trim;
if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 81fd486b5e56..d2819baea414 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -305,7 +305,6 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return -EINVAL;
}
- xfrm_set_type_offload(x);
if (!x->type_offload) {
NL_SET_ERR_MSG(extack, "Type doesn't support offload");
dev_put(dev);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 7e6a71b9d6a3..c9ddef869aa5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -503,6 +503,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
/* An encap_type of -1 indicates async resumption. */
if (encap_type == -1) {
async = 1;
+ dev_put(skb->dev);
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
@@ -649,18 +650,18 @@ lock:
XFRM_SKB_CB(skb)->seq.input.low = seq;
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
- dev_hold(skb->dev);
-
- if (crypto_done)
+ if (crypto_done) {
nexthdr = x->type_offload->input_tail(x, skb);
- else
+ } else {
+ dev_hold(skb->dev);
+
nexthdr = x->type->input(x, skb);
+ if (nexthdr == -EINPROGRESS)
+ return 0;
- if (nexthdr == -EINPROGRESS)
- return 0;
+ dev_put(skb->dev);
+ }
resume:
- dev_put(skb->dev);
-
spin_lock(&x->lock);
if (nexthdr < 0) {
if (nexthdr == -EBADMSG) {
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index cb1e12740c87..330a05286a56 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -875,7 +875,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
return -EINVAL;
}
- if (p.collect_md) {
+ if (p.collect_md || xi->p.collect_md) {
NL_SET_ERR_MSG(extack, "collect_md can't be changed");
return -EINVAL;
}
@@ -886,11 +886,6 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
} else {
if (xi->dev != dev)
return -EEXIST;
- if (xi->p.collect_md) {
- NL_SET_ERR_MSG(extack,
- "device can't be changed to collect_md");
- return -EINVAL;
- }
}
return xfrmi_update(xi, &p);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 907c3ccb440d..43fdc6ed8dd1 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -97,7 +97,7 @@ static int ipcomp_input_done2(struct sk_buff *skb, int err)
struct ip_comp_hdr *ipch = ip_comp_hdr(skb);
const int plen = skb->len;
- skb_reset_transport_header(skb);
+ skb->transport_header = skb->network_header + sizeof(*ipch);
return ipcomp_post_acomp(skb, err, 0) ?:
skb->len < (plen + sizeof(ip_comp_hdr)) ? -EINVAL :
@@ -313,7 +313,6 @@ void ipcomp_destroy(struct xfrm_state *x)
struct ipcomp_data *ipcd = x->data;
if (!ipcd)
return;
- xfrm_state_delete_tunnel(x);
ipcomp_free_data(ipcd);
kfree(ipcd);
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 094d2454602e..c5035a9bc3bb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3925,7 +3925,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
* This will force stale_bundle() to fail on any xdst bundle with
* this dst linked in it.
*/
- if (dst->obsolete < 0 && !stale_bundle(dst))
+ if (READ_ONCE(dst->obsolete) < 0 && !stale_bundle(dst))
return dst;
return NULL;
@@ -3953,7 +3953,7 @@ static void xfrm_link_failure(struct sk_buff *skb)
static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst->obsolete)
+ if (READ_ONCE(dst->obsolete))
sk_dst_reset(sk);
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 77cc418ad69e..77db3b5fe4ac 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -424,11 +424,10 @@ void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
-void xfrm_set_type_offload(struct xfrm_state *x)
+void xfrm_set_type_offload(struct xfrm_state *x, bool try_load)
{
const struct xfrm_type_offload *type = NULL;
struct xfrm_state_afinfo *afinfo;
- bool try_load = true;
retry:
afinfo = xfrm_state_get_afinfo(x->props.family);
@@ -593,7 +592,7 @@ void xfrm_state_free(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_free);
-static void ___xfrm_state_destroy(struct xfrm_state *x)
+static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
if (x->mode_cbs && x->mode_cbs->destroy_state)
x->mode_cbs->destroy_state(x);
@@ -607,6 +606,7 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
kfree(x->coaddr);
kfree(x->replay_esn);
kfree(x->preplay_esn);
+ xfrm_unset_type_offload(x);
if (x->type) {
x->type->destructor(x);
xfrm_put_type(x->type);
@@ -631,7 +631,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
- ___xfrm_state_destroy(x);
+ xfrm_state_gc_destroy(x);
}
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -780,8 +780,6 @@ void xfrm_dev_state_free(struct xfrm_state *x)
struct xfrm_dev_offload *xso = &x->xso;
struct net_device *dev = READ_ONCE(xso->dev);
- xfrm_unset_type_offload(x);
-
if (dev && dev->xfrmdev_ops) {
spin_lock_bh(&xfrm_state_dev_gc_lock);
if (!hlist_unhashed(&x->dev_gclist))
@@ -797,22 +795,18 @@ void xfrm_dev_state_free(struct xfrm_state *x)
}
#endif
-void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
+void __xfrm_state_destroy(struct xfrm_state *x)
{
WARN_ON(x->km.state != XFRM_STATE_DEAD);
- if (sync) {
- synchronize_rcu();
- ___xfrm_state_destroy(x);
- } else {
- spin_lock_bh(&xfrm_state_gc_lock);
- hlist_add_head(&x->gclist, &xfrm_state_gc_list);
- spin_unlock_bh(&xfrm_state_gc_lock);
- schedule_work(&xfrm_state_gc_work);
- }
+ spin_lock_bh(&xfrm_state_gc_lock);
+ hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+ spin_unlock_bh(&xfrm_state_gc_lock);
+ schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
+static void xfrm_state_delete_tunnel(struct xfrm_state *x);
int __xfrm_state_delete(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -840,6 +834,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
xfrm_dev_state_delete(x);
+ xfrm_state_delete_tunnel(x);
+
/* All xfrm_state objects are created by xfrm_state_alloc.
* The xfrm_state_alloc call gives a reference, and that
* is what we are dropping here.
@@ -921,7 +917,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
}
#endif
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
{
int i, err = 0, cnt = 0;
@@ -943,10 +939,7 @@ restart:
err = xfrm_state_delete(x);
xfrm_audit_state_delete(x, err ? 0 : 1,
task_valid);
- if (sync)
- xfrm_state_put_sync(x);
- else
- xfrm_state_put(x);
+ xfrm_state_put(x);
if (!err)
cnt++;
@@ -1307,14 +1300,8 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
const struct flowi *fl, unsigned short family,
struct xfrm_state **best, int *acq_in_progress,
- int *error)
+ int *error, unsigned int pcpu_id)
{
- /* We need the cpu id just as a lookup key,
- * we don't require it to be stable.
- */
- unsigned int pcpu_id = get_cpu();
- put_cpu();
-
/* Resolution logic:
* 1. There is a valid state with matching selector. Done.
* 2. Valid state with inappropriate selector. Skip.
@@ -1381,14 +1368,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
/* We need the cpu id just as a lookup key,
* we don't require it to be stable.
*/
- pcpu_id = get_cpu();
- put_cpu();
+ pcpu_id = raw_smp_processor_id();
to_put = NULL;
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ xfrm_hash_ptrs_get(net, &state_ptrs);
+
hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid &&
@@ -1400,7 +1388,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, encap_family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best)
@@ -1417,7 +1405,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
cached:
@@ -1429,8 +1417,6 @@ cached:
else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
WARN_ON(1);
- xfrm_hash_ptrs_get(net, &state_ptrs);
-
h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask);
hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1460,7 +1446,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
if (best || acquire_in_progress)
goto found;
@@ -1495,7 +1481,7 @@ cached:
tmpl->id.proto == x->id.proto &&
(tmpl->id.spi == x->id.spi || !tmpl->id.spi))
xfrm_state_look_at(pol, x, fl, family,
- &best, &acquire_in_progress, &error);
+ &best, &acquire_in_progress, &error, pcpu_id);
}
found:
@@ -1711,6 +1697,26 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
}
EXPORT_SYMBOL(xfrm_state_lookup_byspi);
+static struct xfrm_state *xfrm_state_lookup_spi_proto(struct net *net, __be32 spi, u8 proto)
+{
+ struct xfrm_state *x;
+ unsigned int i;
+
+ rcu_read_lock();
+ for (i = 0; i <= net->xfrm.state_hmask; i++) {
+ hlist_for_each_entry_rcu(x, &net->xfrm.state_byspi[i], byspi) {
+ if (x->id.spi == spi && x->id.proto == proto) {
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ rcu_read_unlock();
+ return x;
+ }
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
static void __xfrm_state_insert(struct xfrm_state *x)
{
struct net *net = xs_net(x);
@@ -2262,7 +2268,12 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
- xfrm_dev_state_update_stats(x);
+ /* All counters which are needed to decide if state is expired
+ * are handled by SW for non-packet offload modes. Simply skip
+ * the following update and save extra boilerplate in drivers.
+ */
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+ xfrm_dev_state_update_stats(x);
if (!READ_ONCE(x->curlft.use_time))
WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
@@ -2555,10 +2566,8 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
unsigned int h;
struct xfrm_state *x0;
int err = -ENOENT;
- __be32 minspi = htonl(low);
- __be32 maxspi = htonl(high);
+ u32 range = high - low + 1;
__be32 newspi = 0;
- u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
if (x->km.state == XFRM_STATE_DEAD) {
@@ -2572,38 +2581,34 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
err = -ENOENT;
- if (minspi == maxspi) {
- x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
- if (x0) {
- NL_SET_ERR_MSG(extack, "Requested SPI is already in use");
- xfrm_state_put(x0);
+ for (h = 0; h < range; h++) {
+ u32 spi = (low == high) ? low : get_random_u32_inclusive(low, high);
+ newspi = htonl(spi);
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x0 = xfrm_state_lookup_spi_proto(net, newspi, x->id.proto);
+ if (!x0) {
+ x->id.spi = newspi;
+ h = xfrm_spi_hash(net, &x->id.daddr, newspi, x->id.proto, x->props.family);
+ XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h, x->xso.type);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ err = 0;
goto unlock;
}
- newspi = minspi;
- } else {
- u32 spi = 0;
- for (h = 0; h < high-low+1; h++) {
- spi = get_random_u32_inclusive(low, high);
- x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
- if (x0 == NULL) {
- newspi = htonl(spi);
- break;
- }
- xfrm_state_put(x0);
+ xfrm_state_put(x0);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto unlock;
}
+
+ if (low == high)
+ break;
}
- if (newspi) {
- spin_lock_bh(&net->xfrm.xfrm_state_lock);
- x->id.spi = newspi;
- h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
- x->xso.type);
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- err = 0;
- } else {
+ if (err)
NL_SET_ERR_MSG(extack, "No SPI available in the requested range");
- }
unlock:
spin_unlock_bh(&x->lock);
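In the reworked allocator each iteration picks one candidate SPI (the fixed value when low == high, otherwise a random value in [low, high]) and installs it only if the full-table lookup finds no state using that SPI for the same protocol. A small self-contained analogue of the retry behaviour (plain C; rand() and the is_free() callback stand in for get_random_u32_inclusive() and the hash lookup, purely for illustration):

        #include <stdbool.h>
        #include <stdlib.h>

        /* Illustrative only: up to (high - low + 1) attempts, one attempt when
         * low == high.  Assumes high >= low and that the range does not wrap.
         */
        static unsigned int pick_spi(unsigned int low, unsigned int high,
                                     bool (*is_free)(unsigned int spi))
        {
                unsigned long long range = (unsigned long long)high - low + 1;
                unsigned long long i;

                for (i = 0; i < range; i++) {
                        unsigned int spi = (low == high) ?
                                low : low + (unsigned int)(rand() % range);

                        if (is_free(spi))
                                return spi;
                        if (low == high)
                                break;
                }
                return 0; /* mirrors "No SPI available in the requested range" */
        }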
@@ -3077,20 +3082,17 @@ void xfrm_flush_gc(void)
}
EXPORT_SYMBOL(xfrm_flush_gc);
-/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
-void xfrm_state_delete_tunnel(struct xfrm_state *x)
+static void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
if (x->tunnel) {
struct xfrm_state *t = x->tunnel;
- if (atomic_read(&t->tunnel_users) == 2)
+ if (atomic_dec_return(&t->tunnel_users) == 1)
xfrm_state_delete(t);
- atomic_dec(&t->tunnel_users);
- xfrm_state_put_sync(t);
+ xfrm_state_put(t);
x->tunnel = NULL;
}
}
-EXPORT_SYMBOL(xfrm_state_delete_tunnel);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
@@ -3295,8 +3297,8 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&xfrm_state_gc_work);
- xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 59f258daf830..684239018bec 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -977,6 +977,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]);
/* configure the hardware if offload is requested */
if (attrs[XFRMA_OFFLOAD_DEV]) {
err = xfrm_dev_state_add(net, x,
@@ -2634,7 +2635,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_usersa_flush *p = nlmsg_data(nlh);
int err;
- err = xfrm_state_flush(net, p->proto, true, false);
+ err = xfrm_state_flush(net, p->proto, true);
if (err) {
if (err == -ESRCH) /* empty table */
return 0;